Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull x86 kvm fixes from Paolo Bonzini:
"x86 fixes. Everyone else is already in holiday mood apparently.

- Add a missing 'break' to fix param parsing in the rseq selftest

- Apply runtime updates to the _current_ CPUID when userspace is
setting CPUID, e.g. as part of vCPU hotplug, to fix a false
positive and to avoid dropping the pending update

- Disallow toggling KVM_MEM_GUEST_MEMFD on an existing memslot, as
it's not supported by KVM and leads to a use-after-free due to KVM
failing to unbind the memslot from the previously-associated
guest_memfd instance

- Harden against similar KVM_MEM_GUEST_MEMFD goofs, and prepare for
supporting flags-only changes on KVM_MEM_GUEST_MEMFD memslots,
e.g. for dirty logging

- Set exit_code[63:32] to -1 (all 0xffs) when synthesizing a nested
SVM_EXIT_ERR (a.k.a. VMEXIT_INVALID) #VMEXIT, as VMEXIT_INVALID is
defined as -1ull (a 64-bit value)

- Update SVI when activating APICv to fix a bug where a
post-activation EOI for an in-service IRQ would effectively be lost
due to SVI being stale

- Immediately refresh APICv controls (if necessary) on a nested
VM-Exit instead of deferring the update via KVM_REQ_APICV_UPDATE,
as the request is effectively ignored because KVM thinks the vCPU
already has the correct APICv settings"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: nVMX: Immediately refresh APICv controls as needed on nested VM-Exit
KVM: VMX: Update SVI during runtime APICv activation
KVM: nSVM: Set exit_code_hi to -1 when synthesizing SVM_EXIT_ERR (failed VMRUN)
KVM: nSVM: Clear exit_code_hi in VMCB when synthesizing nested VM-Exits
KVM: Harden and prepare for modifying existing guest_memfd memslots
KVM: Disallow toggling KVM_MEM_GUEST_MEMFD on an existing memslot
KVM: selftests: Add a CPUID testcase for KVM_SET_CPUID2 with runtime updates
KVM: x86: Apply runtime updates to current CPUID during KVM_SET_CPUID{,2}
KVM: selftests: Add missing "break" in rseq_test's param parsing

+58 -18
+9 -2
arch/x86/kvm/cpuid.c
··· 510 510 int r; 511 511 512 512 /* 513 + * Apply pending runtime CPUID updates to the current CPUID entries to 514 + * avoid false positives due to mismatches on KVM-owned feature flags. 515 + */ 516 + if (vcpu->arch.cpuid_dynamic_bits_dirty) 517 + kvm_update_cpuid_runtime(vcpu); 518 + 519 + /* 513 520 * Swap the existing (old) entries with the incoming (new) entries in 514 521 * order to massage the new entries, e.g. to account for dynamic bits 515 - * that KVM controls, without clobbering the current guest CPUID, which 516 - * KVM needs to preserve in order to unwind on failure. 522 + * that KVM controls, without losing the current guest CPUID, which KVM 523 + * needs to preserve in order to unwind on failure. 517 524 * 518 525 * Similarly, save the vCPU's current cpu_caps so that the capabilities 519 526 * can be updated alongside the CPUID entries when performing runtime
+2 -2
arch/x86/kvm/svm/nested.c
··· 985 985 if (!nested_vmcb_check_save(vcpu) || 986 986 !nested_vmcb_check_controls(vcpu)) { 987 987 vmcb12->control.exit_code = SVM_EXIT_ERR; 988 - vmcb12->control.exit_code_hi = 0; 988 + vmcb12->control.exit_code_hi = -1u; 989 989 vmcb12->control.exit_info_1 = 0; 990 990 vmcb12->control.exit_info_2 = 0; 991 991 goto out; ··· 1018 1018 svm->soft_int_injected = false; 1019 1019 1020 1020 svm->vmcb->control.exit_code = SVM_EXIT_ERR; 1021 - svm->vmcb->control.exit_code_hi = 0; 1021 + svm->vmcb->control.exit_code_hi = -1u; 1022 1022 svm->vmcb->control.exit_info_1 = 0; 1023 1023 svm->vmcb->control.exit_info_2 = 0; 1024 1024
+2
arch/x86/kvm/svm/svm.c
··· 2443 2443 2444 2444 if (cr0 ^ val) { 2445 2445 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; 2446 + svm->vmcb->control.exit_code_hi = 0; 2446 2447 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); 2447 2448 } 2448 2449 ··· 4618 4617 if (static_cpu_has(X86_FEATURE_NRIPS)) 4619 4618 vmcb->control.next_rip = info->next_rip; 4620 4619 vmcb->control.exit_code = icpt_info.exit_code; 4620 + vmcb->control.exit_code_hi = 0; 4621 4621 vmexit = nested_svm_exit_handled(svm); 4622 4622 4623 4623 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
+4 -3
arch/x86/kvm/svm/svm.h
··· 761 761 762 762 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) 763 763 { 764 - svm->vmcb->control.exit_code = exit_code; 765 - svm->vmcb->control.exit_info_1 = 0; 766 - svm->vmcb->control.exit_info_2 = 0; 764 + svm->vmcb->control.exit_code = exit_code; 765 + svm->vmcb->control.exit_code_hi = 0; 766 + svm->vmcb->control.exit_info_1 = 0; 767 + svm->vmcb->control.exit_info_2 = 0; 767 768 return nested_svm_vmexit(svm); 768 769 } 769 770
+2 -1
arch/x86/kvm/vmx/nested.c
··· 19 19 #include "trace.h" 20 20 #include "vmx.h" 21 21 #include "smm.h" 22 + #include "x86_ops.h" 22 23 23 24 static bool __read_mostly enable_shadow_vmcs = 1; 24 25 module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); ··· 5166 5165 5167 5166 if (vmx->nested.update_vmcs01_apicv_status) { 5168 5167 vmx->nested.update_vmcs01_apicv_status = false; 5169 - kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); 5168 + vmx_refresh_apicv_exec_ctrl(vcpu); 5170 5169 } 5171 5170 5172 5171 if (vmx->nested.update_vmcs01_hwapic_isr) {
-9
arch/x86/kvm/vmx/vmx.c
··· 6937 6937 * VM-Exit, otherwise L1 with run with a stale SVI. 6938 6938 */ 6939 6939 if (is_guest_mode(vcpu)) { 6940 - /* 6941 - * KVM is supposed to forward intercepted L2 EOIs to L1 if VID 6942 - * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC. 6943 - * Note, userspace can stuff state while L2 is active; assert 6944 - * that VID is disabled if and only if the vCPU is in KVM_RUN 6945 - * to avoid false positives if userspace is setting APIC state. 6946 - */ 6947 - WARN_ON_ONCE(vcpu->wants_to_run && 6948 - nested_cpu_has_vid(get_vmcs12(vcpu))); 6949 6940 to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true; 6950 6941 return; 6951 6942 }
+7
arch/x86/kvm/x86.c
··· 10886 10886 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was 10887 10887 * still active when the interrupt got accepted. Make sure 10888 10888 * kvm_check_and_inject_events() is called to check for that. 10889 + * 10890 + * Update SVI when APICv gets enabled, otherwise SVI won't reflect the 10891 + * highest bit in vISR and the next accelerated EOI in the guest won't 10892 + * be virtualized correctly (the CPU uses SVI to determine which vISR 10893 + * vector to clear). 10889 10894 */ 10890 10895 if (!apic->apicv_active) 10891 10896 kvm_make_request(KVM_REQ_EVENT, vcpu); 10897 + else 10898 + kvm_apic_update_hwapic_isr(vcpu); 10892 10899 10893 10900 out: 10894 10901 preempt_enable();
+1
tools/testing/selftests/kvm/rseq_test.c
··· 215 215 switch (opt) { 216 216 case 'u': 217 217 skip_sanity_check = true; 218 + break; 218 219 case 'l': 219 220 latency = atoi_paranoid(optarg); 220 221 break;
+15
tools/testing/selftests/kvm/x86/cpuid_test.c
··· 155 155 static void set_cpuid_after_run(struct kvm_vcpu *vcpu) 156 156 { 157 157 struct kvm_cpuid_entry2 *ent; 158 + struct kvm_sregs sregs; 158 159 int rc; 159 160 u32 eax, ebx, x; 160 161 161 162 /* Setting unmodified CPUID is allowed */ 163 + rc = __vcpu_set_cpuid(vcpu); 164 + TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); 165 + 166 + /* 167 + * Toggle CR4 bits that affect dynamic CPUID feature flags to verify 168 + * setting unmodified CPUID succeeds with runtime CPUID updates. 169 + */ 170 + vcpu_sregs_get(vcpu, &sregs); 171 + if (kvm_cpu_has(X86_FEATURE_XSAVE)) 172 + sregs.cr4 ^= X86_CR4_OSXSAVE; 173 + if (kvm_cpu_has(X86_FEATURE_PKU)) 174 + sregs.cr4 ^= X86_CR4_PKE; 175 + vcpu_sregs_set(vcpu, &sregs); 176 + 162 177 rc = __vcpu_set_cpuid(vcpu); 163 178 TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); 164 179
+16 -1
virt/kvm/kvm_main.c
··· 1749 1749 kvm_free_memslot(kvm, old); 1750 1750 break; 1751 1751 case KVM_MR_MOVE: 1752 + /* 1753 + * Moving a guest_memfd memslot isn't supported, and will never 1754 + * be supported. 1755 + */ 1756 + WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD); 1757 + fallthrough; 1752 1758 case KVM_MR_FLAGS_ONLY: 1753 1759 /* 1754 1760 * Free the dirty bitmap as needed; the below check encompasses ··· 1762 1756 */ 1763 1757 if (old->dirty_bitmap && !new->dirty_bitmap) 1764 1758 kvm_destroy_dirty_bitmap(old); 1759 + 1760 + /* 1761 + * Unbind the guest_memfd instance as needed; the @new slot has 1762 + * already created its own binding. TODO: Drop the WARN when 1763 + * dirty logging guest_memfd memslots is supported. Until then, 1764 + * flags-only changes on guest_memfd slots should be impossible. 1765 + */ 1766 + if (WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD)) 1767 + kvm_gmem_unbind(old); 1765 1768 1766 1769 /* 1767 1770 * The final quirk. Free the detached, old slot, but only its ··· 2101 2086 return -EINVAL; 2102 2087 if ((mem->userspace_addr != old->userspace_addr) || 2103 2088 (npages != old->npages) || 2104 - ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) 2089 + ((mem->flags ^ old->flags) & (KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD))) 2105 2090 return -EINVAL; 2106 2091 2107 2092 if (base_gfn != old->base_gfn)