Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM fixes from Paolo Bonzini:
"Five small fixes.

The nested migration bug will be fixed with a better API in 5.10 or
5.11; for now, this is a fix that works with existing userspace but
keeps the current ugly API."

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: SVM: Add a dedicated INVD intercept routine
KVM: x86: Reset MMU context if guest toggles CR4.SMAP or CR4.PKE
KVM: x86: fix MSR_IA32_TSC read for nested migration
selftests: kvm: Fix assert failure in single-step test
KVM: x86: VMX: Make smaller physical guest address space support user-configurable

+40 -12
+7 -1
arch/x86/kvm/svm/svm.c
··· 2183 2183 return 1; 2184 2184 } 2185 2185 2186 + static int invd_interception(struct vcpu_svm *svm) 2187 + { 2188 + /* Treat an INVD instruction as a NOP and just skip it. */ 2189 + return kvm_skip_emulated_instruction(&svm->vcpu); 2190 + } 2191 + 2186 2192 static int invlpg_interception(struct vcpu_svm *svm) 2187 2193 { 2188 2194 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) ··· 2780 2774 [SVM_EXIT_RDPMC] = rdpmc_interception, 2781 2775 [SVM_EXIT_CPUID] = cpuid_interception, 2782 2776 [SVM_EXIT_IRET] = iret_interception, 2783 - [SVM_EXIT_INVD] = emulate_on_interception, 2777 + [SVM_EXIT_INVD] = invd_interception, 2784 2778 [SVM_EXIT_PAUSE] = pause_interception, 2785 2779 [SVM_EXIT_HLT] = halt_interception, 2786 2780 [SVM_EXIT_INVLPG] = invlpg_interception,
+10 -5
arch/x86/kvm/vmx/vmx.c
··· 129 129 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); 130 130 #endif 131 131 132 + extern bool __read_mostly allow_smaller_maxphyaddr; 133 + module_param(allow_smaller_maxphyaddr, bool, S_IRUGO); 134 + 132 135 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) 133 136 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE 134 137 #define KVM_VM_CR0_ALWAYS_ON \ ··· 4806 4803 * EPT will cause page fault only if we need to 4807 4804 * detect illegal GPAs. 4808 4805 */ 4806 + WARN_ON_ONCE(!allow_smaller_maxphyaddr); 4809 4807 kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code); 4810 4808 return 1; 4811 4809 } else ··· 5335 5331 * would also use advanced VM-exit information for EPT violations to 5336 5332 * reconstruct the page fault error code. 5337 5333 */ 5338 - if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa))) 5334 + if (unlikely(allow_smaller_maxphyaddr && kvm_mmu_is_illegal_gpa(vcpu, gpa))) 5339 5335 return kvm_emulate_instruction(vcpu, 0); 5340 5336 5341 5337 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); ··· 8309 8305 vmx_check_vmcs12_offsets(); 8310 8306 8311 8307 /* 8312 - * Intel processors don't have problems with 8313 - * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable 8314 - * it for VMX by default 8308 + * Shadow paging doesn't have a (further) performance penalty 8309 + * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it 8310 + * by default 8315 8311 */ 8316 - allow_smaller_maxphyaddr = true; 8312 + if (!enable_ept) 8313 + allow_smaller_maxphyaddr = true; 8317 8314 8318 8315 return 0; 8319 8316 }
+4 -1
arch/x86/kvm/vmx/vmx.h
··· 552 552 553 553 static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) 554 554 { 555 - return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; 555 + if (!enable_ept) 556 + return true; 557 + 558 + return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; 556 559 } 557 560 558 561 void dump_vmcs(void);
+18 -4
arch/x86/kvm/x86.c
··· 188 188 u64 __read_mostly host_efer; 189 189 EXPORT_SYMBOL_GPL(host_efer); 190 190 191 - bool __read_mostly allow_smaller_maxphyaddr; 191 + bool __read_mostly allow_smaller_maxphyaddr = 0; 192 192 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); 193 193 194 194 static u64 __read_mostly host_xss; ··· 976 976 unsigned long old_cr4 = kvm_read_cr4(vcpu); 977 977 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | 978 978 X86_CR4_SMEP; 979 + unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; 979 980 980 981 if (kvm_valid_cr4(vcpu, cr4)) 981 982 return 1; ··· 1004 1003 if (kvm_x86_ops.set_cr4(vcpu, cr4)) 1005 1004 return 1; 1006 1005 1007 - if (((cr4 ^ old_cr4) & pdptr_bits) || 1006 + if (((cr4 ^ old_cr4) & mmu_role_bits) || 1008 1007 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) 1009 1008 kvm_mmu_reset_context(vcpu); 1010 1009 ··· 3222 3221 case MSR_IA32_POWER_CTL: 3223 3222 msr_info->data = vcpu->arch.msr_ia32_power_ctl; 3224 3223 break; 3225 - case MSR_IA32_TSC: 3226 - msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; 3224 + case MSR_IA32_TSC: { 3225 + /* 3226 + * Intel SDM states that MSR_IA32_TSC read adds the TSC offset 3227 + * even when not intercepted. AMD manual doesn't explicitly 3228 + * state this but appears to behave the same. 3229 + * 3230 + * On userspace reads and writes, however, we unconditionally 3231 + * operate L1's TSC value to ensure backwards-compatible 3232 + * behavior for migration. 3233 + */ 3234 + u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset : 3235 + vcpu->arch.tsc_offset; 3236 + 3237 + msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset; 3227 3238 break; 3239 + } 3228 3240 case MSR_MTRRcap: 3229 3241 case 0x200 ... 0x2ff: 3230 3242 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
+1 -1
tools/testing/selftests/kvm/x86_64/debug_regs.c
··· 73 73 int i; 74 74 /* Instruction lengths starting at ss_start */ 75 75 int ss_size[4] = { 76 - 3, /* xor */ 76 + 2, /* xor */ 77 77 2, /* cpuid */ 78 78 5, /* mov */ 79 79 2, /* rdmsr */