Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"This includes fixes for two oopses, one on PPC and one on x86.

The rest is fixes for bugs with newer Intel processors"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
kvm/fpu: Enable eager restore kvm FPU for MPX
Revert "KVM: x86: drop fpu_activate hook"
kvm: fix crash in kvm_vcpu_reload_apic_access_page
KVM: MMU: fix SMAP virtualization
KVM: MMU: fix CR4.SMEP=1, CR0.WP=0 with shadow pages
KVM: MMU: fix smap permission check
KVM: PPC: Book3S HV: Fix list traversal in error case

+74 -19
+14 -4
Documentation/virtual/kvm/mmu.txt
··· 169 169 Contains the value of cr4.smep && !cr0.wp for which the page is valid 170 170 (pages for which this is true are different from other pages; see the 171 171 treatment of cr0.wp=0 below). 172 + role.smap_andnot_wp: 173 + Contains the value of cr4.smap && !cr0.wp for which the page is valid 174 + (pages for which this is true are different from other pages; see the 175 + treatment of cr0.wp=0 below). 172 176 gfn: 173 177 Either the guest page table containing the translations shadowed by this 174 178 page, or the base page frame for linear translations. See role.direct. ··· 348 344 349 345 (user write faults generate a #PF) 350 346 351 - In the first case there is an additional complication if CR4.SMEP is 352 - enabled: since we've turned the page into a kernel page, the kernel may now 353 - execute it. We handle this by also setting spte.nx. If we get a user 354 - fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back. 347 + In the first case there are two additional complications: 348 + - if CR4.SMEP is enabled: since we've turned the page into a kernel page, 349 + the kernel may now execute it. We handle this by also setting spte.nx. 350 + If we get a user fetch or read fault, we'll change spte.u=1 and 351 + spte.nx=gpte.nx back. 352 + - if CR4.SMAP is disabled: since the page has been changed to a kernel 353 + page, it can not be reused when CR4.SMAP is enabled. We set 354 + CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note, 355 + here we do not care the case that CR4.SMAP is enabled since KVM will 356 + directly inject #PF to guest due to failed permission check. 355 357 356 358 To prevent an spte that was converted into a kernel page with cr0.wp=0 357 359 from being written by the kernel after cr0.wp has changed to 1, we make
+3 -2
arch/powerpc/kvm/book3s_hv.c
··· 1952 1952 */ 1953 1953 static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) 1954 1954 { 1955 - struct kvm_vcpu *vcpu; 1955 + struct kvm_vcpu *vcpu, *vnext; 1956 1956 int i; 1957 1957 int srcu_idx; 1958 1958 ··· 1982 1982 */ 1983 1983 if ((threads_per_core > 1) && 1984 1984 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { 1985 - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1985 + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 1986 + arch.run_list) { 1986 1987 vcpu->arch.ret = -EBUSY; 1987 1988 kvmppc_remove_runnable(vc, vcpu); 1988 1989 wake_up(&vcpu->arch.cpu_run);
+3
arch/x86/include/asm/kvm_host.h
··· 207 207 unsigned nxe:1; 208 208 unsigned cr0_wp:1; 209 209 unsigned smep_andnot_wp:1; 210 + unsigned smap_andnot_wp:1; 210 211 }; 211 212 }; 212 213 ··· 401 400 struct kvm_mmu_memory_cache mmu_page_header_cache; 402 401 403 402 struct fpu guest_fpu; 403 + bool eager_fpu; 404 404 u64 xcr0; 405 405 u64 guest_supported_xcr0; 406 406 u32 guest_xstate_size; ··· 745 743 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 746 744 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 747 745 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 746 + void (*fpu_activate)(struct kvm_vcpu *vcpu); 748 747 void (*fpu_deactivate)(struct kvm_vcpu *vcpu); 749 748 750 749 void (*tlb_flush)(struct kvm_vcpu *vcpu);
+4
arch/x86/kvm/cpuid.c
··· 16 16 #include <linux/module.h> 17 17 #include <linux/vmalloc.h> 18 18 #include <linux/uaccess.h> 19 + #include <asm/i387.h> /* For use_eager_fpu. Ugh! */ 20 + #include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */ 19 21 #include <asm/user.h> 20 22 #include <asm/xsave.h> 21 23 #include "cpuid.h" ··· 96 94 best = kvm_find_cpuid_entry(vcpu, 0xD, 1); 97 95 if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) 98 96 best->ebx = xstate_required_size(vcpu->arch.xcr0, true); 97 + 98 + vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu); 99 99 100 100 /* 101 101 * The existing code assumes virtual address is 48-bit in the canonical
+8
arch/x86/kvm/cpuid.h
··· 117 117 best = kvm_find_cpuid_entry(vcpu, 7, 0); 118 118 return best && (best->ebx & bit(X86_FEATURE_RTM)); 119 119 } 120 + 121 + static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) 122 + { 123 + struct kvm_cpuid_entry2 *best; 124 + 125 + best = kvm_find_cpuid_entry(vcpu, 7, 0); 126 + return best && (best->ebx & bit(X86_FEATURE_MPX)); 127 + } 120 128 #endif
+12 -4
arch/x86/kvm/mmu.c
··· 3736 3736 } 3737 3737 } 3738 3738 3739 - void update_permission_bitmask(struct kvm_vcpu *vcpu, 3740 - struct kvm_mmu *mmu, bool ept) 3739 + static void update_permission_bitmask(struct kvm_vcpu *vcpu, 3740 + struct kvm_mmu *mmu, bool ept) 3741 3741 { 3742 3742 unsigned bit, byte, pfec; 3743 3743 u8 map; ··· 3918 3918 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) 3919 3919 { 3920 3920 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); 3921 + bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); 3921 3922 struct kvm_mmu *context = &vcpu->arch.mmu; 3922 3923 3923 3924 MMU_WARN_ON(VALID_PAGE(context->root_hpa)); ··· 3937 3936 context->base_role.cr0_wp = is_write_protection(vcpu); 3938 3937 context->base_role.smep_andnot_wp 3939 3938 = smep && !is_write_protection(vcpu); 3939 + context->base_role.smap_andnot_wp 3940 + = smap && !is_write_protection(vcpu); 3940 3941 } 3941 3942 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 3942 3943 ··· 4210 4207 const u8 *new, int bytes) 4211 4208 { 4212 4209 gfn_t gfn = gpa >> PAGE_SHIFT; 4213 - union kvm_mmu_page_role mask = { .word = 0 }; 4214 4210 struct kvm_mmu_page *sp; 4215 4211 LIST_HEAD(invalid_list); 4216 4212 u64 entry, gentry, *spte; 4217 4213 int npte; 4218 4214 bool remote_flush, local_flush, zap_page; 4215 + union kvm_mmu_page_role mask = (union kvm_mmu_page_role) { 4216 + .cr0_wp = 1, 4217 + .cr4_pae = 1, 4218 + .nxe = 1, 4219 + .smep_andnot_wp = 1, 4220 + .smap_andnot_wp = 1, 4221 + }; 4219 4222 4220 4223 /* 4221 4224 * If we don't have indirect shadow pages, it means no page is ··· 4247 4238 ++vcpu->kvm->stat.mmu_pte_write; 4248 4239 kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); 4249 4240 4250 - mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; 4251 4241 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { 4252 4242 if (detect_write_misaligned(sp, gpa, bytes) || 4253 4243 detect_write_flooding(sp)) {
+2 -2
arch/x86/kvm/mmu.h
··· 71 71 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); 72 72 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); 73 73 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); 74 - void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, 75 - bool ept); 76 74 77 75 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) 78 76 { ··· 163 165 unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC); 164 166 int index = (pfec >> 1) + 165 167 (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); 168 + 169 + WARN_ON(pfec & PFERR_RSVD_MASK); 166 170 167 171 return (mmu->permissions[index] >> pte_access) & 1; 168 172 }
+7
arch/x86/kvm/paging_tmpl.h
··· 718 718 mmu_is_nested(vcpu)); 719 719 if (likely(r != RET_MMIO_PF_INVALID)) 720 720 return r; 721 + 722 + /* 723 + * page fault with PFEC.RSVD = 1 is caused by shadow 724 + * page fault, should not be used to walk guest page 725 + * table. 726 + */ 727 + error_code &= ~PFERR_RSVD_MASK; 721 728 }; 722 729 723 730 r = mmu_topup_memory_caches(vcpu);
+1
arch/x86/kvm/svm.c
··· 4381 4381 .cache_reg = svm_cache_reg, 4382 4382 .get_rflags = svm_get_rflags, 4383 4383 .set_rflags = svm_set_rflags, 4384 + .fpu_activate = svm_fpu_activate, 4384 4385 .fpu_deactivate = svm_fpu_deactivate, 4385 4386 4386 4387 .tlb_flush = svm_flush_tlb,
+1
arch/x86/kvm/vmx.c
··· 10185 10185 .cache_reg = vmx_cache_reg, 10186 10186 .get_rflags = vmx_get_rflags, 10187 10187 .set_rflags = vmx_set_rflags, 10188 + .fpu_activate = vmx_fpu_activate, 10188 10189 .fpu_deactivate = vmx_fpu_deactivate, 10189 10190 10190 10191 .tlb_flush = vmx_flush_tlb,
+19 -7
arch/x86/kvm/x86.c
··· 702 702 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 703 703 { 704 704 unsigned long old_cr4 = kvm_read_cr4(vcpu); 705 - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | 706 - X86_CR4_PAE | X86_CR4_SMEP; 705 + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | 706 + X86_CR4_SMEP | X86_CR4_SMAP; 707 + 707 708 if (cr4 & CR4_RESERVED_BITS) 708 709 return 1; 709 710 ··· 744 743 if (((cr4 ^ old_cr4) & pdptr_bits) || 745 744 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) 746 745 kvm_mmu_reset_context(vcpu); 747 - 748 - if ((cr4 ^ old_cr4) & X86_CR4_SMAP) 749 - update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false); 750 746 751 747 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) 752 748 kvm_update_cpuid(vcpu); ··· 6195 6197 return; 6196 6198 6197 6199 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); 6200 + if (is_error_page(page)) 6201 + return; 6198 6202 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); 6199 6203 6200 6204 /* ··· 7060 7060 fpu_save_init(&vcpu->arch.guest_fpu); 7061 7061 __kernel_fpu_end(); 7062 7062 ++vcpu->stat.fpu_reload; 7063 - kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); 7063 + if (!vcpu->arch.eager_fpu) 7064 + kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); 7065 + 7064 7066 trace_kvm_fpu(0); 7065 7067 } 7066 7068 ··· 7078 7076 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 7079 7077 unsigned int id) 7080 7078 { 7079 + struct kvm_vcpu *vcpu; 7080 + 7081 7081 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) 7082 7082 printk_once(KERN_WARNING 7083 7083 "kvm: SMP vm created on host with unstable TSC; " 7084 7084 "guest TSC will not be reliable\n"); 7085 - return kvm_x86_ops->vcpu_create(kvm, id); 7085 + 7086 + vcpu = kvm_x86_ops->vcpu_create(kvm, id); 7087 + 7088 + /* 7089 + * Activate fpu unconditionally in case the guest needs eager FPU. It will be 7090 + * deactivated soon if it doesn't. 
7091 + */ 7092 + kvm_x86_ops->fpu_activate(vcpu); 7093 + return vcpu; 7086 7094 } 7087 7095 7088 7096 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)