Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"ARM:

- Fix unexpected sign extension of KVM_ARM_DEVICE_ID_MASK

- Tidy-up handling of AArch32 on asymmetric systems

x86:

- Fix 'missing ENDBR' BUG for fastop functions

Generic:

- Some cleanup and static analyzer patches

- More fixes to KVM_CREATE_VM unwind paths"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: Drop unnecessary initialization of "ops" in kvm_ioctl_create_device()
KVM: Drop unnecessary initialization of "npages" in hva_to_pfn_slow()
x86/kvm: Fix "missing ENDBR" BUG for fastop functions
x86/kvm: Simplify FOP_SETCC()
x86/ibt, objtool: Add IBT_NOSEAL()
KVM: Rename mmu_notifier_* to mmu_invalidate_*
KVM: Rename KVM_PRIVATE_MEM_SLOTS to KVM_INTERNAL_MEM_SLOTS
KVM: MIPS: remove unnecessary definition of KVM_PRIVATE_MEM_SLOTS
KVM: Move coalesced MMIO initialization (back) into kvm_create_vm()
KVM: Unconditionally get a ref to /dev/kvm module when creating a VM
KVM: Properly unwind VM creation if creating debugfs fails
KVM: arm64: Reject 32bit user PSTATE on asymmetric systems
KVM: arm64: Treat PMCR_EL1.LC as RES1 on asymmetric systems
KVM: arm64: Fix compile error due to sign extension

Linus Torvalds 3 years ago ca052cfd 42c54d54

+157 -156

25 changed files

expand all collapse all

arch

arm64

include

asm

kvm_host.h

uapi

asm

kvm.h

kvm

arm.c

guest.c

mmu.c

sys_regs.c

mips

include

asm

kvm_host.h

kvm

mmu.c

powerpc

include

asm

kvm_book3s_64.h

kvm

book3s_64_mmu_host.c

book3s_64_mmu_hv.c

book3s_64_mmu_radix.c

book3s_hv_nested.c

book3s_hv_rm_mmu.c

e500_mmu_host.c

riscv

kvm

mmu.c

x86

include

asm

ibt.h

kvm_host.h

kvm

emulate.c

mmu

mmu.c

paging_tmpl.h

include

linux

kvm_host.h

tools

objtool

check.c

virt

kvm

kvm_main.c

pfncache.c

arch/arm64/include/asm/kvm_host.h

reviewed

··· 929 929 (system_supports_mte() && \ 930 930 test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) 931 931 932 932 + #define kvm_supports_32bit_el0() \ 933 933 + (system_supports_32bit_el0() && \ 934 934 + !static_branch_unlikely(&arm64_mismatched_32bit_el0)) 935 935 + 932 936 int kvm_trng_call(struct kvm_vcpu *vcpu); 933 937 #ifdef CONFIG_KVM 934 938 extern phys_addr_t hyp_mem_base;

+4 -2

arch/arm64/include/uapi/asm/kvm.h

reviewed

··· 75 75 76 76 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 77 77 #define KVM_ARM_DEVICE_TYPE_SHIFT 0 78 78 - #define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) 78 78 + #define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ 79 79 + KVM_ARM_DEVICE_TYPE_SHIFT) 79 80 #define KVM_ARM_DEVICE_ID_SHIFT 16 80 80 - #define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) 81 81 + #define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ 82 82 + KVM_ARM_DEVICE_ID_SHIFT) 81 83 82 84 /* Supported device IDs */ 83 85 #define KVM_ARM_DEVICE_VGIC_V2 0

+1 -2

arch/arm64/kvm/arm.c

reviewed

··· 757 757 if (likely(!vcpu_mode_is_32bit(vcpu))) 758 758 return false; 759 759 760 760 - return !system_supports_32bit_el0() || 761 761 - static_branch_unlikely(&arm64_mismatched_32bit_el0); 760 760 + return !kvm_supports_32bit_el0(); 762 761 } 763 762 764 763 /**

+1 -1

arch/arm64/kvm/guest.c

reviewed

··· 242 242 u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; 243 243 switch (mode) { 244 244 case PSR_AA32_MODE_USR: 245 245 - if (!system_supports_32bit_el0()) 245 245 + if (!kvm_supports_32bit_el0()) 246 246 return -EINVAL; 247 247 break; 248 248 case PSR_AA32_MODE_FIQ:

+4 -4

arch/arm64/kvm/mmu.c

reviewed

··· 993 993 * THP doesn't start to split while we are adjusting the 994 994 * refcounts. 995 995 * 996 996 - * We are sure this doesn't happen, because mmu_notifier_retry 996 996 + * We are sure this doesn't happen, because mmu_invalidate_retry 997 997 * was successful and we are holding the mmu_lock, so if this 998 998 * THP is trying to split, it will be blocked in the mmu 999 999 * notifier before touching any of the pages, specifically ··· 1188 1188 return ret; 1189 1189 } 1190 1190 1191 1191 - mmu_seq = vcpu->kvm->mmu_notifier_seq; 1191 1191 + mmu_seq = vcpu->kvm->mmu_invalidate_seq; 1192 1192 /* 1193 1193 - * Ensure the read of mmu_notifier_seq happens before we call 1193 1193 + * Ensure the read of mmu_invalidate_seq happens before we call 1194 1194 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk 1195 1195 * the page we just got a reference to gets unmapped before we have a 1196 1196 * chance to grab the mmu_lock, which ensure that if the page gets ··· 1246 1246 else 1247 1247 write_lock(&kvm->mmu_lock); 1248 1248 pgt = vcpu->arch.hw_mmu->pgt; 1249 1249 - if (mmu_notifier_retry(kvm, mmu_seq)) 1249 1249 + if (mmu_invalidate_retry(kvm, mmu_seq)) 1250 1250 goto out_unlock; 1251 1251 1252 1252 /*

+2 -2

arch/arm64/kvm/sys_regs.c

reviewed

··· 652 652 */ 653 653 val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) 654 654 | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); 655 655 - if (!system_supports_32bit_el0()) 655 655 + if (!kvm_supports_32bit_el0()) 656 656 val |= ARMV8_PMU_PMCR_LC; 657 657 __vcpu_sys_reg(vcpu, r->reg) = val; 658 658 } ··· 701 701 val = __vcpu_sys_reg(vcpu, PMCR_EL0); 702 702 val &= ~ARMV8_PMU_PMCR_MASK; 703 703 val |= p->regval & ARMV8_PMU_PMCR_MASK; 704 704 - if (!system_supports_32bit_el0()) 704 704 + if (!kvm_supports_32bit_el0()) 705 705 val |= ARMV8_PMU_PMCR_LC; 706 706 __vcpu_sys_reg(vcpu, PMCR_EL0) = val; 707 707 kvm_pmu_handle_pmcr(vcpu, val);

-2

arch/mips/include/asm/kvm_host.h

reviewed

··· 84 84 85 85 86 86 #define KVM_MAX_VCPUS 16 87 87 - /* memory slots that does not exposed to userspace */ 88 88 - #define KVM_PRIVATE_MEM_SLOTS 0 89 87 90 88 #define KVM_HALT_POLL_NS_DEFAULT 500000 91 89

+6 -6

arch/mips/kvm/mmu.c

reviewed

··· 615 615 * Used to check for invalidations in progress, of the pfn that is 616 616 * returned by pfn_to_pfn_prot below. 617 617 */ 618 618 - mmu_seq = kvm->mmu_notifier_seq; 618 618 + mmu_seq = kvm->mmu_invalidate_seq; 619 619 /* 620 620 - * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in 621 621 - * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't 620 620 + * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads 621 621 + * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't 622 622 * risk the page we get a reference to getting unmapped before we have a 623 623 - * chance to grab the mmu_lock without mmu_notifier_retry() noticing. 623 623 + * chance to grab the mmu_lock without mmu_invalidate_retry() noticing. 624 624 * 625 625 * This smp_rmb() pairs with the effective smp_wmb() of the combination 626 626 * of the pte_unmap_unlock() after the PTE is zapped, and the 627 627 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before 628 628 - * mmu_notifier_seq is incremented. 628 628 + * mmu_invalidate_seq is incremented. 629 629 */ 630 630 smp_rmb(); 631 631 ··· 638 638 639 639 spin_lock(&kvm->mmu_lock); 640 640 /* Check if an invalidation has taken place since we got pfn */ 641 641 - if (mmu_notifier_retry(kvm, mmu_seq)) { 641 641 + if (mmu_invalidate_retry(kvm, mmu_seq)) { 642 642 /* 643 643 * This can happen when mappings are changed asynchronously, but 644 644 * also synchronously if a COW is triggered by

+1 -1

arch/powerpc/include/asm/kvm_book3s_64.h

reviewed

··· 666 666 VM_WARN(!spin_is_locked(&kvm->mmu_lock), 667 667 "%s called with kvm mmu_lock not held \n", __func__); 668 668 669 669 - if (mmu_notifier_retry(kvm, mmu_seq)) 669 669 + if (mmu_invalidate_retry(kvm, mmu_seq)) 670 670 return NULL; 671 671 672 672 pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift);

+2 -2

arch/powerpc/kvm/book3s_64_mmu_host.c

reviewed

··· 90 90 unsigned long pfn; 91 91 92 92 /* used to check for invalidations in progress */ 93 93 - mmu_seq = kvm->mmu_notifier_seq; 93 93 + mmu_seq = kvm->mmu_invalidate_seq; 94 94 smp_rmb(); 95 95 96 96 /* Get host physical address for gpa */ ··· 151 151 cpte = kvmppc_mmu_hpte_cache_next(vcpu); 152 152 153 153 spin_lock(&kvm->mmu_lock); 154 154 - if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { 154 154 + if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) { 155 155 r = -EAGAIN; 156 156 goto out_unlock; 157 157 }

+2 -2

arch/powerpc/kvm/book3s_64_mmu_hv.c

reviewed

··· 578 578 return -EFAULT; 579 579 580 580 /* used to check for invalidations in progress */ 581 581 - mmu_seq = kvm->mmu_notifier_seq; 581 581 + mmu_seq = kvm->mmu_invalidate_seq; 582 582 smp_rmb(); 583 583 584 584 ret = -EFAULT; ··· 693 693 694 694 /* Check if we might have been invalidated; let the guest retry if so */ 695 695 ret = RESUME_GUEST; 696 696 - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { 696 696 + if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) { 697 697 unlock_rmap(rmap); 698 698 goto out_unlock; 699 699 }

+3 -3

arch/powerpc/kvm/book3s_64_mmu_radix.c

reviewed

··· 640 640 /* Check if we might have been invalidated; let the guest retry if so */ 641 641 spin_lock(&kvm->mmu_lock); 642 642 ret = -EAGAIN; 643 643 - if (mmu_notifier_retry(kvm, mmu_seq)) 643 643 + if (mmu_invalidate_retry(kvm, mmu_seq)) 644 644 goto out_unlock; 645 645 646 646 /* Now traverse again under the lock and change the tree */ ··· 830 830 bool large_enable; 831 831 832 832 /* used to check for invalidations in progress */ 833 833 - mmu_seq = kvm->mmu_notifier_seq; 833 833 + mmu_seq = kvm->mmu_invalidate_seq; 834 834 smp_rmb(); 835 835 836 836 /* ··· 1191 1191 * Increase the mmu notifier sequence number to prevent any page 1192 1192 * fault that read the memslot earlier from writing a PTE. 1193 1193 */ 1194 1194 - kvm->mmu_notifier_seq++; 1194 1194 + kvm->mmu_invalidate_seq++; 1195 1195 spin_unlock(&kvm->mmu_lock); 1196 1196 } 1197 1197

+1 -1

arch/powerpc/kvm/book3s_hv_nested.c

reviewed

··· 1580 1580 /* 2. Find the host pte for this L1 guest real address */ 1581 1581 1582 1582 /* Used to check for invalidations in progress */ 1583 1583 - mmu_seq = kvm->mmu_notifier_seq; 1583 1583 + mmu_seq = kvm->mmu_invalidate_seq; 1584 1584 smp_rmb(); 1585 1585 1586 1586 /* See if can find translation in our partition scoped tables for L1 */

+4 -4

arch/powerpc/kvm/book3s_hv_rm_mmu.c

reviewed

··· 219 219 g_ptel = ptel; 220 220 221 221 /* used later to detect if we might have been invalidated */ 222 222 - mmu_seq = kvm->mmu_notifier_seq; 222 222 + mmu_seq = kvm->mmu_invalidate_seq; 223 223 smp_rmb(); 224 224 225 225 /* Find the memslot (if any) for this address */ ··· 366 366 rmap = real_vmalloc_addr(rmap); 367 367 lock_rmap(rmap); 368 368 /* Check for pending invalidations under the rmap chain lock */ 369 369 - if (mmu_notifier_retry(kvm, mmu_seq)) { 369 369 + if (mmu_invalidate_retry(kvm, mmu_seq)) { 370 370 /* inval in progress, write a non-present HPTE */ 371 371 pteh |= HPTE_V_ABSENT; 372 372 pteh &= ~HPTE_V_VALID; ··· 932 932 int i; 933 933 934 934 /* Used later to detect if we might have been invalidated */ 935 935 - mmu_seq = kvm->mmu_notifier_seq; 935 935 + mmu_seq = kvm->mmu_invalidate_seq; 936 936 smp_rmb(); 937 937 938 938 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); ··· 960 960 long ret = H_SUCCESS; 961 961 962 962 /* Used later to detect if we might have been invalidated */ 963 963 - mmu_seq = kvm->mmu_notifier_seq; 963 963 + mmu_seq = kvm->mmu_invalidate_seq; 964 964 smp_rmb(); 965 965 966 966 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);

+2 -2

arch/powerpc/kvm/e500_mmu_host.c

reviewed

··· 339 339 unsigned long flags; 340 340 341 341 /* used to check for invalidations in progress */ 342 342 - mmu_seq = kvm->mmu_notifier_seq; 342 342 + mmu_seq = kvm->mmu_invalidate_seq; 343 343 smp_rmb(); 344 344 345 345 /* ··· 460 460 } 461 461 462 462 spin_lock(&kvm->mmu_lock); 463 463 - if (mmu_notifier_retry(kvm, mmu_seq)) { 463 463 + if (mmu_invalidate_retry(kvm, mmu_seq)) { 464 464 ret = -EAGAIN; 465 465 goto out; 466 466 }

+2 -2

arch/riscv/kvm/mmu.c

reviewed

··· 666 666 return ret; 667 667 } 668 668 669 669 - mmu_seq = kvm->mmu_notifier_seq; 669 669 + mmu_seq = kvm->mmu_invalidate_seq; 670 670 671 671 hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); 672 672 if (hfn == KVM_PFN_ERR_HWPOISON) { ··· 686 686 687 687 spin_lock(&kvm->mmu_lock); 688 688 689 689 - if (mmu_notifier_retry(kvm, mmu_seq)) 689 689 + if (mmu_invalidate_retry(kvm, mmu_seq)) 690 690 goto out_unlock; 691 691 692 692 if (writable) {

+11

arch/x86/include/asm/ibt.h

reviewed

··· 31 31 32 32 #define __noendbr __attribute__((nocf_check)) 33 33 34 34 + /* 35 35 + * Create a dummy function pointer reference to prevent objtool from marking 36 36 + * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by 37 37 + * apply_ibt_endbr()). 38 38 + */ 39 39 + #define IBT_NOSEAL(fname) \ 40 40 + ".pushsection .discard.ibt_endbr_noseal\n\t" \ 41 41 + _ASM_PTR fname "\n\t" \ 42 42 + ".popsection\n\t" 43 43 + 34 44 static inline __attribute_const__ u32 gen_endbr(void) 35 45 { 36 46 u32 endbr; ··· 94 84 #ifndef __ASSEMBLY__ 95 85 96 86 #define ASM_ENDBR 87 87 + #define IBT_NOSEAL(name) 97 88 98 89 #define __noendbr 99 90

+1 -1

arch/x86/include/asm/kvm_host.h

reviewed

··· 53 53 #define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) 54 54 55 55 /* memory slots that are not exposed to userspace */ 56 56 - #define KVM_PRIVATE_MEM_SLOTS 3 56 56 + #define KVM_INTERNAL_MEM_SLOTS 3 57 57 58 58 #define KVM_HALT_POLL_NS_DEFAULT 200000 59 59

+6 -20

arch/x86/kvm/emulate.c

reviewed

··· 326 326 ".align " __stringify(FASTOP_SIZE) " \n\t" \ 327 327 ".type " name ", @function \n\t" \ 328 328 name ":\n\t" \ 329 329 - ASM_ENDBR 329 329 + ASM_ENDBR \ 330 330 + IBT_NOSEAL(name) 330 331 331 332 #define FOP_FUNC(name) \ 332 333 __FOP_FUNC(#name) ··· 447 446 FOP_END 448 447 449 448 /* Special case for SETcc - 1 instruction per cc */ 450 450 - 451 451 - /* 452 452 - * Depending on .config the SETcc functions look like: 453 453 - * 454 454 - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] 455 455 - * SETcc %al [3 bytes] 456 456 - * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] 457 457 - * INT3 [1 byte; CONFIG_SLS] 458 458 - */ 459 459 - #define SETCC_ALIGN 16 460 460 - 461 449 #define FOP_SETCC(op) \ 462 462 - ".align " __stringify(SETCC_ALIGN) " \n\t" \ 463 463 - ".type " #op ", @function \n\t" \ 464 464 - #op ": \n\t" \ 465 465 - ASM_ENDBR \ 450 450 + FOP_FUNC(op) \ 466 451 #op " %al \n\t" \ 467 467 - __FOP_RET(#op) \ 468 468 - ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" 452 452 + FOP_RET(op) 469 453 470 470 - __FOP_START(setcc, SETCC_ALIGN) 454 454 + FOP_START(setcc) 471 455 FOP_SETCC(seto) 472 456 FOP_SETCC(setno) 473 457 FOP_SETCC(setc) ··· 1065 1079 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) 1066 1080 { 1067 1081 u8 rc; 1068 1068 - void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf); 1082 1082 + void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); 1069 1083 1070 1084 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; 1071 1085 asm("push %[flags]; popf; " CALL_NOSPEC

+7 -7

arch/x86/kvm/mmu/mmu.c

reviewed

··· 2914 2914 * If addresses are being invalidated, skip prefetching to avoid 2915 2915 * accidentally prefetching those addresses. 2916 2916 */ 2917 2917 - if (unlikely(vcpu->kvm->mmu_notifier_count)) 2917 2917 + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) 2918 2918 return; 2919 2919 2920 2920 __direct_pte_prefetch(vcpu, sp, sptep); ··· 2928 2928 * 2929 2929 * There are several ways to safely use this helper: 2930 2930 * 2931 2931 - * - Check mmu_notifier_retry_hva() after grabbing the mapping level, before 2931 2931 + * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before 2932 2932 * consuming it. In this case, mmu_lock doesn't need to be held during the 2933 2933 * lookup, but it does need to be held while checking the MMU notifier. 2934 2934 * ··· 3056 3056 return; 3057 3057 3058 3058 /* 3059 3059 - * mmu_notifier_retry() was successful and mmu_lock is held, so 3059 3059 + * mmu_invalidate_retry() was successful and mmu_lock is held, so 3060 3060 * the pmd can't be split from under us. 3061 3061 */ 3062 3062 fault->goal_level = fault->req_level; ··· 4203 4203 return true; 4204 4204 4205 4205 return fault->slot && 4206 4206 - mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva); 4206 4206 + mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva); 4207 4207 } 4208 4208 4209 4209 static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) ··· 4227 4227 if (r) 4228 4228 return r; 4229 4229 4230 4230 - mmu_seq = vcpu->kvm->mmu_notifier_seq; 4230 4230 + mmu_seq = vcpu->kvm->mmu_invalidate_seq; 4231 4231 smp_rmb(); 4232 4232 4233 4233 r = kvm_faultin_pfn(vcpu, fault); ··· 6055 6055 6056 6056 write_lock(&kvm->mmu_lock); 6057 6057 6058 6058 - kvm_inc_notifier_count(kvm, gfn_start, gfn_end); 6058 6058 + kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); 6059 6059 6060 6060 flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); 6061 6061 ··· 6069 6069 kvm_flush_remote_tlbs_with_address(kvm, gfn_start, 6070 6070 gfn_end - gfn_start); 6071 6071 6072 6072 - kvm_dec_notifier_count(kvm, gfn_start, gfn_end); 6072 6072 + kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); 6073 6073 6074 6074 write_unlock(&kvm->mmu_lock); 6075 6075 }

+2 -2

arch/x86/kvm/mmu/paging_tmpl.h

reviewed

··· 589 589 * If addresses are being invalidated, skip prefetching to avoid 590 590 * accidentally prefetching those addresses. 591 591 */ 592 592 - if (unlikely(vcpu->kvm->mmu_notifier_count)) 592 592 + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) 593 593 return; 594 594 595 595 if (sp->role.direct) ··· 838 838 else 839 839 fault->max_level = walker.level; 840 840 841 841 - mmu_seq = vcpu->kvm->mmu_notifier_seq; 841 841 + mmu_seq = vcpu->kvm->mmu_invalidate_seq; 842 842 smp_rmb(); 843 843 844 844 r = kvm_faultin_pfn(vcpu, fault);

+34 -32

include/linux/kvm_host.h

reviewed

··· 656 656 }; 657 657 #endif 658 658 659 659 - #ifndef KVM_PRIVATE_MEM_SLOTS 660 660 - #define KVM_PRIVATE_MEM_SLOTS 0 659 659 + #ifndef KVM_INTERNAL_MEM_SLOTS 660 660 + #define KVM_INTERNAL_MEM_SLOTS 0 661 661 #endif 662 662 663 663 #define KVM_MEM_SLOTS_NUM SHRT_MAX 664 664 - #define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS) 664 664 + #define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) 665 665 666 666 #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE 667 667 static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) ··· 765 765 766 766 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 767 767 struct mmu_notifier mmu_notifier; 768 768 - unsigned long mmu_notifier_seq; 769 769 - long mmu_notifier_count; 770 770 - unsigned long mmu_notifier_range_start; 771 771 - unsigned long mmu_notifier_range_end; 768 768 + unsigned long mmu_invalidate_seq; 769 769 + long mmu_invalidate_in_progress; 770 770 + unsigned long mmu_invalidate_range_start; 771 771 + unsigned long mmu_invalidate_range_end; 772 772 #endif 773 773 struct list_head devices; 774 774 u64 manual_dirty_log_protect; ··· 1357 1357 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); 1358 1358 #endif 1359 1359 1360 1360 - void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, 1361 1361 - unsigned long end); 1362 1362 - void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, 1363 1363 - unsigned long end); 1360 1360 + void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, 1361 1361 + unsigned long end); 1362 1362 + void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, 1363 1363 + unsigned long end); 1364 1364 1365 1365 long kvm_arch_dev_ioctl(struct file *filp, 1366 1366 unsigned int ioctl, unsigned long arg); ··· 1907 1907 extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; 1908 1908 1909 1909 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 1910 1910 - static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) 1910 1910 + static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) 1911 1911 { 1912 1912 - if (unlikely(kvm->mmu_notifier_count)) 1912 1912 + if (unlikely(kvm->mmu_invalidate_in_progress)) 1913 1913 return 1; 1914 1914 /* 1915 1915 - * Ensure the read of mmu_notifier_count happens before the read 1916 1916 - * of mmu_notifier_seq. This interacts with the smp_wmb() in 1917 1917 - * mmu_notifier_invalidate_range_end to make sure that the caller 1918 1918 - * either sees the old (non-zero) value of mmu_notifier_count or 1919 1919 - * the new (incremented) value of mmu_notifier_seq. 1920 1920 - * PowerPC Book3s HV KVM calls this under a per-page lock 1921 1921 - * rather than under kvm->mmu_lock, for scalability, so 1922 1922 - * can't rely on kvm->mmu_lock to keep things ordered. 1915 1915 + * Ensure the read of mmu_invalidate_in_progress happens before 1916 1916 + * the read of mmu_invalidate_seq. This interacts with the 1917 1917 + * smp_wmb() in mmu_notifier_invalidate_range_end to make sure 1918 1918 + * that the caller either sees the old (non-zero) value of 1919 1919 + * mmu_invalidate_in_progress or the new (incremented) value of 1920 1920 + * mmu_invalidate_seq. 1921 1921 + * 1922 1922 + * PowerPC Book3s HV KVM calls this under a per-page lock rather 1923 1923 + * than under kvm->mmu_lock, for scalability, so can't rely on 1924 1924 + * kvm->mmu_lock to keep things ordered. 1923 1925 */ 1924 1926 smp_rmb(); 1925 1925 - if (kvm->mmu_notifier_seq != mmu_seq) 1927 1927 + if (kvm->mmu_invalidate_seq != mmu_seq) 1926 1928 return 1; 1927 1929 return 0; 1928 1930 } 1929 1931 1930 1930 - static inline int mmu_notifier_retry_hva(struct kvm *kvm, 1931 1931 - unsigned long mmu_seq, 1932 1932 - unsigned long hva) 1932 1932 + static inline int mmu_invalidate_retry_hva(struct kvm *kvm, 1933 1933 + unsigned long mmu_seq, 1934 1934 + unsigned long hva) 1933 1935 { 1934 1936 lockdep_assert_held(&kvm->mmu_lock); 1935 1937 /* 1936 1936 - * If mmu_notifier_count is non-zero, then the range maintained by 1937 1937 - * kvm_mmu_notifier_invalidate_range_start contains all addresses that 1938 1938 - * might be being invalidated. Note that it may include some false 1938 1938 + * If mmu_invalidate_in_progress is non-zero, then the range maintained 1939 1939 + * by kvm_mmu_notifier_invalidate_range_start contains all addresses 1940 1940 + * that might be being invalidated. Note that it may include some false 1939 1941 * positives, due to shortcuts when handing concurrent invalidations. 1940 1942 */ 1941 1941 - if (unlikely(kvm->mmu_notifier_count) && 1942 1942 - hva >= kvm->mmu_notifier_range_start && 1943 1943 - hva < kvm->mmu_notifier_range_end) 1943 1943 + if (unlikely(kvm->mmu_invalidate_in_progress) && 1944 1944 + hva >= kvm->mmu_invalidate_range_start && 1945 1945 + hva < kvm->mmu_invalidate_range_end) 1944 1946 return 1; 1945 1945 - if (kvm->mmu_notifier_seq != mmu_seq) 1947 1947 + if (kvm->mmu_invalidate_seq != mmu_seq) 1946 1948 return 1; 1947 1949 return 0; 1948 1950 }

+2 -1

tools/objtool/check.c

reviewed

··· 4096 4096 * These sections can reference text addresses, but not with 4097 4097 * the intent to indirect branch to them. 4098 4098 */ 4099 4099 - if (!strncmp(sec->name, ".discard", 8) || 4099 4099 + if ((!strncmp(sec->name, ".discard", 8) && 4100 4100 + strcmp(sec->name, ".discard.ibt_endbr_noseal")) || 4100 4101 !strncmp(sec->name, ".debug", 6) || 4101 4102 !strcmp(sec->name, ".altinstructions") || 4102 4103 !strcmp(sec->name, ".ibt_endbr_seal") ||

+46 -49

virt/kvm/kvm_main.c

reviewed

··· 702 702 703 703 /* 704 704 * .change_pte() must be surrounded by .invalidate_range_{start,end}(). 705 705 - * If mmu_notifier_count is zero, then no in-progress invalidations, 706 706 - * including this one, found a relevant memslot at start(); rechecking 707 707 - * memslots here is unnecessary. Note, a false positive (count elevated 708 708 - * by a different invalidation) is sub-optimal but functionally ok. 705 705 + * If mmu_invalidate_in_progress is zero, then no in-progress 706 706 + * invalidations, including this one, found a relevant memslot at 707 707 + * start(); rechecking memslots here is unnecessary. Note, a false 708 708 + * positive (count elevated by a different invalidation) is sub-optimal 709 709 + * but functionally ok. 709 710 */ 710 711 WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); 711 711 - if (!READ_ONCE(kvm->mmu_notifier_count)) 712 712 + if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) 712 713 return; 713 714 714 715 kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); 715 716 } 716 717 717 717 - void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, 718 718 - unsigned long end) 718 718 + void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, 719 719 + unsigned long end) 719 720 { 720 721 /* 721 722 * The count increase must become visible at unlock time as no 722 723 * spte can be established without taking the mmu_lock and 723 724 * count is also read inside the mmu_lock critical section. 724 725 */ 725 725 - kvm->mmu_notifier_count++; 726 726 - if (likely(kvm->mmu_notifier_count == 1)) { 727 727 - kvm->mmu_notifier_range_start = start; 728 728 - kvm->mmu_notifier_range_end = end; 726 726 + kvm->mmu_invalidate_in_progress++; 727 727 + if (likely(kvm->mmu_invalidate_in_progress == 1)) { 728 728 + kvm->mmu_invalidate_range_start = start; 729 729 + kvm->mmu_invalidate_range_end = end; 729 730 } else { 730 731 /* 731 732 * Fully tracking multiple concurrent ranges has diminishing ··· 737 736 * accumulate and persist until all outstanding invalidates 738 737 * complete. 739 738 */ 740 740 - kvm->mmu_notifier_range_start = 741 741 - min(kvm->mmu_notifier_range_start, start); 742 742 - kvm->mmu_notifier_range_end = 743 743 - max(kvm->mmu_notifier_range_end, end); 739 739 + kvm->mmu_invalidate_range_start = 740 740 + min(kvm->mmu_invalidate_range_start, start); 741 741 + kvm->mmu_invalidate_range_end = 742 742 + max(kvm->mmu_invalidate_range_end, end); 744 743 } 745 744 } 746 745 ··· 753 752 .end = range->end, 754 753 .pte = __pte(0), 755 754 .handler = kvm_unmap_gfn_range, 756 756 - .on_lock = kvm_inc_notifier_count, 755 755 + .on_lock = kvm_mmu_invalidate_begin, 757 756 .on_unlock = kvm_arch_guest_memory_reclaimed, 758 757 .flush_on_ret = true, 759 758 .may_block = mmu_notifier_range_blockable(range), ··· 764 763 /* 765 764 * Prevent memslot modification between range_start() and range_end() 766 765 * so that conditionally locking provides the same result in both 767 767 - * functions. Without that guarantee, the mmu_notifier_count 766 766 + * functions. Without that guarantee, the mmu_invalidate_in_progress 768 767 * adjustments will be imbalanced. 769 768 * 770 769 * Pairs with the decrement in range_end(). ··· 780 779 * any given time, and the caches themselves can check for hva overlap, 781 780 * i.e. don't need to rely on memslot overlap checks for performance. 782 781 * Because this runs without holding mmu_lock, the pfn caches must use 783 783 - * mn_active_invalidate_count (see above) instead of mmu_notifier_count. 782 782 + * mn_active_invalidate_count (see above) instead of 783 783 + * mmu_invalidate_in_progress. 784 784 */ 785 785 gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, 786 786 hva_range.may_block); ··· 791 789 return 0; 792 790 } 793 791 794 794 - void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, 795 795 - unsigned long end) 792 792 + void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, 793 793 + unsigned long end) 796 794 { 797 795 /* 798 796 * This sequence increase will notify the kvm page fault that 799 797 * the page that is going to be mapped in the spte could have 800 798 * been freed. 801 799 */ 802 802 - kvm->mmu_notifier_seq++; 800 800 + kvm->mmu_invalidate_seq++; 803 801 smp_wmb(); 804 802 /* 805 803 * The above sequence increase must be visible before the 806 804 * below count decrease, which is ensured by the smp_wmb above 807 807 - * in conjunction with the smp_rmb in mmu_notifier_retry(). 805 805 + * in conjunction with the smp_rmb in mmu_invalidate_retry(). 808 806 */ 809 809 - kvm->mmu_notifier_count--; 807 807 + kvm->mmu_invalidate_in_progress--; 810 808 } 811 809 812 810 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, ··· 818 816 .end = range->end, 819 817 .pte = __pte(0), 820 818 .handler = (void *)kvm_null_fn, 821 821 - .on_lock = kvm_dec_notifier_count, 819 819 + .on_lock = kvm_mmu_invalidate_end, 822 820 .on_unlock = (void *)kvm_null_fn, 823 821 .flush_on_ret = false, 824 822 .may_block = mmu_notifier_range_blockable(range), ··· 839 837 if (wake) 840 838 rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); 841 839 842 842 - BUG_ON(kvm->mmu_notifier_count < 0); 840 840 + BUG_ON(kvm->mmu_invalidate_in_progress < 0); 843 841 } 844 842 845 843 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, ··· 1136 1134 if (!kvm) 1137 1135 return ERR_PTR(-ENOMEM); 1138 1136 1137 1137 + /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ 1138 1138 + __module_get(kvm_chardev_ops.owner); 1139 1139 + 1139 1140 KVM_MMU_LOCK_INIT(kvm); 1140 1141 mmgrab(current->mm); 1141 1142 kvm->mm = current->mm; ··· 1216 1211 if (r) 1217 1212 goto out_err_no_mmu_notifier; 1218 1213 1214 1214 + r = kvm_coalesced_mmio_init(kvm); 1215 1215 + if (r < 0) 1216 1216 + goto out_no_coalesced_mmio; 1217 1217 + 1218 1218 + r = kvm_create_vm_debugfs(kvm, fdname); 1219 1219 + if (r) 1220 1220 + goto out_err_no_debugfs; 1221 1221 + 1219 1222 r = kvm_arch_post_init_vm(kvm); 1220 1223 if (r) 1221 1221 - goto out_err_mmu_notifier; 1224 1224 + goto out_err; 1222 1225 1223 1226 mutex_lock(&kvm_lock); 1224 1227 list_add(&kvm->vm_list, &vm_list); ··· 1235 1222 preempt_notifier_inc(); 1236 1223 kvm_init_pm_notifier(kvm); 1237 1224 1238 1238 - /* 1239 1239 - * When the fd passed to this ioctl() is opened it pins the module, 1240 1240 - * but try_module_get() also prevents getting a reference if the module 1241 1241 - * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). 1242 1242 - */ 1243 1243 - if (!try_module_get(kvm_chardev_ops.owner)) { 1244 1244 - r = -ENODEV; 1245 1245 - goto out_err_mmu_notifier; 1246 1246 - } 1247 1247 - 1248 1248 - r = kvm_create_vm_debugfs(kvm, fdname); 1249 1249 - if (r) 1250 1250 - goto out_err; 1251 1251 - 1252 1225 return kvm; 1253 1226 1254 1227 out_err: 1255 1255 - module_put(kvm_chardev_ops.owner); 1256 1256 - out_err_mmu_notifier: 1228 1228 + kvm_destroy_vm_debugfs(kvm); 1229 1229 + out_err_no_debugfs: 1230 1230 + kvm_coalesced_mmio_free(kvm); 1231 1231 + out_no_coalesced_mmio: 1257 1232 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 1258 1233 if (kvm->mmu_notifier.ops) 1259 1234 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); ··· 1260 1259 out_err_no_srcu: 1261 1260 kvm_arch_free_vm(kvm); 1262 1261 mmdrop(current->mm); 1262 1262 + module_put(kvm_chardev_ops.owner); 1263 1263 return ERR_PTR(r); 1264 1264 } 1265 1265 ··· 2518 2516 { 2519 2517 unsigned int flags = FOLL_HWPOISON; 2520 2518 struct page *page; 2521 2521 - int npages = 0; 2519 2519 + int npages; 2522 2520 2523 2521 might_sleep(); 2524 2522 ··· 4380 4378 static int kvm_ioctl_create_device(struct kvm *kvm, 4381 4379 struct kvm_create_device *cd) 4382 4380 { 4383 4383 - const struct kvm_device_ops *ops = NULL; 4381 4381 + const struct kvm_device_ops *ops; 4384 4382 struct kvm_device *dev; 4385 4383 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; 4386 4384 int type; ··· 4915 4913 goto put_fd; 4916 4914 } 4917 4915 4918 4918 - #ifdef CONFIG_KVM_MMIO 4919 4919 - r = kvm_coalesced_mmio_init(kvm); 4920 4920 - if (r < 0) 4921 4921 - goto put_kvm; 4922 4922 - #endif 4923 4916 file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); 4924 4917 if (IS_ERR(file)) { 4925 4918 r = PTR_ERR(file);

+9 -8

virt/kvm/pfncache.c

reviewed

··· 112 112 { 113 113 /* 114 114 * mn_active_invalidate_count acts for all intents and purposes 115 115 - * like mmu_notifier_count here; but the latter cannot be used 116 116 - * here because the invalidation of caches in the mmu_notifier 117 117 - * event occurs _before_ mmu_notifier_count is elevated. 115 115 + * like mmu_invalidate_in_progress here; but the latter cannot 116 116 + * be used here because the invalidation of caches in the 117 117 + * mmu_notifier event occurs _before_ mmu_invalidate_in_progress 118 118 + * is elevated. 118 119 * 119 120 * Note, it does not matter that mn_active_invalidate_count 120 121 * is not protected by gpc->lock. It is guaranteed to 121 122 * be elevated before the mmu_notifier acquires gpc->lock, and 122 122 - * isn't dropped until after mmu_notifier_seq is updated. 123 123 + * isn't dropped until after mmu_invalidate_seq is updated. 123 124 */ 124 125 if (kvm->mn_active_invalidate_count) 125 126 return true; 126 127 127 128 /* 128 129 * Ensure mn_active_invalidate_count is read before 129 129 - * mmu_notifier_seq. This pairs with the smp_wmb() in 130 130 + * mmu_invalidate_seq. This pairs with the smp_wmb() in 130 131 * mmu_notifier_invalidate_range_end() to guarantee either the 131 132 * old (non-zero) value of mn_active_invalidate_count or the 132 132 - * new (incremented) value of mmu_notifier_seq is observed. 133 133 + * new (incremented) value of mmu_invalidate_seq is observed. 133 134 */ 134 135 smp_rmb(); 135 135 - return kvm->mmu_notifier_seq != mmu_seq; 136 136 + return kvm->mmu_invalidate_seq != mmu_seq; 136 137 } 137 138 138 139 static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) ··· 156 155 gpc->valid = false; 157 156 158 157 do { 159 159 - mmu_seq = kvm->mmu_notifier_seq; 158 158 + mmu_seq = kvm->mmu_invalidate_seq; 160 159 smp_rmb(); 161 160 162 161 write_unlock_irq(&gpc->lock);