Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
"The bulk of the changes here is a largish change to guest_memfd,
delaying the clearing and encryption of guest-private pages until they
are actually added to guest page tables. This started as "let's make
it impossible to misuse the API" for SEV-SNP; but then it ballooned a
bit.

The new logic is generally simpler and more ready for hugepage support
in guest_memfd.

Summary:

- fix latent bug in how usage of large pages is determined for
confidential VMs

- fix "underline too short" in docs

- eliminate log spam from limited APIC timer periods

- disallow pre-faulting of memory before SEV-SNP VMs are initialized

- delay clearing and encrypting private memory until it is added to
guest page tables

- this change also enables another small cleanup: the checks in
SNP_LAUNCH_UPDATE that limit it to non-populated, private pages can
now be moved into the common kvm_gmem_populate() function

- fix compilation error that the RISC-V merge introduced in selftests"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86/mmu: fix determination of max NPT mapping level for private pages
KVM: riscv: selftests: Fix compile error
KVM: guest_memfd: abstract how prepared folios are recorded
KVM: guest_memfd: let kvm_gmem_populate() operate only on private gfns
KVM: extend kvm_range_has_memory_attributes() to check subset of attributes
KVM: cleanup and add shortcuts to kvm_range_has_memory_attributes()
KVM: guest_memfd: move check for already-populated page to common code
KVM: remove kvm_arch_gmem_prepare_needed()
KVM: guest_memfd: make kvm_gmem_prepare_folio() operate on a single struct kvm
KVM: guest_memfd: delay kvm_gmem_prepare_folio() until the memory is passed to the guest
KVM: guest_memfd: return locked folio from __kvm_gmem_get_pfn
KVM: rename CONFIG_HAVE_KVM_GMEM_* to CONFIG_HAVE_KVM_ARCH_GMEM_*
KVM: guest_memfd: do not go through struct page
KVM: guest_memfd: delay folio_mark_uptodate() until after successful preparation
KVM: guest_memfd: return folio from __kvm_gmem_get_pfn()
KVM: x86: disallow pre-fault for SNP VMs before initialization
KVM: Documentation: Fix title underline too short warning
KVM: x86: Eliminate log spam from limited APIC timer periods

+214 -159
+7 -1
Documentation/virt/kvm/api.rst
··· 6368 6368 See KVM_SET_USER_MEMORY_REGION2 for additional details. 6369 6369 6370 6370 4.143 KVM_PRE_FAULT_MEMORY 6371 - ------------------------ 6371 + --------------------------- 6372 6372 6373 6373 :Capability: KVM_CAP_PRE_FAULT_MEMORY 6374 6374 :Architectures: none ··· 6404 6404 for the current vCPU state. KVM maps memory as if the vCPU generated a 6405 6405 stage-2 read page fault, e.g. faults in memory as needed, but doesn't break 6406 6406 CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed. 6407 + 6408 + In the case of confidential VM types where there is an initial set up of 6409 + private guest memory before the guest is 'finalized'/measured, this ioctl 6410 + should only be issued after completing all the necessary setup to put the 6411 + guest into a 'finalized' state so that the above semantics can be reliably 6412 + ensured. 6407 6413 6408 6414 In some cases, multiple vCPUs might share the page tables. In this 6409 6415 case, the ioctl can be called in parallel.
+1
arch/x86/include/asm/kvm_host.h
··· 1305 1305 u8 vm_type; 1306 1306 bool has_private_mem; 1307 1307 bool has_protected_state; 1308 + bool pre_fault_allowed; 1308 1309 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; 1309 1310 struct list_head active_mmu_pages; 1310 1311 struct list_head zapped_obsolete_pages;
+2 -2
arch/x86/kvm/Kconfig
··· 141 141 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) 142 142 select ARCH_HAS_CC_PLATFORM 143 143 select KVM_GENERIC_PRIVATE_MEM 144 - select HAVE_KVM_GMEM_PREPARE 145 - select HAVE_KVM_GMEM_INVALIDATE 144 + select HAVE_KVM_ARCH_GMEM_PREPARE 145 + select HAVE_KVM_ARCH_GMEM_INVALIDATE 146 146 help 147 147 Provides support for launching Encrypted VMs (SEV) and Encrypted VMs 148 148 with Encrypted State (SEV-ES) on AMD processors.
+1 -1
arch/x86/kvm/lapic.c
··· 1743 1743 s64 min_period = min_timer_period_us * 1000LL; 1744 1744 1745 1745 if (apic->lapic_timer.period < min_period) { 1746 - pr_info_ratelimited( 1746 + pr_info_once( 1747 1747 "vcpu %i: requested %lld ns " 1748 1748 "lapic timer period limited to %lld ns\n", 1749 1749 apic->vcpu->vcpu_id,
+5 -2
arch/x86/kvm/mmu/mmu.c
··· 4335 4335 if (req_max_level) 4336 4336 max_level = min(max_level, req_max_level); 4337 4337 4338 - return req_max_level; 4338 + return max_level; 4339 4339 } 4340 4340 4341 4341 static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, ··· 4742 4742 u8 level = PG_LEVEL_4K; 4743 4743 u64 end; 4744 4744 int r; 4745 + 4746 + if (!vcpu->kvm->arch.pre_fault_allowed) 4747 + return -EOPNOTSUPP; 4745 4748 4746 4749 /* 4747 4750 * reload is efficient when called repeatedly, so we can do it on ··· 7513 7510 const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); 7514 7511 7515 7512 if (level == PG_LEVEL_2M) 7516 - return kvm_range_has_memory_attributes(kvm, start, end, attrs); 7513 + return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); 7517 7514 7518 7515 for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { 7519 7516 if (hugepage_test_mixed(slot, gfn, level - 1) ||
+9 -8
arch/x86/kvm/svm/sev.c
··· 2279 2279 bool assigned; 2280 2280 int level; 2281 2281 2282 - if (!kvm_mem_is_private(kvm, gfn)) { 2283 - pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n", 2284 - __func__, gfn); 2285 - ret = -EINVAL; 2286 - goto err; 2287 - } 2288 - 2289 2282 ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); 2290 2283 if (ret || assigned) { 2291 2284 pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", 2292 2285 __func__, gfn, ret, assigned); 2293 - ret = -EINVAL; 2286 + ret = ret ? -EINVAL : -EEXIST; 2294 2287 goto err; 2295 2288 } 2296 2289 ··· 2541 2548 memcpy(data->host_data, params.host_data, KVM_SEV_SNP_FINISH_DATA_SIZE); 2542 2549 data->gctx_paddr = __psp_pa(sev->snp_context); 2543 2550 ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); 2551 + 2552 + /* 2553 + * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages 2554 + * can be given to the guest simply by marking the RMP entry as private. 2555 + * This can happen on first access and also with KVM_PRE_FAULT_MEMORY. 2556 + */ 2557 + if (!ret) 2558 + kvm->arch.pre_fault_allowed = true; 2544 2559 2545 2560 kfree(id_auth); 2546 2561
+1
arch/x86/kvm/svm/svm.c
··· 4949 4949 to_kvm_sev_info(kvm)->need_init = true; 4950 4950 4951 4951 kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM); 4952 + kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem; 4952 4953 } 4953 4954 4954 4955 if (!pause_filter_count || !pause_filter_thresh)
+5 -7
arch/x86/kvm/x86.c
··· 12646 12646 kvm->arch.vm_type = type; 12647 12647 kvm->arch.has_private_mem = 12648 12648 (type == KVM_X86_SW_PROTECTED_VM); 12649 + /* Decided by the vendor code for other VM types. */ 12650 + kvm->arch.pre_fault_allowed = 12651 + type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; 12649 12652 12650 12653 ret = kvm_page_track_init(kvm); 12651 12654 if (ret) ··· 13644 13641 } 13645 13642 EXPORT_SYMBOL_GPL(kvm_arch_no_poll); 13646 13643 13647 - #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE 13648 - bool kvm_arch_gmem_prepare_needed(struct kvm *kvm) 13649 - { 13650 - return kvm->arch.vm_type == KVM_X86_SNP_VM; 13651 - } 13652 - 13644 + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE 13653 13645 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) 13654 13646 { 13655 13647 return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order); 13656 13648 } 13657 13649 #endif 13658 13650 13659 - #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE 13651 + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE 13660 13652 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) 13661 13653 { 13662 13654 kvm_x86_call(gmem_invalidate)(start, end);
+5 -4
include/linux/kvm_host.h
··· 2414 2414 } 2415 2415 2416 2416 bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, 2417 - unsigned long attrs); 2417 + unsigned long mask, unsigned long attrs); 2418 2418 bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, 2419 2419 struct kvm_gfn_range *range); 2420 2420 bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, ··· 2445 2445 } 2446 2446 #endif /* CONFIG_KVM_PRIVATE_MEM */ 2447 2447 2448 - #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE 2448 + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE 2449 2449 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); 2450 - bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); 2451 2450 #endif 2452 2451 2452 + #ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM 2453 2453 /** 2454 2454 * kvm_gmem_populate() - Populate/prepare a GPA range with guest data 2455 2455 * ··· 2476 2476 2477 2477 long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, 2478 2478 kvm_gmem_populate_cb post_populate, void *opaque); 2479 + #endif 2479 2480 2480 - #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE 2481 + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE 2481 2482 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); 2482 2483 #endif 2483 2484
+4 -4
tools/testing/selftests/kvm/riscv/get-reg-list.c
··· 961 961 KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); 962 962 KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); 963 963 KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); 964 - KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), 965 - KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), 966 - KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), 967 - KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), 964 + KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); 965 + KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); 966 + KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); 967 + KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); 968 968 KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); 969 969 KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); 970 970 KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
+2 -2
virt/kvm/Kconfig
··· 113 113 select KVM_PRIVATE_MEM 114 114 bool 115 115 116 - config HAVE_KVM_GMEM_PREPARE 116 + config HAVE_KVM_ARCH_GMEM_PREPARE 117 117 bool 118 118 depends on KVM_PRIVATE_MEM 119 119 120 - config HAVE_KVM_GMEM_INVALIDATE 120 + config HAVE_KVM_ARCH_GMEM_INVALIDATE 121 121 bool 122 122 depends on KVM_PRIVATE_MEM
+136 -91
virt/kvm/guest_memfd.c
··· 13 13 struct list_head entry; 14 14 }; 15 15 16 - static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) 16 + /** 17 + * folio_file_pfn - like folio_file_page, but return a pfn. 18 + * @folio: The folio which contains this index. 19 + * @index: The index we want to look up. 20 + * 21 + * Return: The pfn for this index. 22 + */ 23 + static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) 17 24 { 18 - #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE 19 - struct list_head *gmem_list = &inode->i_mapping->i_private_list; 20 - struct kvm_gmem *gmem; 25 + return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); 26 + } 21 27 22 - list_for_each_entry(gmem, gmem_list, entry) { 23 - struct kvm_memory_slot *slot; 24 - struct kvm *kvm = gmem->kvm; 25 - struct page *page; 26 - kvm_pfn_t pfn; 27 - gfn_t gfn; 28 - int rc; 29 - 30 - if (!kvm_arch_gmem_prepare_needed(kvm)) 31 - continue; 32 - 33 - slot = xa_load(&gmem->bindings, index); 34 - if (!slot) 35 - continue; 36 - 37 - page = folio_file_page(folio, index); 38 - pfn = page_to_pfn(page); 39 - gfn = slot->base_gfn + index - slot->gmem.pgoff; 40 - rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); 41 - if (rc) { 42 - pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", 43 - index, gfn, pfn, rc); 44 - return rc; 45 - } 28 + static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, 29 + pgoff_t index, struct folio *folio) 30 + { 31 + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE 32 + kvm_pfn_t pfn = folio_file_pfn(folio, index); 33 + gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff; 34 + int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio)); 35 + if (rc) { 36 + pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", 37 + index, gfn, pfn, rc); 38 + return rc; 46 39 } 47 - 48 40 #endif 41 + 49 42 return 0; 50 43 } 51 44 52 - static 
struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) 45 + static inline void kvm_gmem_mark_prepared(struct folio *folio) 53 46 { 54 - struct folio *folio; 47 + folio_mark_uptodate(folio); 48 + } 55 49 56 - /* TODO: Support huge pages. */ 57 - folio = filemap_grab_folio(inode->i_mapping, index); 58 - if (IS_ERR(folio)) 59 - return folio; 50 + /* 51 + * Process @folio, which contains @gfn, so that the guest can use it. 52 + * The folio must be locked and the gfn must be contained in @slot. 53 + * On successful return the guest sees a zero page so as to avoid 54 + * leaking host data and the up-to-date flag is set. 55 + */ 56 + static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, 57 + gfn_t gfn, struct folio *folio) 58 + { 59 + unsigned long nr_pages, i; 60 + pgoff_t index; 61 + int r; 62 + 63 + nr_pages = folio_nr_pages(folio); 64 + for (i = 0; i < nr_pages; i++) 65 + clear_highpage(folio_page(folio, i)); 60 66 61 67 /* 62 - * Use the up-to-date flag to track whether or not the memory has been 63 - * zeroed before being handed off to the guest. There is no backing 64 - * storage for the memory, so the folio will remain up-to-date until 65 - * it's removed. 68 + * Preparing huge folios should always be safe, since it should 69 + * be possible to split them later if needed. 66 70 * 67 - * TODO: Skip clearing pages when trusted firmware will do it when 68 - * assigning memory to the guest. 71 + * Right now the folio order is always going to be zero, but the 72 + * code is ready for huge folios. The only assumption is that 73 + * the base pgoff of memslots is naturally aligned with the 74 + * requested page order, ensuring that huge folios can also use 75 + * huge page table entries for GPA->HPA mapping. 76 + * 77 + * The order will be passed when creating the guest_memfd, and 78 + * checked when creating memslots. 
69 79 */ 70 - if (!folio_test_uptodate(folio)) { 71 - unsigned long nr_pages = folio_nr_pages(folio); 72 - unsigned long i; 80 + WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio))); 81 + index = gfn - slot->base_gfn + slot->gmem.pgoff; 82 + index = ALIGN_DOWN(index, 1 << folio_order(folio)); 83 + r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); 84 + if (!r) 85 + kvm_gmem_mark_prepared(folio); 73 86 74 - for (i = 0; i < nr_pages; i++) 75 - clear_highpage(folio_page(folio, i)); 87 + return r; 88 + } 76 89 77 - folio_mark_uptodate(folio); 78 - } 79 - 80 - if (prepare) { 81 - int r = kvm_gmem_prepare_folio(inode, index, folio); 82 - if (r < 0) { 83 - folio_unlock(folio); 84 - folio_put(folio); 85 - return ERR_PTR(r); 86 - } 87 - } 88 - 89 - /* 90 - * Ignore accessed, referenced, and dirty flags. The memory is 91 - * unevictable and there is no storage to write back to. 92 - */ 93 - return folio; 90 + /* 91 + * Returns a locked folio on success. The caller is responsible for 92 + * setting the up-to-date flag before the memory is mapped into the guest. 93 + * There is no backing storage for the memory, so the folio will remain 94 + * up-to-date until it's removed. 95 + * 96 + * Ignore accessed, referenced, and dirty flags. The memory is 97 + * unevictable and there is no storage to write back to. 98 + */ 99 + static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) 100 + { 101 + /* TODO: Support huge pages. 
*/ 102 + return filemap_grab_folio(inode->i_mapping, index); 94 103 } 95 104 96 105 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, ··· 199 190 break; 200 191 } 201 192 202 - folio = kvm_gmem_get_folio(inode, index, true); 193 + folio = kvm_gmem_get_folio(inode, index); 203 194 if (IS_ERR(folio)) { 204 195 r = PTR_ERR(folio); 205 196 break; ··· 352 343 return MF_DELAYED; 353 344 } 354 345 355 - #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE 346 + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE 356 347 static void kvm_gmem_free_folio(struct folio *folio) 357 348 { 358 349 struct page *page = folio_page(folio, 0); ··· 367 358 .dirty_folio = noop_dirty_folio, 368 359 .migrate_folio = kvm_gmem_migrate_folio, 369 360 .error_remove_folio = kvm_gmem_error_folio, 370 - #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE 361 + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE 371 362 .free_folio = kvm_gmem_free_folio, 372 363 #endif 373 364 }; ··· 550 541 fput(file); 551 542 } 552 543 553 - static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, 554 - gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare) 544 + /* Returns a locked folio on success. 
*/ 545 + static struct folio * 546 + __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, 547 + gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared, 548 + int *max_order) 555 549 { 556 550 pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; 557 551 struct kvm_gmem *gmem = file->private_data; 558 552 struct folio *folio; 559 - struct page *page; 560 - int r; 561 553 562 554 if (file != slot->gmem.file) { 563 555 WARN_ON_ONCE(slot->gmem.file); 564 - return -EFAULT; 556 + return ERR_PTR(-EFAULT); 565 557 } 566 558 567 559 gmem = file->private_data; 568 560 if (xa_load(&gmem->bindings, index) != slot) { 569 561 WARN_ON_ONCE(xa_load(&gmem->bindings, index)); 570 - return -EIO; 562 + return ERR_PTR(-EIO); 571 563 } 572 564 573 - folio = kvm_gmem_get_folio(file_inode(file), index, prepare); 565 + folio = kvm_gmem_get_folio(file_inode(file), index); 574 566 if (IS_ERR(folio)) 575 - return PTR_ERR(folio); 567 + return folio; 576 568 577 569 if (folio_test_hwpoison(folio)) { 578 570 folio_unlock(folio); 579 571 folio_put(folio); 580 - return -EHWPOISON; 572 + return ERR_PTR(-EHWPOISON); 581 573 } 582 574 583 - page = folio_file_page(folio, index); 584 - 585 - *pfn = page_to_pfn(page); 575 + *pfn = folio_file_pfn(folio, index); 586 576 if (max_order) 587 577 *max_order = 0; 588 578 589 - r = 0; 590 - 591 - folio_unlock(folio); 592 - 593 - return r; 579 + *is_prepared = folio_test_uptodate(folio); 580 + return folio; 594 581 } 595 582 596 583 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, 597 584 gfn_t gfn, kvm_pfn_t *pfn, int *max_order) 598 585 { 599 586 struct file *file = kvm_gmem_get_file(slot); 600 - int r; 587 + struct folio *folio; 588 + bool is_prepared = false; 589 + int r = 0; 601 590 602 591 if (!file) 603 592 return -EFAULT; 604 593 605 - r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true); 594 + folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order); 595 + if (IS_ERR(folio)) { 596 + r = 
PTR_ERR(folio); 597 + goto out; 598 + } 599 + 600 + if (!is_prepared) 601 + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); 602 + 603 + folio_unlock(folio); 604 + if (r < 0) 605 + folio_put(folio); 606 + 607 + out: 606 608 fput(file); 607 609 return r; 608 610 } 609 611 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); 610 612 613 + #ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM 611 614 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, 612 615 kvm_gmem_populate_cb post_populate, void *opaque) 613 616 { ··· 646 625 647 626 npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); 648 627 for (i = 0; i < npages; i += (1 << max_order)) { 628 + struct folio *folio; 649 629 gfn_t gfn = start_gfn + i; 630 + bool is_prepared = false; 650 631 kvm_pfn_t pfn; 651 632 652 633 if (signal_pending(current)) { ··· 656 633 break; 657 634 } 658 635 659 - ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); 660 - if (ret) 636 + folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order); 637 + if (IS_ERR(folio)) { 638 + ret = PTR_ERR(folio); 661 639 break; 640 + } 662 641 663 - if (!IS_ALIGNED(gfn, (1 << max_order)) || 664 - (npages - i) < (1 << max_order)) 665 - max_order = 0; 642 + if (is_prepared) { 643 + folio_unlock(folio); 644 + folio_put(folio); 645 + ret = -EEXIST; 646 + break; 647 + } 648 + 649 + folio_unlock(folio); 650 + WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || 651 + (npages - i) < (1 << max_order)); 652 + 653 + ret = -EINVAL; 654 + while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), 655 + KVM_MEMORY_ATTRIBUTE_PRIVATE, 656 + KVM_MEMORY_ATTRIBUTE_PRIVATE)) { 657 + if (!max_order) 658 + goto put_folio_and_exit; 659 + max_order--; 660 + } 666 661 667 662 p = src ? 
src + i * PAGE_SIZE : NULL; 668 663 ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); 664 + if (!ret) 665 + kvm_gmem_mark_prepared(folio); 669 666 670 - put_page(pfn_to_page(pfn)); 667 + put_folio_and_exit: 668 + folio_put(folio); 671 669 if (ret) 672 670 break; 673 671 } ··· 699 655 return ret && !i ? ret : i; 700 656 } 701 657 EXPORT_SYMBOL_GPL(kvm_gmem_populate); 658 + #endif
+36 -37
virt/kvm/kvm_main.c
··· 2398 2398 #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ 2399 2399 2400 2400 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES 2401 - /* 2402 - * Returns true if _all_ gfns in the range [@start, @end) have attributes 2403 - * matching @attrs. 2404 - */ 2405 - bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, 2406 - unsigned long attrs) 2407 - { 2408 - XA_STATE(xas, &kvm->mem_attr_array, start); 2409 - unsigned long index; 2410 - bool has_attrs; 2411 - void *entry; 2412 - 2413 - rcu_read_lock(); 2414 - 2415 - if (!attrs) { 2416 - has_attrs = !xas_find(&xas, end - 1); 2417 - goto out; 2418 - } 2419 - 2420 - has_attrs = true; 2421 - for (index = start; index < end; index++) { 2422 - do { 2423 - entry = xas_next(&xas); 2424 - } while (xas_retry(&xas, entry)); 2425 - 2426 - if (xas.xa_index != index || xa_to_value(entry) != attrs) { 2427 - has_attrs = false; 2428 - break; 2429 - } 2430 - } 2431 - 2432 - out: 2433 - rcu_read_unlock(); 2434 - return has_attrs; 2435 - } 2436 - 2437 2401 static u64 kvm_supported_mem_attributes(struct kvm *kvm) 2438 2402 { 2439 2403 if (!kvm || kvm_arch_has_private_mem(kvm)) 2440 2404 return KVM_MEMORY_ATTRIBUTE_PRIVATE; 2441 2405 2442 2406 return 0; 2407 + } 2408 + 2409 + /* 2410 + * Returns true if _all_ gfns in the range [@start, @end) have attributes 2411 + * such that the bits in @mask match @attrs. 
2412 + */ 2413 + bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, 2414 + unsigned long mask, unsigned long attrs) 2415 + { 2416 + XA_STATE(xas, &kvm->mem_attr_array, start); 2417 + unsigned long index; 2418 + void *entry; 2419 + 2420 + mask &= kvm_supported_mem_attributes(kvm); 2421 + if (attrs & ~mask) 2422 + return false; 2423 + 2424 + if (end == start + 1) 2425 + return (kvm_get_memory_attributes(kvm, start) & mask) == attrs; 2426 + 2427 + guard(rcu)(); 2428 + if (!attrs) 2429 + return !xas_find(&xas, end - 1); 2430 + 2431 + for (index = start; index < end; index++) { 2432 + do { 2433 + entry = xas_next(&xas); 2434 + } while (xas_retry(&xas, entry)); 2435 + 2436 + if (xas.xa_index != index || 2437 + (xa_to_value(entry) & mask) != attrs) 2438 + return false; 2439 + } 2440 + 2441 + return true; 2443 2442 } 2444 2443 2445 2444 static __always_inline void kvm_handle_gfn_range(struct kvm *kvm, ··· 2533 2534 mutex_lock(&kvm->slots_lock); 2534 2535 2535 2536 /* Nothing to do if the entire range as the desired attributes. */ 2536 - if (kvm_range_has_memory_attributes(kvm, start, end, attributes)) 2537 + if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes)) 2537 2538 goto out_unlock; 2538 2539 2539 2540 /*