Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'kvm-x86-gmem-6.20' of https://github.com/kvm-x86/linux into HEAD

KVM guest_memfd changes for 6.20

- Remove kvm_gmem_populate()'s preparation tracking and half-baked hugepage
handling, and instead rely on SNP (the only user of the tracking) to do its
own tracking via the RMP.

- Retroactively document and enforce (for SNP) that KVM_SEV_SNP_LAUNCH_UPDATE
and KVM_TDX_INIT_MEM_REGION require the source page to be 4KiB aligned, to
avoid non-trivial complexity for a non-existent usecase (and because
in-place conversion simply can't support unaligned sources).

- When populating guest_memfd memory, GUP the source page in common code and
pass the refcounted page to the vendor callback, instead of letting vendor
code do the heavy lifting. Doing so avoids a looming deadlock bug with
in-place conversion due to an AB-BA conflict between mmap_lock and
guest_memfd's filemap invalidate lock.

+134 -145
+1 -1
Documentation/virt/kvm/x86/amd-memory-encryption.rst
··· 523 523 524 524 struct kvm_sev_snp_launch_update { 525 525 __u64 gfn_start; /* Guest page number to load/encrypt data into. */ 526 - __u64 uaddr; /* Userspace address of data to be loaded/encrypted. */ 526 + __u64 uaddr; /* 4k-aligned address of data to be loaded/encrypted. */ 527 527 __u64 len; /* 4k-aligned length in bytes to copy into guest memory.*/ 528 528 __u8 type; /* The type of the guest pages being initialized. */ 529 529 __u8 pad0;
+1 -1
Documentation/virt/kvm/x86/intel-tdx.rst
··· 156 156 :Returns: 0 on success, <0 on error 157 157 158 158 Initialize @nr_pages TDX guest private memory starting from @gpa with userspace 159 - provided data from @source_addr. 159 + provided data from @source_addr. @source_addr must be PAGE_SIZE-aligned. 160 160 161 161 Note, before calling this sub command, memory attribute of the range 162 162 [gpa, gpa + nr_pages] needs to be private. Userspace can use
+50 -64
arch/x86/kvm/svm/sev.c
··· 2277 2277 int fw_error; 2278 2278 }; 2279 2279 2280 - static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pfn, 2281 - void __user *src, int order, void *opaque) 2280 + static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, 2281 + struct page *src_page, void *opaque) 2282 2282 { 2283 2283 struct sev_gmem_populate_args *sev_populate_args = opaque; 2284 + struct sev_data_snp_launch_update fw_args = {0}; 2284 2285 struct kvm_sev_info *sev = to_kvm_sev_info(kvm); 2285 - int n_private = 0, ret, i; 2286 - int npages = (1 << order); 2287 - gfn_t gfn; 2286 + bool assigned = false; 2287 + int level; 2288 + int ret; 2288 2289 2289 - if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src)) 2290 + if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src_page)) 2290 2291 return -EINVAL; 2291 2292 2292 - for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) { 2293 - struct sev_data_snp_launch_update fw_args = {0}; 2294 - bool assigned = false; 2295 - int level; 2296 - 2297 - ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); 2298 - if (ret || assigned) { 2299 - pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", 2300 - __func__, gfn, ret, assigned); 2301 - ret = ret ? 
-EINVAL : -EEXIST; 2302 - goto err; 2303 - } 2304 - 2305 - if (src) { 2306 - void *vaddr = kmap_local_pfn(pfn + i); 2307 - 2308 - if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) { 2309 - ret = -EFAULT; 2310 - goto err; 2311 - } 2312 - kunmap_local(vaddr); 2313 - } 2314 - 2315 - ret = rmp_make_private(pfn + i, gfn << PAGE_SHIFT, PG_LEVEL_4K, 2316 - sev_get_asid(kvm), true); 2317 - if (ret) 2318 - goto err; 2319 - 2320 - n_private++; 2321 - 2322 - fw_args.gctx_paddr = __psp_pa(sev->snp_context); 2323 - fw_args.address = __sme_set(pfn_to_hpa(pfn + i)); 2324 - fw_args.page_size = PG_LEVEL_TO_RMP(PG_LEVEL_4K); 2325 - fw_args.page_type = sev_populate_args->type; 2326 - 2327 - ret = __sev_issue_cmd(sev_populate_args->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE, 2328 - &fw_args, &sev_populate_args->fw_error); 2329 - if (ret) 2330 - goto fw_err; 2293 + ret = snp_lookup_rmpentry((u64)pfn, &assigned, &level); 2294 + if (ret || assigned) { 2295 + pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", 2296 + __func__, gfn, ret, assigned); 2297 + ret = ret ? 
-EINVAL : -EEXIST; 2298 + goto out; 2331 2299 } 2332 2300 2333 - return 0; 2301 + if (src_page) { 2302 + void *src_vaddr = kmap_local_page(src_page); 2303 + void *dst_vaddr = kmap_local_pfn(pfn); 2334 2304 2335 - fw_err: 2305 + memcpy(dst_vaddr, src_vaddr, PAGE_SIZE); 2306 + 2307 + kunmap_local(src_vaddr); 2308 + kunmap_local(dst_vaddr); 2309 + } 2310 + 2311 + ret = rmp_make_private(pfn, gfn << PAGE_SHIFT, PG_LEVEL_4K, 2312 + sev_get_asid(kvm), true); 2313 + if (ret) 2314 + goto out; 2315 + 2316 + fw_args.gctx_paddr = __psp_pa(sev->snp_context); 2317 + fw_args.address = __sme_set(pfn_to_hpa(pfn)); 2318 + fw_args.page_size = PG_LEVEL_TO_RMP(PG_LEVEL_4K); 2319 + fw_args.page_type = sev_populate_args->type; 2320 + 2321 + ret = __sev_issue_cmd(sev_populate_args->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE, 2322 + &fw_args, &sev_populate_args->fw_error); 2336 2323 /* 2337 2324 * If the firmware command failed handle the reclaim and cleanup of that 2338 - * PFN specially vs. prior pages which can be cleaned up below without 2339 - * needing to reclaim in advance. 2325 + * PFN before reporting an error. 2340 2326 * 2341 2327 * Additionally, when invalid CPUID function entries are detected, 2342 2328 * firmware writes the expected values into the page and leaves it ··· 2332 2346 * information to provide information on which CPUID leaves/fields 2333 2347 * failed CPUID validation. 
2334 2348 */ 2335 - if (!snp_page_reclaim(kvm, pfn + i) && 2349 + if (ret && !snp_page_reclaim(kvm, pfn) && 2336 2350 sev_populate_args->type == KVM_SEV_SNP_PAGE_TYPE_CPUID && 2337 2351 sev_populate_args->fw_error == SEV_RET_INVALID_PARAM) { 2338 - void *vaddr = kmap_local_pfn(pfn + i); 2352 + void *src_vaddr = kmap_local_page(src_page); 2353 + void *dst_vaddr = kmap_local_pfn(pfn); 2339 2354 2340 - if (copy_to_user(src + i * PAGE_SIZE, vaddr, PAGE_SIZE)) 2341 - pr_debug("Failed to write CPUID page back to userspace\n"); 2355 + memcpy(src_vaddr, dst_vaddr, PAGE_SIZE); 2342 2356 2343 - kunmap_local(vaddr); 2357 + kunmap_local(src_vaddr); 2358 + kunmap_local(dst_vaddr); 2344 2359 } 2345 2360 2346 - /* pfn + i is hypervisor-owned now, so skip below cleanup for it. */ 2347 - n_private--; 2348 - 2349 - err: 2350 - pr_debug("%s: exiting with error ret %d (fw_error %d), restoring %d gmem PFNs to shared.\n", 2351 - __func__, ret, sev_populate_args->fw_error, n_private); 2352 - for (i = 0; i < n_private; i++) 2353 - kvm_rmp_make_shared(kvm, pfn + i, PG_LEVEL_4K); 2354 - 2361 + out: 2362 + if (ret) 2363 + pr_debug("%s: error updating GFN %llx, return code %d (fw_error %d)\n", 2364 + __func__, gfn, ret, sev_populate_args->fw_error); 2355 2365 return ret; 2356 2366 } 2357 2367 ··· 2376 2394 params.type != KVM_SEV_SNP_PAGE_TYPE_UNMEASURED && 2377 2395 params.type != KVM_SEV_SNP_PAGE_TYPE_SECRETS && 2378 2396 params.type != KVM_SEV_SNP_PAGE_TYPE_CPUID)) 2397 + return -EINVAL; 2398 + 2399 + src = params.type == KVM_SEV_SNP_PAGE_TYPE_ZERO ? NULL : u64_to_user_ptr(params.uaddr); 2400 + 2401 + if (!PAGE_ALIGNED(src)) 2379 2402 return -EINVAL; 2380 2403 2381 2404 npages = params.len / PAGE_SIZE; ··· 2414 2427 2415 2428 sev_populate_args.sev_fd = argp->sev_fd; 2416 2429 sev_populate_args.type = params.type; 2417 - src = params.type == KVM_SEV_SNP_PAGE_TYPE_ZERO ? 
NULL : u64_to_user_ptr(params.uaddr); 2418 2430 2419 2431 count = kvm_gmem_populate(kvm, params.gfn_start, src, npages, 2420 2432 sev_gmem_post_populate, &sev_populate_args);
+3 -13
arch/x86/kvm/vmx/tdx.c
··· 3118 3118 }; 3119 3119 3120 3120 static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, 3121 - void __user *src, int order, void *_arg) 3121 + struct page *src_page, void *_arg) 3122 3122 { 3123 3123 struct tdx_gmem_post_populate_arg *arg = _arg; 3124 3124 struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); 3125 3125 u64 err, entry, level_state; 3126 3126 gpa_t gpa = gfn_to_gpa(gfn); 3127 - struct page *src_page; 3128 3127 int ret, i; 3129 3128 3130 3129 if (KVM_BUG_ON(kvm_tdx->page_add_src, kvm)) 3131 3130 return -EIO; 3132 3131 3133 - /* 3134 - * Get the source page if it has been faulted in. Return failure if the 3135 - * source page has been swapped out or unmapped in primary memory. 3136 - */ 3137 - ret = get_user_pages_fast((unsigned long)src, 1, 0, &src_page); 3138 - if (ret < 0) 3139 - return ret; 3140 - if (ret != 1) 3141 - return -ENOMEM; 3132 + if (!src_page) 3133 + return -EOPNOTSUPP; 3142 3134 3143 3135 kvm_tdx->page_add_src = src_page; 3144 3136 ret = kvm_tdp_mmu_map_private_pfn(arg->vcpu, gfn, pfn); 3145 3137 kvm_tdx->page_add_src = NULL; 3146 - 3147 - put_page(src_page); 3148 3138 3149 3139 if (ret || !(arg->flags & KVM_TDX_MEASURE_MEMORY_REGION)) 3150 3140 return ret;
+2 -2
include/linux/kvm_host.h
··· 2566 2566 * @gfn: starting GFN to be populated 2567 2567 * @src: userspace-provided buffer containing data to copy into GFN range 2568 2568 * (passed to @post_populate, and incremented on each iteration 2569 - * if not NULL) 2569 + * if not NULL). Must be page-aligned. 2570 2570 * @npages: number of pages to copy from userspace-buffer 2571 2571 * @post_populate: callback to issue for each gmem page that backs the GPA 2572 2572 * range ··· 2581 2581 * Returns the number of pages that were populated. 2582 2582 */ 2583 2583 typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, 2584 - void __user *src, int order, void *opaque); 2584 + struct page *page, void *opaque); 2585 2585 2586 2586 long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, 2587 2587 kvm_gmem_populate_cb post_populate, void *opaque);
+77 -64
virt/kvm/guest_memfd.c
··· 76 76 return 0; 77 77 } 78 78 79 - static inline void kvm_gmem_mark_prepared(struct folio *folio) 80 - { 81 - folio_mark_uptodate(folio); 82 - } 83 - 84 79 /* 85 80 * Process @folio, which contains @gfn, so that the guest can use it. 86 81 * The folio must be locked and the gfn must be contained in @slot. ··· 85 90 static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, 86 91 gfn_t gfn, struct folio *folio) 87 92 { 88 - unsigned long nr_pages, i; 89 93 pgoff_t index; 90 - int r; 91 - 92 - nr_pages = folio_nr_pages(folio); 93 - for (i = 0; i < nr_pages; i++) 94 - clear_highpage(folio_page(folio, i)); 95 94 96 95 /* 97 96 * Preparing huge folios should always be safe, since it should ··· 103 114 WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, folio_nr_pages(folio))); 104 115 index = kvm_gmem_get_index(slot, gfn); 105 116 index = ALIGN_DOWN(index, folio_nr_pages(folio)); 106 - r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); 107 - if (!r) 108 - kvm_gmem_mark_prepared(folio); 109 117 110 - return r; 118 + return __kvm_gmem_prepare_folio(kvm, slot, index, folio); 111 119 } 112 120 113 121 /* ··· 136 150 FGP_LOCK | FGP_ACCESSED | FGP_CREAT, 137 151 mapping_gfp_mask(inode->i_mapping), policy); 138 152 mpol_cond_put(policy); 153 + 154 + /* 155 + * External interfaces like kvm_gmem_get_pfn() support dealing 156 + * with hugepages to a degree, but internally, guest_memfd currently 157 + * assumes that all folios are order-0 and handling would need 158 + * to be updated for anything otherwise (e.g. page-clearing 159 + * operations). 
160 + */ 161 + WARN_ON_ONCE(!IS_ERR(folio) && folio_order(folio)); 139 162 140 163 return folio; 141 164 } ··· 415 420 416 421 if (!folio_test_uptodate(folio)) { 417 422 clear_highpage(folio_page(folio, 0)); 418 - kvm_gmem_mark_prepared(folio); 423 + folio_mark_uptodate(folio); 419 424 } 420 425 421 426 vmf->page = folio_file_page(folio, vmf->pgoff); ··· 752 757 static struct folio *__kvm_gmem_get_pfn(struct file *file, 753 758 struct kvm_memory_slot *slot, 754 759 pgoff_t index, kvm_pfn_t *pfn, 755 - bool *is_prepared, int *max_order) 760 + int *max_order) 756 761 { 757 762 struct file *slot_file = READ_ONCE(slot->gmem.file); 758 763 struct gmem_file *f = file->private_data; ··· 782 787 if (max_order) 783 788 *max_order = 0; 784 789 785 - *is_prepared = folio_test_uptodate(folio); 786 790 return folio; 787 791 } 788 792 ··· 791 797 { 792 798 pgoff_t index = kvm_gmem_get_index(slot, gfn); 793 799 struct folio *folio; 794 - bool is_prepared = false; 795 800 int r = 0; 796 801 797 802 CLASS(gmem_get_file, file)(slot); 798 803 if (!file) 799 804 return -EFAULT; 800 805 801 - folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order); 806 + folio = __kvm_gmem_get_pfn(file, slot, index, pfn, max_order); 802 807 if (IS_ERR(folio)) 803 808 return PTR_ERR(folio); 804 809 805 - if (!is_prepared) 806 - r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); 810 + if (!folio_test_uptodate(folio)) { 811 + clear_highpage(folio_page(folio, 0)); 812 + folio_mark_uptodate(folio); 813 + } 814 + 815 + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); 807 816 808 817 folio_unlock(folio); 809 818 ··· 820 823 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn); 821 824 822 825 #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE 826 + 827 + static long __kvm_gmem_populate(struct kvm *kvm, struct kvm_memory_slot *slot, 828 + struct file *file, gfn_t gfn, struct page *src_page, 829 + kvm_gmem_populate_cb post_populate, void *opaque) 830 + { 831 + pgoff_t index = kvm_gmem_get_index(slot, 
gfn); 832 + struct folio *folio; 833 + kvm_pfn_t pfn; 834 + int ret; 835 + 836 + filemap_invalidate_lock(file->f_mapping); 837 + 838 + folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, NULL); 839 + if (IS_ERR(folio)) { 840 + ret = PTR_ERR(folio); 841 + goto out_unlock; 842 + } 843 + 844 + folio_unlock(folio); 845 + 846 + if (!kvm_range_has_memory_attributes(kvm, gfn, gfn + 1, 847 + KVM_MEMORY_ATTRIBUTE_PRIVATE, 848 + KVM_MEMORY_ATTRIBUTE_PRIVATE)) { 849 + ret = -EINVAL; 850 + goto out_put_folio; 851 + } 852 + 853 + ret = post_populate(kvm, gfn, pfn, src_page, opaque); 854 + if (!ret) 855 + folio_mark_uptodate(folio); 856 + 857 + out_put_folio: 858 + folio_put(folio); 859 + out_unlock: 860 + filemap_invalidate_unlock(file->f_mapping); 861 + return ret; 862 + } 863 + 823 864 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, 824 865 kvm_gmem_populate_cb post_populate, void *opaque) 825 866 { 826 867 struct kvm_memory_slot *slot; 827 - void __user *p; 828 - 829 - int ret = 0, max_order; 868 + int ret = 0; 830 869 long i; 831 870 832 871 lockdep_assert_held(&kvm->slots_lock); 833 872 834 873 if (WARN_ON_ONCE(npages <= 0)) 874 + return -EINVAL; 875 + 876 + if (WARN_ON_ONCE(!PAGE_ALIGNED(src))) 835 877 return -EINVAL; 836 878 837 879 slot = gfn_to_memslot(kvm, start_gfn); ··· 881 845 if (!file) 882 846 return -EFAULT; 883 847 884 - filemap_invalidate_lock(file->f_mapping); 885 - 886 848 npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); 887 - for (i = 0; i < npages; i += (1 << max_order)) { 888 - struct folio *folio; 889 - gfn_t gfn = start_gfn + i; 890 - pgoff_t index = kvm_gmem_get_index(slot, gfn); 891 - bool is_prepared = false; 892 - kvm_pfn_t pfn; 849 + for (i = 0; i < npages; i++) { 850 + struct page *src_page = NULL; 893 851 894 852 if (signal_pending(current)) { 895 853 ret = -EINTR; 896 854 break; 897 855 } 898 856 899 - folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, &is_prepared, &max_order); 
900 - if (IS_ERR(folio)) { 901 - ret = PTR_ERR(folio); 902 - break; 857 + if (src) { 858 + unsigned long uaddr = (unsigned long)src + i * PAGE_SIZE; 859 + 860 + ret = get_user_pages_fast(uaddr, 1, 0, &src_page); 861 + if (ret < 0) 862 + break; 863 + if (ret != 1) { 864 + ret = -ENOMEM; 865 + break; 866 + } 903 867 } 904 868 905 - if (is_prepared) { 906 - folio_unlock(folio); 907 - folio_put(folio); 908 - ret = -EEXIST; 909 - break; 910 - } 869 + ret = __kvm_gmem_populate(kvm, slot, file, start_gfn + i, src_page, 870 + post_populate, opaque); 911 871 912 - folio_unlock(folio); 913 - WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || 914 - (npages - i) < (1 << max_order)); 872 + if (src_page) 873 + put_page(src_page); 915 874 916 - ret = -EINVAL; 917 - while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), 918 - KVM_MEMORY_ATTRIBUTE_PRIVATE, 919 - KVM_MEMORY_ATTRIBUTE_PRIVATE)) { 920 - if (!max_order) 921 - goto put_folio_and_exit; 922 - max_order--; 923 - } 924 - 925 - p = src ? src + i * PAGE_SIZE : NULL; 926 - ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); 927 - if (!ret) 928 - kvm_gmem_mark_prepared(folio); 929 - 930 - put_folio_and_exit: 931 - folio_put(folio); 932 875 if (ret) 933 876 break; 934 877 } 935 - 936 - filemap_invalidate_unlock(file->f_mapping); 937 878 938 879 return ret && !i ? ret : i; 939 880 }