Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:
"This is a pretty consequential cycle for iommufd, though this pull is
not too big. It is based on a shared branch with VFIO that introduces
VFIO_DEVICE_FEATURE_DMA_BUF, a DMABUF exporter for a VFIO device's MMIO
PCI BARs. This was a large, multi-series journey over the last year
and a half.

Based on that work, IOMMUFD gains support for VFIO DMABUFs in its
existing IOMMU_IOAS_MAP_FILE, which closes the last major gap to
support PCI peer to peer transfers within VMs.

In Joerg's iommu tree we have the "generic page table" work which aims
to consolidate all the duplicated page table code in every iommu
driver into a single algorithm. This will be used by iommufd to
implement unique page table operations to start adding new features
and improve performance.

In here:

- Expand IOMMU_IOAS_MAP_FILE to accept a DMABUF exported from VFIO.
This is the first step to broader DMABUF support in iommufd; right
now it only works with VFIO. This closes the last functional gap
with classic VFIO type 1 to safely support PCI peer to peer DMA by
mapping the VFIO device's MMIO into the IOMMU.

- Relax SMMUv3 restrictions on nesting domains to better support
qemu's sequence to have an identity mapping before the vSID is
established"

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd:
iommu/arm-smmu-v3-iommufd: Allow attaching nested domain for GBPA cases
iommufd/selftest: Add some tests for the dmabuf flow
iommufd: Accept a DMABUF through IOMMU_IOAS_MAP_FILE
iommufd: Have iopt_map_file_pages convert the fd to a file
iommufd: Have pfn_reader process DMABUF iopt_pages
iommufd: Allow MMIO pages in a batch
iommufd: Allow a DMABUF to be revoked
iommufd: Do not map/unmap revoked DMABUFs
iommufd: Add DMABUF to iopt_pages
vfio/pci: Add vfio_pci_dma_buf_iommufd_map()

+808 -71
+12 -1
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
··· 99 99 int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, 100 100 struct arm_smmu_nested_domain *nested_domain) 101 101 { 102 + unsigned int cfg = 103 + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0])); 102 104 struct arm_smmu_vmaster *vmaster; 103 105 unsigned long vsid; 104 106 int ret; ··· 109 107 110 108 ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core, 111 109 state->master->dev, &vsid); 112 - if (ret) 110 + /* 111 + * Attaching to a translate nested domain must allocate a vDEVICE prior, 112 + * as CD/ATS invalidations and vevents require a vSID to work properly. 113 + * A abort/bypass domain is allowed to attach w/o vmaster for GBPA case. 114 + */ 115 + if (ret) { 116 + if (cfg == STRTAB_STE_0_CFG_ABORT || 117 + cfg == STRTAB_STE_0_CFG_BYPASS) 118 + return 0; 113 119 return ret; 120 + } 114 121 115 122 vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL); 116 123 if (!vmaster)
+65 -13
drivers/iommu/iommufd/io_pagetable.c
··· 8 8 * The datastructure uses the iopt_pages to optimize the storage of the PFNs 9 9 * between the domains and xarray. 10 10 */ 11 + #include <linux/dma-buf.h> 11 12 #include <linux/err.h> 12 13 #include <linux/errno.h> 14 + #include <linux/file.h> 13 15 #include <linux/iommu.h> 14 16 #include <linux/iommufd.h> 15 17 #include <linux/lockdep.h> ··· 286 284 case IOPT_ADDRESS_FILE: 287 285 start = elm->start_byte + elm->pages->start; 288 286 break; 287 + case IOPT_ADDRESS_DMABUF: 288 + start = elm->start_byte + elm->pages->dmabuf.start; 289 + break; 289 290 } 290 291 rc = iopt_alloc_iova(iopt, dst_iova, start, length); 291 292 if (rc) ··· 473 468 * @iopt: io_pagetable to act on 474 469 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains 475 470 * the chosen iova on output. Otherwise is the iova to map to on input 476 - * @file: file to map 471 + * @fd: fdno of a file to map 477 472 * @start: map file starting at this byte offset 478 473 * @length: Number of bytes to map 479 474 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping 480 475 * @flags: IOPT_ALLOC_IOVA or zero 481 476 */ 482 477 int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, 483 - unsigned long *iova, struct file *file, 484 - unsigned long start, unsigned long length, 485 - int iommu_prot, unsigned int flags) 478 + unsigned long *iova, int fd, unsigned long start, 479 + unsigned long length, int iommu_prot, 480 + unsigned int flags) 486 481 { 487 482 struct iopt_pages *pages; 483 + struct dma_buf *dmabuf; 484 + unsigned long start_byte; 485 + unsigned long last; 488 486 489 - pages = iopt_alloc_file_pages(file, start, length, 490 - iommu_prot & IOMMU_WRITE); 491 - if (IS_ERR(pages)) 492 - return PTR_ERR(pages); 487 + if (!length) 488 + return -EINVAL; 489 + if (check_add_overflow(start, length - 1, &last)) 490 + return -EOVERFLOW; 491 + 492 + start_byte = start - ALIGN_DOWN(start, PAGE_SIZE); 493 + dmabuf = dma_buf_get(fd); 494 + if 
(!IS_ERR(dmabuf)) { 495 + pages = iopt_alloc_dmabuf_pages(ictx, dmabuf, start_byte, start, 496 + length, 497 + iommu_prot & IOMMU_WRITE); 498 + if (IS_ERR(pages)) { 499 + dma_buf_put(dmabuf); 500 + return PTR_ERR(pages); 501 + } 502 + } else { 503 + struct file *file; 504 + 505 + file = fget(fd); 506 + if (!file) 507 + return -EBADF; 508 + 509 + pages = iopt_alloc_file_pages(file, start_byte, start, length, 510 + iommu_prot & IOMMU_WRITE); 511 + fput(file); 512 + if (IS_ERR(pages)) 513 + return PTR_ERR(pages); 514 + } 515 + 493 516 return iopt_map_common(ictx, iopt, pages, iova, length, 494 - start - pages->start, iommu_prot, flags); 517 + start_byte, iommu_prot, flags); 495 518 } 496 519 497 520 struct iova_bitmap_fn_arg { ··· 994 961 WARN_ON(!area->storage_domain); 995 962 if (area->storage_domain == domain) 996 963 area->storage_domain = storage_domain; 964 + if (iopt_is_dmabuf(pages)) { 965 + if (!iopt_dmabuf_revoked(pages)) 966 + iopt_area_unmap_domain(area, domain); 967 + iopt_dmabuf_untrack_domain(pages, area, domain); 968 + } 997 969 mutex_unlock(&pages->mutex); 998 970 999 - iopt_area_unmap_domain(area, domain); 971 + if (!iopt_is_dmabuf(pages)) 972 + iopt_area_unmap_domain(area, domain); 1000 973 } 1001 974 return; 1002 975 } ··· 1019 980 WARN_ON(area->storage_domain != domain); 1020 981 area->storage_domain = NULL; 1021 982 iopt_area_unfill_domain(area, pages, domain); 983 + if (iopt_is_dmabuf(pages)) 984 + iopt_dmabuf_untrack_domain(pages, area, domain); 1022 985 mutex_unlock(&pages->mutex); 1023 986 } 1024 987 } ··· 1050 1009 if (!pages) 1051 1010 continue; 1052 1011 1053 - mutex_lock(&pages->mutex); 1012 + guard(mutex)(&pages->mutex); 1013 + if (iopt_is_dmabuf(pages)) { 1014 + rc = iopt_dmabuf_track_domain(pages, area, domain); 1015 + if (rc) 1016 + goto out_unfill; 1017 + } 1054 1018 rc = iopt_area_fill_domain(area, domain); 1055 1019 if (rc) { 1056 - mutex_unlock(&pages->mutex); 1020 + if (iopt_is_dmabuf(pages)) 1021 + 
iopt_dmabuf_untrack_domain(pages, area, domain); 1057 1022 goto out_unfill; 1058 1023 } 1059 1024 if (!area->storage_domain) { ··· 1068 1021 interval_tree_insert(&area->pages_node, 1069 1022 &pages->domains_itree); 1070 1023 } 1071 - mutex_unlock(&pages->mutex); 1072 1024 } 1073 1025 return 0; 1074 1026 ··· 1088 1042 area->storage_domain = NULL; 1089 1043 } 1090 1044 iopt_area_unfill_domain(area, pages, domain); 1045 + if (iopt_is_dmabuf(pages)) 1046 + iopt_dmabuf_untrack_domain(pages, area, domain); 1091 1047 mutex_unlock(&pages->mutex); 1092 1048 } 1093 1049 return rc; ··· 1299 1251 1300 1252 if (!pages || area->prevent_access) 1301 1253 return -EBUSY; 1254 + 1255 + /* Maintaining the domains_itree below is a bit complicated */ 1256 + if (iopt_is_dmabuf(pages)) 1257 + return -EOPNOTSUPP; 1302 1258 1303 1259 if (new_start & (alignment - 1) || 1304 1260 iopt_area_start_byte(area, new_start) & (alignment - 1))
+52 -2
drivers/iommu/iommufd/io_pagetable.h
··· 5 5 #ifndef __IO_PAGETABLE_H 6 6 #define __IO_PAGETABLE_H 7 7 8 + #include <linux/dma-buf.h> 8 9 #include <linux/interval_tree.h> 9 10 #include <linux/kref.h> 10 11 #include <linux/mutex.h> ··· 69 68 struct iommu_domain *domain); 70 69 void iopt_area_unmap_domain(struct iopt_area *area, 71 70 struct iommu_domain *domain); 71 + 72 + int iopt_dmabuf_track_domain(struct iopt_pages *pages, struct iopt_area *area, 73 + struct iommu_domain *domain); 74 + void iopt_dmabuf_untrack_domain(struct iopt_pages *pages, 75 + struct iopt_area *area, 76 + struct iommu_domain *domain); 77 + int iopt_dmabuf_track_all_domains(struct iopt_area *area, 78 + struct iopt_pages *pages); 79 + void iopt_dmabuf_untrack_all_domains(struct iopt_area *area, 80 + struct iopt_pages *pages); 72 81 73 82 static inline unsigned long iopt_area_index(struct iopt_area *area) 74 83 { ··· 190 179 191 180 enum iopt_address_type { 192 181 IOPT_ADDRESS_USER = 0, 193 - IOPT_ADDRESS_FILE = 1, 182 + IOPT_ADDRESS_FILE, 183 + IOPT_ADDRESS_DMABUF, 184 + }; 185 + 186 + struct iopt_pages_dmabuf_track { 187 + struct iommu_domain *domain; 188 + struct iopt_area *area; 189 + struct list_head elm; 190 + }; 191 + 192 + struct iopt_pages_dmabuf { 193 + struct dma_buf_attachment *attach; 194 + struct dma_buf_phys_vec phys; 195 + /* Always PAGE_SIZE aligned */ 196 + unsigned long start; 197 + struct list_head tracker; 194 198 }; 195 199 196 200 /* ··· 235 209 struct file *file; 236 210 unsigned long start; 237 211 }; 212 + /* IOPT_ADDRESS_DMABUF */ 213 + struct iopt_pages_dmabuf dmabuf; 238 214 }; 239 215 bool writable:1; 240 216 u8 account_mode; ··· 248 220 struct rb_root_cached domains_itree; 249 221 }; 250 222 223 + static inline bool iopt_is_dmabuf(struct iopt_pages *pages) 224 + { 225 + if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) 226 + return false; 227 + return pages->type == IOPT_ADDRESS_DMABUF; 228 + } 229 + 230 + static inline bool iopt_dmabuf_revoked(struct iopt_pages *pages) 231 + { 232 + 
lockdep_assert_held(&pages->mutex); 233 + if (iopt_is_dmabuf(pages)) 234 + return pages->dmabuf.phys.len == 0; 235 + return false; 236 + } 237 + 251 238 struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, 252 239 unsigned long length, bool writable); 253 - struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start, 240 + struct iopt_pages *iopt_alloc_file_pages(struct file *file, 241 + unsigned long start_byte, 242 + unsigned long start, 254 243 unsigned long length, bool writable); 244 + struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx, 245 + struct dma_buf *dmabuf, 246 + unsigned long start_byte, 247 + unsigned long start, 248 + unsigned long length, bool writable); 255 249 void iopt_release_pages(struct kref *kref); 256 250 static inline void iopt_put_pages(struct iopt_pages *pages) 257 251 {
+1 -7
drivers/iommu/iommufd/ioas.c
··· 207 207 unsigned long iova = cmd->iova; 208 208 struct iommufd_ioas *ioas; 209 209 unsigned int flags = 0; 210 - struct file *file; 211 210 int rc; 212 211 213 212 if (cmd->flags & ··· 228 229 if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) 229 230 flags = IOPT_ALLOC_IOVA; 230 231 231 - file = fget(cmd->fd); 232 - if (!file) 233 - return -EBADF; 234 - 235 - rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, file, 232 + rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, cmd->fd, 236 233 cmd->start, cmd->length, 237 234 conv_iommu_prot(cmd->flags), flags); 238 235 if (rc) ··· 238 243 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 239 244 out_put: 240 245 iommufd_put_object(ucmd->ictx, &ioas->obj); 241 - fput(file); 242 246 return rc; 243 247 } 244 248
+13 -1
drivers/iommu/iommufd/iommufd_private.h
··· 19 19 struct iommu_group; 20 20 struct iommu_option; 21 21 struct iommufd_device; 22 + struct dma_buf_attachment; 23 + struct dma_buf_phys_vec; 22 24 23 25 struct iommufd_sw_msi_map { 24 26 struct list_head sw_msi_item; ··· 110 108 unsigned long length, int iommu_prot, 111 109 unsigned int flags); 112 110 int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, 113 - unsigned long *iova, struct file *file, 111 + unsigned long *iova, int fd, 114 112 unsigned long start, unsigned long length, 115 113 int iommu_prot, unsigned int flags); 116 114 int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, ··· 506 504 void iommufd_device_destroy(struct iommufd_object *obj); 507 505 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); 508 506 507 + struct device *iommufd_global_device(void); 508 + 509 509 struct iommufd_access { 510 510 struct iommufd_object obj; 511 511 struct iommufd_ctx *ictx; ··· 717 713 int __init iommufd_test_init(void); 718 714 void iommufd_test_exit(void); 719 715 bool iommufd_selftest_is_mock_dev(struct device *dev); 716 + int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 717 + struct dma_buf_phys_vec *phys); 720 718 #else 721 719 static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, 722 720 unsigned int ioas_id, ··· 739 733 static inline bool iommufd_selftest_is_mock_dev(struct device *dev) 740 734 { 741 735 return false; 736 + } 737 + static inline int 738 + iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 739 + struct dma_buf_phys_vec *phys) 740 + { 741 + return -EOPNOTSUPP; 742 742 } 743 743 #endif 744 744 #endif
+10
drivers/iommu/iommufd/iommufd_test.h
··· 29 29 IOMMU_TEST_OP_PASID_REPLACE, 30 30 IOMMU_TEST_OP_PASID_DETACH, 31 31 IOMMU_TEST_OP_PASID_CHECK_HWPT, 32 + IOMMU_TEST_OP_DMABUF_GET, 33 + IOMMU_TEST_OP_DMABUF_REVOKE, 32 34 }; 33 35 34 36 enum { ··· 186 184 __u32 hwpt_id; 187 185 /* @id is stdev_id */ 188 186 } pasid_check; 187 + struct { 188 + __u32 length; 189 + __u32 open_flags; 190 + } dmabuf_get; 191 + struct { 192 + __s32 dmabuf_fd; 193 + __u32 revoked; 194 + } dmabuf_revoke; 189 195 }; 190 196 __u32 last; 191 197 };
+10
drivers/iommu/iommufd/main.c
··· 751 751 .mode = 0666, 752 752 }; 753 753 754 + /* 755 + * Used only by DMABUF, returns a valid struct device to use as a dummy struct 756 + * device for attachment. 757 + */ 758 + struct device *iommufd_global_device(void) 759 + { 760 + return iommu_misc_dev.this_device; 761 + } 762 + 754 763 static int __init iommufd_init(void) 755 764 { 756 765 int ret; ··· 803 794 #endif 804 795 MODULE_IMPORT_NS("IOMMUFD_INTERNAL"); 805 796 MODULE_IMPORT_NS("IOMMUFD"); 797 + MODULE_IMPORT_NS("DMA_BUF"); 806 798 MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); 807 799 MODULE_LICENSE("GPL");
+367 -47
drivers/iommu/iommufd/pages.c
··· 45 45 * last_iova + 1 can overflow. An iopt_pages index will always be much less than 46 46 * ULONG_MAX so last_index + 1 cannot overflow. 47 47 */ 48 + #include <linux/dma-buf.h> 49 + #include <linux/dma-resv.h> 48 50 #include <linux/file.h> 49 51 #include <linux/highmem.h> 50 52 #include <linux/iommu.h> ··· 55 53 #include <linux/overflow.h> 56 54 #include <linux/slab.h> 57 55 #include <linux/sched/mm.h> 56 + #include <linux/vfio_pci_core.h> 58 57 59 58 #include "double_span.h" 60 59 #include "io_pagetable.h" ··· 261 258 return container_of(node, struct iopt_area, pages_node); 262 259 } 263 260 261 + enum batch_kind { 262 + BATCH_CPU_MEMORY = 0, 263 + BATCH_MMIO, 264 + }; 265 + 264 266 /* 265 267 * A simple datastructure to hold a vector of PFNs, optimized for contiguous 266 268 * PFNs. This is used as a temporary holding memory for shuttling pfns from one ··· 279 271 unsigned int array_size; 280 272 unsigned int end; 281 273 unsigned int total_pfns; 274 + enum batch_kind kind; 282 275 }; 276 + enum { MAX_NPFNS = type_max(typeof(((struct pfn_batch *)0)->npfns[0])) }; 283 277 284 278 static void batch_clear(struct pfn_batch *batch) 285 279 { ··· 358 348 } 359 349 360 350 static bool batch_add_pfn_num(struct pfn_batch *batch, unsigned long pfn, 361 - u32 nr) 351 + u32 nr, enum batch_kind kind) 362 352 { 363 - const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns)); 364 353 unsigned int end = batch->end; 354 + 355 + if (batch->kind != kind) { 356 + /* One kind per batch */ 357 + if (batch->end != 0) 358 + return false; 359 + batch->kind = kind; 360 + } 365 361 366 362 if (end && pfn == batch->pfns[end - 1] + batch->npfns[end - 1] && 367 363 nr <= MAX_NPFNS - batch->npfns[end - 1]) { ··· 395 379 /* true if the pfn was added, false otherwise */ 396 380 static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn) 397 381 { 398 - return batch_add_pfn_num(batch, pfn, 1); 382 + return batch_add_pfn_num(batch, pfn, 1, BATCH_CPU_MEMORY); 399 383 } 400 384 
401 385 /* ··· 508 492 { 509 493 bool disable_large_pages = area->iopt->disable_large_pages; 510 494 unsigned long last_iova = iopt_area_last_iova(area); 495 + int iommu_prot = area->iommu_prot; 511 496 unsigned int page_offset = 0; 512 497 unsigned long start_iova; 513 498 unsigned long next_iova; 514 499 unsigned int cur = 0; 515 500 unsigned long iova; 516 501 int rc; 502 + 503 + if (batch->kind == BATCH_MMIO) { 504 + iommu_prot &= ~IOMMU_CACHE; 505 + iommu_prot |= IOMMU_MMIO; 506 + } 517 507 518 508 /* The first index might be a partial page */ 519 509 if (start_index == iopt_area_index(area)) ··· 534 512 rc = batch_iommu_map_small( 535 513 domain, iova, 536 514 PFN_PHYS(batch->pfns[cur]) + page_offset, 537 - next_iova - iova, area->iommu_prot); 515 + next_iova - iova, iommu_prot); 538 516 else 539 517 rc = iommu_map(domain, iova, 540 518 PFN_PHYS(batch->pfns[cur]) + page_offset, 541 - next_iova - iova, area->iommu_prot, 519 + next_iova - iova, iommu_prot, 542 520 GFP_KERNEL_ACCOUNT); 543 521 if (rc) 544 522 goto err_unmap; ··· 674 652 nr = min(nr, npages); 675 653 npages -= nr; 676 654 677 - if (!batch_add_pfn_num(batch, pfn, nr)) 655 + if (!batch_add_pfn_num(batch, pfn, nr, BATCH_CPU_MEMORY)) 678 656 break; 679 657 if (nr > 1) { 680 658 rc = folio_add_pins(folio, nr - 1); ··· 1076 1054 return iopt_pages_update_pinned(pages, npages, inc, user); 1077 1055 } 1078 1056 1057 + struct pfn_reader_dmabuf { 1058 + struct dma_buf_phys_vec phys; 1059 + unsigned long start_offset; 1060 + }; 1061 + 1062 + static int pfn_reader_dmabuf_init(struct pfn_reader_dmabuf *dmabuf, 1063 + struct iopt_pages *pages) 1064 + { 1065 + /* Callers must not get here if the dmabuf was already revoked */ 1066 + if (WARN_ON(iopt_dmabuf_revoked(pages))) 1067 + return -EINVAL; 1068 + 1069 + dmabuf->phys = pages->dmabuf.phys; 1070 + dmabuf->start_offset = pages->dmabuf.start; 1071 + return 0; 1072 + } 1073 + 1074 + static int pfn_reader_fill_dmabuf(struct pfn_reader_dmabuf *dmabuf, 1075 + struct 
pfn_batch *batch, 1076 + unsigned long start_index, 1077 + unsigned long last_index) 1078 + { 1079 + unsigned long start = dmabuf->start_offset + start_index * PAGE_SIZE; 1080 + 1081 + /* 1082 + * start/last_index and start are all PAGE_SIZE aligned, the batch is 1083 + * always filled using page size aligned PFNs just like the other types. 1084 + * If the dmabuf has been sliced on a sub page offset then the common 1085 + * batch to domain code will adjust it before mapping to the domain. 1086 + */ 1087 + batch_add_pfn_num(batch, PHYS_PFN(dmabuf->phys.paddr + start), 1088 + last_index - start_index + 1, BATCH_MMIO); 1089 + return 0; 1090 + } 1091 + 1079 1092 /* 1080 1093 * PFNs are stored in three places, in order of preference: 1081 1094 * - The iopt_pages xarray. This is only populated if there is a ··· 1129 1072 unsigned long batch_end_index; 1130 1073 unsigned long last_index; 1131 1074 1132 - struct pfn_reader_user user; 1075 + union { 1076 + struct pfn_reader_user user; 1077 + struct pfn_reader_dmabuf dmabuf; 1078 + }; 1133 1079 }; 1134 1080 1135 1081 static int pfn_reader_update_pinned(struct pfn_reader *pfns) ··· 1168 1108 { 1169 1109 struct interval_tree_double_span_iter *span = &pfns->span; 1170 1110 unsigned long start_index = pfns->batch_end_index; 1171 - struct pfn_reader_user *user = &pfns->user; 1111 + struct pfn_reader_user *user; 1172 1112 unsigned long npages; 1173 1113 struct iopt_area *area; 1174 1114 int rc; ··· 1200 1140 return 0; 1201 1141 } 1202 1142 1203 - if (start_index >= pfns->user.upages_end) { 1204 - rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index, 1143 + if (iopt_is_dmabuf(pfns->pages)) 1144 + return pfn_reader_fill_dmabuf(&pfns->dmabuf, &pfns->batch, 1145 + start_index, span->last_hole); 1146 + 1147 + user = &pfns->user; 1148 + if (start_index >= user->upages_end) { 1149 + rc = pfn_reader_user_pin(user, pfns->pages, start_index, 1205 1150 span->last_hole); 1206 1151 if (rc) 1207 1152 return rc; ··· 1274 1209 
pfns->batch_start_index = start_index; 1275 1210 pfns->batch_end_index = start_index; 1276 1211 pfns->last_index = last_index; 1277 - pfn_reader_user_init(&pfns->user, pages); 1212 + if (iopt_is_dmabuf(pages)) 1213 + pfn_reader_dmabuf_init(&pfns->dmabuf, pages); 1214 + else 1215 + pfn_reader_user_init(&pfns->user, pages); 1278 1216 rc = batch_init(&pfns->batch, last_index - start_index + 1); 1279 1217 if (rc) 1280 1218 return rc; ··· 1298 1230 static void pfn_reader_release_pins(struct pfn_reader *pfns) 1299 1231 { 1300 1232 struct iopt_pages *pages = pfns->pages; 1301 - struct pfn_reader_user *user = &pfns->user; 1233 + struct pfn_reader_user *user; 1302 1234 1235 + if (iopt_is_dmabuf(pages)) 1236 + return; 1237 + 1238 + user = &pfns->user; 1303 1239 if (user->upages_end > pfns->batch_end_index) { 1304 1240 /* Any pages not transferred to the batch are just unpinned */ 1305 1241 ··· 1333 1261 struct iopt_pages *pages = pfns->pages; 1334 1262 1335 1263 pfn_reader_release_pins(pfns); 1336 - pfn_reader_user_destroy(&pfns->user, pfns->pages); 1264 + if (!iopt_is_dmabuf(pfns->pages)) 1265 + pfn_reader_user_destroy(&pfns->user, pfns->pages); 1337 1266 batch_destroy(&pfns->batch, NULL); 1338 1267 WARN_ON(pages->last_npinned != pages->npinned); 1339 1268 } ··· 1413 1340 return pages; 1414 1341 } 1415 1342 1416 - struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start, 1343 + struct iopt_pages *iopt_alloc_file_pages(struct file *file, 1344 + unsigned long start_byte, 1345 + unsigned long start, 1417 1346 unsigned long length, bool writable) 1418 1347 1419 1348 { 1420 1349 struct iopt_pages *pages; 1421 - unsigned long start_down = ALIGN_DOWN(start, PAGE_SIZE); 1422 - unsigned long end; 1423 1350 1424 - if (length && check_add_overflow(start, length - 1, &end)) 1425 - return ERR_PTR(-EOVERFLOW); 1426 - 1427 - pages = iopt_alloc_pages(start - start_down, length, writable); 1351 + pages = iopt_alloc_pages(start_byte, length, writable); 1428 1352 if 
(IS_ERR(pages)) 1429 1353 return pages; 1430 1354 pages->file = get_file(file); 1431 - pages->start = start_down; 1355 + pages->start = start - start_byte; 1432 1356 pages->type = IOPT_ADDRESS_FILE; 1433 1357 return pages; 1358 + } 1359 + 1360 + static void iopt_revoke_notify(struct dma_buf_attachment *attach) 1361 + { 1362 + struct iopt_pages *pages = attach->importer_priv; 1363 + struct iopt_pages_dmabuf_track *track; 1364 + 1365 + guard(mutex)(&pages->mutex); 1366 + if (iopt_dmabuf_revoked(pages)) 1367 + return; 1368 + 1369 + list_for_each_entry(track, &pages->dmabuf.tracker, elm) { 1370 + struct iopt_area *area = track->area; 1371 + 1372 + iopt_area_unmap_domain_range(area, track->domain, 1373 + iopt_area_index(area), 1374 + iopt_area_last_index(area)); 1375 + } 1376 + pages->dmabuf.phys.len = 0; 1377 + } 1378 + 1379 + static struct dma_buf_attach_ops iopt_dmabuf_attach_revoke_ops = { 1380 + .allow_peer2peer = true, 1381 + .move_notify = iopt_revoke_notify, 1382 + }; 1383 + 1384 + /* 1385 + * iommufd and vfio have a circular dependency. Future work for a phys 1386 + * based private interconnect will remove this. 
1387 + */ 1388 + static int 1389 + sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 1390 + struct dma_buf_phys_vec *phys) 1391 + { 1392 + typeof(&vfio_pci_dma_buf_iommufd_map) fn; 1393 + int rc; 1394 + 1395 + rc = iommufd_test_dma_buf_iommufd_map(attachment, phys); 1396 + if (rc != -EOPNOTSUPP) 1397 + return rc; 1398 + 1399 + if (!IS_ENABLED(CONFIG_VFIO_PCI_DMABUF)) 1400 + return -EOPNOTSUPP; 1401 + 1402 + fn = symbol_get(vfio_pci_dma_buf_iommufd_map); 1403 + if (!fn) 1404 + return -EOPNOTSUPP; 1405 + rc = fn(attachment, phys); 1406 + symbol_put(vfio_pci_dma_buf_iommufd_map); 1407 + return rc; 1408 + } 1409 + 1410 + static int iopt_map_dmabuf(struct iommufd_ctx *ictx, struct iopt_pages *pages, 1411 + struct dma_buf *dmabuf) 1412 + { 1413 + struct dma_buf_attachment *attach; 1414 + int rc; 1415 + 1416 + attach = dma_buf_dynamic_attach(dmabuf, iommufd_global_device(), 1417 + &iopt_dmabuf_attach_revoke_ops, pages); 1418 + if (IS_ERR(attach)) 1419 + return PTR_ERR(attach); 1420 + 1421 + dma_resv_lock(dmabuf->resv, NULL); 1422 + /* 1423 + * Lock ordering requires the mutex to be taken inside the reservation, 1424 + * make sure lockdep sees this. 1425 + */ 1426 + if (IS_ENABLED(CONFIG_LOCKDEP)) { 1427 + mutex_lock(&pages->mutex); 1428 + mutex_unlock(&pages->mutex); 1429 + } 1430 + 1431 + rc = sym_vfio_pci_dma_buf_iommufd_map(attach, &pages->dmabuf.phys); 1432 + if (rc) 1433 + goto err_detach; 1434 + 1435 + dma_resv_unlock(dmabuf->resv); 1436 + 1437 + /* On success iopt_release_pages() will detach and put the dmabuf. 
*/ 1438 + pages->dmabuf.attach = attach; 1439 + return 0; 1440 + 1441 + err_detach: 1442 + dma_resv_unlock(dmabuf->resv); 1443 + dma_buf_detach(dmabuf, attach); 1444 + return rc; 1445 + } 1446 + 1447 + struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx, 1448 + struct dma_buf *dmabuf, 1449 + unsigned long start_byte, 1450 + unsigned long start, 1451 + unsigned long length, bool writable) 1452 + { 1453 + static struct lock_class_key pages_dmabuf_mutex_key; 1454 + struct iopt_pages *pages; 1455 + int rc; 1456 + 1457 + if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) 1458 + return ERR_PTR(-EOPNOTSUPP); 1459 + 1460 + if (dmabuf->size <= (start + length - 1) || 1461 + length / PAGE_SIZE >= MAX_NPFNS) 1462 + return ERR_PTR(-EINVAL); 1463 + 1464 + pages = iopt_alloc_pages(start_byte, length, writable); 1465 + if (IS_ERR(pages)) 1466 + return pages; 1467 + 1468 + /* 1469 + * The mmap_lock can be held when obtaining the dmabuf reservation lock 1470 + * which creates a locking cycle with the pages mutex which is held 1471 + * while obtaining the mmap_lock. This locking path is not present for 1472 + * IOPT_ADDRESS_DMABUF so split the lock class. 1473 + */ 1474 + lockdep_set_class(&pages->mutex, &pages_dmabuf_mutex_key); 1475 + 1476 + /* dmabuf does not use pinned page accounting. 
*/ 1477 + pages->account_mode = IOPT_PAGES_ACCOUNT_NONE; 1478 + pages->type = IOPT_ADDRESS_DMABUF; 1479 + pages->dmabuf.start = start - start_byte; 1480 + INIT_LIST_HEAD(&pages->dmabuf.tracker); 1481 + 1482 + rc = iopt_map_dmabuf(ictx, pages, dmabuf); 1483 + if (rc) { 1484 + iopt_put_pages(pages); 1485 + return ERR_PTR(rc); 1486 + } 1487 + 1488 + return pages; 1489 + } 1490 + 1491 + int iopt_dmabuf_track_domain(struct iopt_pages *pages, struct iopt_area *area, 1492 + struct iommu_domain *domain) 1493 + { 1494 + struct iopt_pages_dmabuf_track *track; 1495 + 1496 + lockdep_assert_held(&pages->mutex); 1497 + if (WARN_ON(!iopt_is_dmabuf(pages))) 1498 + return -EINVAL; 1499 + 1500 + list_for_each_entry(track, &pages->dmabuf.tracker, elm) 1501 + if (WARN_ON(track->domain == domain && track->area == area)) 1502 + return -EINVAL; 1503 + 1504 + track = kzalloc(sizeof(*track), GFP_KERNEL); 1505 + if (!track) 1506 + return -ENOMEM; 1507 + track->domain = domain; 1508 + track->area = area; 1509 + list_add_tail(&track->elm, &pages->dmabuf.tracker); 1510 + 1511 + return 0; 1512 + } 1513 + 1514 + void iopt_dmabuf_untrack_domain(struct iopt_pages *pages, 1515 + struct iopt_area *area, 1516 + struct iommu_domain *domain) 1517 + { 1518 + struct iopt_pages_dmabuf_track *track; 1519 + 1520 + lockdep_assert_held(&pages->mutex); 1521 + WARN_ON(!iopt_is_dmabuf(pages)); 1522 + 1523 + list_for_each_entry(track, &pages->dmabuf.tracker, elm) { 1524 + if (track->domain == domain && track->area == area) { 1525 + list_del(&track->elm); 1526 + kfree(track); 1527 + return; 1528 + } 1529 + } 1530 + WARN_ON(true); 1531 + } 1532 + 1533 + int iopt_dmabuf_track_all_domains(struct iopt_area *area, 1534 + struct iopt_pages *pages) 1535 + { 1536 + struct iopt_pages_dmabuf_track *track; 1537 + struct iommu_domain *domain; 1538 + unsigned long index; 1539 + int rc; 1540 + 1541 + list_for_each_entry(track, &pages->dmabuf.tracker, elm) 1542 + if (WARN_ON(track->area == area)) 1543 + return -EINVAL; 1544 + 
1545 + xa_for_each(&area->iopt->domains, index, domain) { 1546 + rc = iopt_dmabuf_track_domain(pages, area, domain); 1547 + if (rc) 1548 + goto err_untrack; 1549 + } 1550 + return 0; 1551 + err_untrack: 1552 + iopt_dmabuf_untrack_all_domains(area, pages); 1553 + return rc; 1554 + } 1555 + 1556 + void iopt_dmabuf_untrack_all_domains(struct iopt_area *area, 1557 + struct iopt_pages *pages) 1558 + { 1559 + struct iopt_pages_dmabuf_track *track; 1560 + struct iopt_pages_dmabuf_track *tmp; 1561 + 1562 + list_for_each_entry_safe(track, tmp, &pages->dmabuf.tracker, 1563 + elm) { 1564 + if (track->area == area) { 1565 + list_del(&track->elm); 1566 + kfree(track); 1567 + } 1568 + } 1434 1569 } 1435 1570 1436 1571 void iopt_release_pages(struct kref *kref) ··· 1653 1372 mutex_destroy(&pages->mutex); 1654 1373 put_task_struct(pages->source_task); 1655 1374 free_uid(pages->source_user); 1656 - if (pages->type == IOPT_ADDRESS_FILE) 1375 + if (iopt_is_dmabuf(pages) && pages->dmabuf.attach) { 1376 + struct dma_buf *dmabuf = pages->dmabuf.attach->dmabuf; 1377 + 1378 + dma_buf_detach(dmabuf, pages->dmabuf.attach); 1379 + dma_buf_put(dmabuf); 1380 + WARN_ON(!list_empty(&pages->dmabuf.tracker)); 1381 + } else if (pages->type == IOPT_ADDRESS_FILE) { 1657 1382 fput(pages->file); 1383 + } 1658 1384 kfree(pages); 1659 1385 } 1660 1386 ··· 1739 1451 1740 1452 lockdep_assert_held(&pages->mutex); 1741 1453 1454 + if (iopt_is_dmabuf(pages)) { 1455 + if (WARN_ON(iopt_dmabuf_revoked(pages))) 1456 + return; 1457 + iopt_area_unmap_domain_range(area, domain, start_index, 1458 + last_index); 1459 + return; 1460 + } 1461 + 1742 1462 /* 1743 1463 * For security we must not unpin something that is still DMA mapped, 1744 1464 * so this must unmap any IOVA before we go ahead and unpin the pages. 
··· 1822 1526 void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, 1823 1527 struct iommu_domain *domain) 1824 1528 { 1529 + if (iopt_dmabuf_revoked(pages)) 1530 + return; 1531 + 1825 1532 __iopt_area_unfill_domain(area, pages, domain, 1826 1533 iopt_area_last_index(area)); 1827 1534 } ··· 1844 1545 int rc; 1845 1546 1846 1547 lockdep_assert_held(&area->pages->mutex); 1548 + 1549 + if (iopt_dmabuf_revoked(area->pages)) 1550 + return 0; 1847 1551 1848 1552 rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area), 1849 1553 iopt_area_last_index(area)); ··· 1907 1605 return 0; 1908 1606 1909 1607 mutex_lock(&pages->mutex); 1910 - rc = pfn_reader_first(&pfns, pages, iopt_area_index(area), 1911 - iopt_area_last_index(area)); 1912 - if (rc) 1913 - goto out_unlock; 1608 + if (iopt_is_dmabuf(pages)) { 1609 + rc = iopt_dmabuf_track_all_domains(area, pages); 1610 + if (rc) 1611 + goto out_unlock; 1612 + } 1914 1613 1915 - while (!pfn_reader_done(&pfns)) { 1916 - done_first_end_index = pfns.batch_end_index; 1917 - done_all_end_index = pfns.batch_start_index; 1918 - xa_for_each(&area->iopt->domains, index, domain) { 1919 - rc = batch_to_domain(&pfns.batch, domain, area, 1920 - pfns.batch_start_index); 1614 + if (!iopt_dmabuf_revoked(pages)) { 1615 + rc = pfn_reader_first(&pfns, pages, iopt_area_index(area), 1616 + iopt_area_last_index(area)); 1617 + if (rc) 1618 + goto out_untrack; 1619 + 1620 + while (!pfn_reader_done(&pfns)) { 1621 + done_first_end_index = pfns.batch_end_index; 1622 + done_all_end_index = pfns.batch_start_index; 1623 + xa_for_each(&area->iopt->domains, index, domain) { 1624 + rc = batch_to_domain(&pfns.batch, domain, area, 1625 + pfns.batch_start_index); 1626 + if (rc) 1627 + goto out_unmap; 1628 + } 1629 + done_all_end_index = done_first_end_index; 1630 + 1631 + rc = pfn_reader_next(&pfns); 1921 1632 if (rc) 1922 1633 goto out_unmap; 1923 1634 } 1924 - done_all_end_index = done_first_end_index; 1925 - 1926 - rc = 
pfn_reader_next(&pfns); 1635 + rc = pfn_reader_update_pinned(&pfns); 1927 1636 if (rc) 1928 1637 goto out_unmap; 1638 + 1639 + pfn_reader_destroy(&pfns); 1929 1640 } 1930 - rc = pfn_reader_update_pinned(&pfns); 1931 - if (rc) 1932 - goto out_unmap; 1933 1641 1934 1642 area->storage_domain = xa_load(&area->iopt->domains, 0); 1935 1643 interval_tree_insert(&area->pages_node, &pages->domains_itree); 1936 - goto out_destroy; 1644 + mutex_unlock(&pages->mutex); 1645 + return 0; 1937 1646 1938 1647 out_unmap: 1939 1648 pfn_reader_release_pins(&pfns); ··· 1971 1658 end_index); 1972 1659 } 1973 1660 } 1974 - out_destroy: 1975 1661 pfn_reader_destroy(&pfns); 1662 + out_untrack: 1663 + if (iopt_is_dmabuf(pages)) 1664 + iopt_dmabuf_untrack_all_domains(area, pages); 1976 1665 out_unlock: 1977 1666 mutex_unlock(&pages->mutex); 1978 1667 return rc; ··· 2000 1685 if (!area->storage_domain) 2001 1686 goto out_unlock; 2002 1687 2003 - xa_for_each(&iopt->domains, index, domain) 2004 - if (domain != area->storage_domain) 1688 + xa_for_each(&iopt->domains, index, domain) { 1689 + if (domain == area->storage_domain) 1690 + continue; 1691 + 1692 + if (!iopt_dmabuf_revoked(pages)) 2005 1693 iopt_area_unmap_domain_range( 2006 1694 area, domain, iopt_area_index(area), 2007 1695 iopt_area_last_index(area)); 1696 + } 2008 1697 2009 1698 if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) 2010 1699 WARN_ON(RB_EMPTY_NODE(&area->pages_node.rb)); 2011 1700 interval_tree_remove(&area->pages_node, &pages->domains_itree); 2012 1701 iopt_area_unfill_domain(area, pages, area->storage_domain); 1702 + if (iopt_is_dmabuf(pages)) 1703 + iopt_dmabuf_untrack_all_domains(area, pages); 2013 1704 area->storage_domain = NULL; 2014 1705 out_unlock: 2015 1706 mutex_unlock(&pages->mutex); ··· 2352 2031 if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) 2353 2032 return -EPERM; 2354 2033 2355 - if (pages->type == IOPT_ADDRESS_FILE) 2034 + if (iopt_is_dmabuf(pages)) 2035 + return -EINVAL; 2036 + 2037 + if (pages->type != 
IOPT_ADDRESS_USER) 2356 2038 return iopt_pages_rw_slow(pages, start_index, last_index, 2357 2039 start_byte % PAGE_SIZE, data, length, 2358 2040 flags); 2359 - 2360 - if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && 2361 - WARN_ON(pages->type != IOPT_ADDRESS_USER)) 2362 - return -EINVAL; 2363 2041 2364 2042 if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) { 2365 2043 if (start_index == last_index)
+143
drivers/iommu/iommufd/selftest.c
··· 5 5 */ 6 6 #include <linux/anon_inodes.h> 7 7 #include <linux/debugfs.h> 8 + #include <linux/dma-buf.h> 9 + #include <linux/dma-resv.h> 8 10 #include <linux/fault-inject.h> 9 11 #include <linux/file.h> 10 12 #include <linux/iommu.h> ··· 1947 1945 } 1948 1946 } 1949 1947 1948 + struct iommufd_test_dma_buf { 1949 + void *memory; 1950 + size_t length; 1951 + bool revoked; 1952 + }; 1953 + 1954 + static int iommufd_test_dma_buf_attach(struct dma_buf *dmabuf, 1955 + struct dma_buf_attachment *attachment) 1956 + { 1957 + return 0; 1958 + } 1959 + 1960 + static void iommufd_test_dma_buf_detach(struct dma_buf *dmabuf, 1961 + struct dma_buf_attachment *attachment) 1962 + { 1963 + } 1964 + 1965 + static struct sg_table * 1966 + iommufd_test_dma_buf_map(struct dma_buf_attachment *attachment, 1967 + enum dma_data_direction dir) 1968 + { 1969 + return ERR_PTR(-EOPNOTSUPP); 1970 + } 1971 + 1972 + static void iommufd_test_dma_buf_unmap(struct dma_buf_attachment *attachment, 1973 + struct sg_table *sgt, 1974 + enum dma_data_direction dir) 1975 + { 1976 + } 1977 + 1978 + static void iommufd_test_dma_buf_release(struct dma_buf *dmabuf) 1979 + { 1980 + struct iommufd_test_dma_buf *priv = dmabuf->priv; 1981 + 1982 + kfree(priv->memory); 1983 + kfree(priv); 1984 + } 1985 + 1986 + static const struct dma_buf_ops iommufd_test_dmabuf_ops = { 1987 + .attach = iommufd_test_dma_buf_attach, 1988 + .detach = iommufd_test_dma_buf_detach, 1989 + .map_dma_buf = iommufd_test_dma_buf_map, 1990 + .release = iommufd_test_dma_buf_release, 1991 + .unmap_dma_buf = iommufd_test_dma_buf_unmap, 1992 + }; 1993 + 1994 + int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 1995 + struct dma_buf_phys_vec *phys) 1996 + { 1997 + struct iommufd_test_dma_buf *priv = attachment->dmabuf->priv; 1998 + 1999 + dma_resv_assert_held(attachment->dmabuf->resv); 2000 + 2001 + if (attachment->dmabuf->ops != &iommufd_test_dmabuf_ops) 2002 + return -EOPNOTSUPP; 2003 + 2004 + if (priv->revoked) 2005 + 
return -ENODEV; 2006 + 2007 + phys->paddr = virt_to_phys(priv->memory); 2008 + phys->len = priv->length; 2009 + return 0; 2010 + } 2011 + 2012 + static int iommufd_test_dmabuf_get(struct iommufd_ucmd *ucmd, 2013 + unsigned int open_flags, 2014 + size_t len) 2015 + { 2016 + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); 2017 + struct iommufd_test_dma_buf *priv; 2018 + struct dma_buf *dmabuf; 2019 + int rc; 2020 + 2021 + len = ALIGN(len, PAGE_SIZE); 2022 + if (len == 0 || len > PAGE_SIZE * 512) 2023 + return -EINVAL; 2024 + 2025 + priv = kzalloc(sizeof(*priv), GFP_KERNEL); 2026 + if (!priv) 2027 + return -ENOMEM; 2028 + 2029 + priv->length = len; 2030 + priv->memory = kzalloc(len, GFP_KERNEL); 2031 + if (!priv->memory) { 2032 + rc = -ENOMEM; 2033 + goto err_free; 2034 + } 2035 + 2036 + exp_info.ops = &iommufd_test_dmabuf_ops; 2037 + exp_info.size = len; 2038 + exp_info.flags = open_flags; 2039 + exp_info.priv = priv; 2040 + 2041 + dmabuf = dma_buf_export(&exp_info); 2042 + if (IS_ERR(dmabuf)) { 2043 + rc = PTR_ERR(dmabuf); 2044 + goto err_free; 2045 + } 2046 + 2047 + return dma_buf_fd(dmabuf, open_flags); 2048 + 2049 + err_free: 2050 + kfree(priv->memory); 2051 + kfree(priv); 2052 + return rc; 2053 + } 2054 + 2055 + static int iommufd_test_dmabuf_revoke(struct iommufd_ucmd *ucmd, int fd, 2056 + bool revoked) 2057 + { 2058 + struct iommufd_test_dma_buf *priv; 2059 + struct dma_buf *dmabuf; 2060 + int rc = 0; 2061 + 2062 + dmabuf = dma_buf_get(fd); 2063 + if (IS_ERR(dmabuf)) 2064 + return PTR_ERR(dmabuf); 2065 + 2066 + if (dmabuf->ops != &iommufd_test_dmabuf_ops) { 2067 + rc = -EOPNOTSUPP; 2068 + goto err_put; 2069 + } 2070 + 2071 + priv = dmabuf->priv; 2072 + dma_resv_lock(dmabuf->resv, NULL); 2073 + priv->revoked = revoked; 2074 + dma_buf_move_notify(dmabuf); 2075 + dma_resv_unlock(dmabuf->resv); 2076 + 2077 + err_put: 2078 + dma_buf_put(dmabuf); 2079 + return rc; 2080 + } 2081 + 1950 2082 int iommufd_test(struct iommufd_ucmd *ucmd) 1951 2083 { 1952 2084 struct iommu_test_cmd 
*cmd = ucmd->cmd; ··· 2159 2023 return iommufd_test_pasid_detach(ucmd, cmd); 2160 2024 case IOMMU_TEST_OP_PASID_CHECK_HWPT: 2161 2025 return iommufd_test_pasid_check_hwpt(ucmd, cmd); 2026 + case IOMMU_TEST_OP_DMABUF_GET: 2027 + return iommufd_test_dmabuf_get(ucmd, cmd->dmabuf_get.open_flags, 2028 + cmd->dmabuf_get.length); 2029 + case IOMMU_TEST_OP_DMABUF_REVOKE: 2030 + return iommufd_test_dmabuf_revoke(ucmd, 2031 + cmd->dmabuf_revoke.dmabuf_fd, 2032 + cmd->dmabuf_revoke.revoked); 2162 2033 default: 2163 2034 return -EOPNOTSUPP; 2164 2035 }
+34
drivers/vfio/pci/vfio_pci_dmabuf.c
··· 82 82 .release = vfio_pci_dma_buf_release, 83 83 }; 84 84 85 + /* 86 + * This is a temporary "private interconnect" between VFIO DMABUF and iommufd. 87 + * It allows the two co-operating drivers to exchange the physical address of 88 + * the BAR. This is to be replaced with a formal DMABUF system for negotiated 89 + * interconnect types. 90 + * 91 + * If this function succeeds the following are true: 92 + * - There is one physical range and it is pointing to MMIO 93 + * - When move_notify is called it means revoke, not move, vfio_dma_buf_map 94 + * will fail if it is currently revoked 95 + */ 96 + int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 97 + struct dma_buf_phys_vec *phys) 98 + { 99 + struct vfio_pci_dma_buf *priv; 100 + 101 + dma_resv_assert_held(attachment->dmabuf->resv); 102 + 103 + if (attachment->dmabuf->ops != &vfio_pci_dmabuf_ops) 104 + return -EOPNOTSUPP; 105 + 106 + priv = attachment->dmabuf->priv; 107 + if (priv->revoked) 108 + return -ENODEV; 109 + 110 + /* More than one range to iommufd will require proper DMABUF support */ 111 + if (priv->nr_ranges != 1) 112 + return -EOPNOTSUPP; 113 + 114 + *phys = priv->phys_vec[0]; 115 + return 0; 116 + } 117 + EXPORT_SYMBOL_FOR_MODULES(vfio_pci_dma_buf_iommufd_map, "iommufd"); 118 + 85 119 int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec, 86 120 struct vfio_region_dma_range *dma_ranges, 87 121 size_t nr_ranges, phys_addr_t start,
+4
include/linux/vfio_pci_core.h
··· 29 29 struct vfio_pci_region; 30 30 struct p2pdma_provider; 31 31 struct dma_buf_phys_vec; 32 + struct dma_buf_attachment; 32 33 33 34 struct vfio_pci_eventfd { 34 35 struct eventfd_ctx *ctx; ··· 226 225 addr + (PAGE_SIZE << order) > vma->vm_end || 227 226 !IS_ALIGNED(pfn, 1 << order))); 228 227 } 228 + 229 + int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 230 + struct dma_buf_phys_vec *phys); 229 231 230 232 #endif /* VFIO_PCI_CORE_H */
+10
include/uapi/linux/iommufd.h
··· 450 450 * nested domain will translate the same as the nesting parent. The S1 will 451 451 * install a Context Descriptor Table pointing at userspace memory translated 452 452 * by the nesting parent. 453 + * 454 + * It's suggested to allocate a vDEVICE object carrying vSID and then re-attach 455 + * the nested domain, as soon as the vSID is available in the VMM level: 456 + * 457 + * - when Cfg=translate, a vDEVICE must be allocated prior to attaching to the 458 + * allocated nested domain, as CD/ATS invalidations and vevents need a vSID. 459 + * - when Cfg=bypass/abort, a vDEVICE is not enforced during the nested domain 460 + * attachment, to support a GBPA case where VM sets CR0.SMMUEN=0. However, if 461 + * VM sets CR0.SMMUEN=1 while missing a vDEVICE object, kernel would fail to 462 + * report events to the VM. E.g. F_TRANSLATION when guest STE.Cfg=abort. 453 463 */ 454 464 struct iommu_hwpt_arm_smmuv3 { 455 465 __aligned_le64 ste[2];
+43
tools/testing/selftests/iommu/iommufd.c
··· 1571 1571 test_ioctl_destroy(dst_ioas_id); 1572 1572 } 1573 1573 1574 + TEST_F(iommufd_ioas, dmabuf_simple) 1575 + { 1576 + size_t buf_size = PAGE_SIZE*4; 1577 + __u64 iova; 1578 + int dfd; 1579 + 1580 + test_cmd_get_dmabuf(buf_size, &dfd); 1581 + test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, 0, &iova); 1582 + test_err_ioctl_ioas_map_file(EINVAL, dfd, buf_size, buf_size, &iova); 1583 + test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, buf_size + 1, &iova); 1584 + test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova); 1585 + 1586 + close(dfd); 1587 + } 1588 + 1589 + TEST_F(iommufd_ioas, dmabuf_revoke) 1590 + { 1591 + size_t buf_size = PAGE_SIZE*4; 1592 + __u32 hwpt_id; 1593 + __u64 iova; 1594 + __u64 iova2; 1595 + int dfd; 1596 + 1597 + test_cmd_get_dmabuf(buf_size, &dfd); 1598 + test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova); 1599 + test_cmd_revoke_dmabuf(dfd, true); 1600 + 1601 + if (variant->mock_domains) 1602 + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0, 1603 + &hwpt_id); 1604 + 1605 + test_err_ioctl_ioas_map_file(ENODEV, dfd, 0, buf_size, &iova2); 1606 + 1607 + test_cmd_revoke_dmabuf(dfd, false); 1608 + test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova2); 1609 + 1610 + /* Restore the iova back */ 1611 + test_ioctl_ioas_unmap(iova, buf_size); 1612 + test_ioctl_ioas_map_fixed_file(dfd, 0, buf_size, iova); 1613 + 1614 + close(dfd); 1615 + } 1616 + 1574 1617 FIXTURE(iommufd_mock_domain) 1575 1618 { 1576 1619 int fd;
+44
tools/testing/selftests/iommu/iommufd_utils.h
··· 560 560 EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \ 561 561 self->fd, access_id, access_pages_id)) 562 562 563 + static int _test_cmd_get_dmabuf(int fd, size_t len, int *out_fd) 564 + { 565 + struct iommu_test_cmd cmd = { 566 + .size = sizeof(cmd), 567 + .op = IOMMU_TEST_OP_DMABUF_GET, 568 + .dmabuf_get = { .length = len, .open_flags = O_CLOEXEC }, 569 + }; 570 + 571 + *out_fd = ioctl(fd, IOMMU_TEST_CMD, &cmd); 572 + if (*out_fd < 0) 573 + return -1; 574 + return 0; 575 + } 576 + #define test_cmd_get_dmabuf(len, out_fd) \ 577 + ASSERT_EQ(0, _test_cmd_get_dmabuf(self->fd, len, out_fd)) 578 + 579 + static int _test_cmd_revoke_dmabuf(int fd, int dmabuf_fd, bool revoked) 580 + { 581 + struct iommu_test_cmd cmd = { 582 + .size = sizeof(cmd), 583 + .op = IOMMU_TEST_OP_DMABUF_REVOKE, 584 + .dmabuf_revoke = { .dmabuf_fd = dmabuf_fd, .revoked = revoked }, 585 + }; 586 + int ret; 587 + 588 + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); 589 + if (ret < 0) 590 + return -1; 591 + return 0; 592 + } 593 + #define test_cmd_revoke_dmabuf(dmabuf_fd, revoke) \ 594 + ASSERT_EQ(0, _test_cmd_revoke_dmabuf(self->fd, dmabuf_fd, revoke)) 595 + 563 596 static int _test_ioctl_destroy(int fd, unsigned int id) 564 597 { 565 598 struct iommu_destroy cmd = { ··· 762 729 _test_ioctl_ioas_map_file( \ 763 730 self->fd, ioas_id, mfd, start, length, iova_p, \ 764 731 IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) 732 + 733 + #define test_ioctl_ioas_map_fixed_file(mfd, start, length, iova) \ 734 + ({ \ 735 + __u64 __iova = iova; \ 736 + ASSERT_EQ(0, _test_ioctl_ioas_map_file( \ 737 + self->fd, self->ioas_id, mfd, start, \ 738 + length, &__iova, \ 739 + IOMMU_IOAS_MAP_FIXED_IOVA | \ 740 + IOMMU_IOAS_MAP_WRITEABLE | \ 741 + IOMMU_IOAS_MAP_READABLE)); \ 742 + }) 765 743 766 744 static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit) 767 745 {