Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfio-v7.0-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:
"A small cycle with the bulk in selftests and reintroducing poison
handling in the nvgrace-gpu driver. The rest are fixes, cleanups, and
some dmabuf structure consolidation.

- Update an outdated mdev comment so that it references the renamed
mdev_type_add() function (Julia Lawall)

- Introduce selftest support for IOMMU mapping of PCI MMIO BARs (Alex
Mastro)

- Relax selftest assertion relative to differences in huge page
handling between legacy (v1) TYPE1 IOMMU mapping behavior and the
compatibility mode supported by IOMMUFD (David Matlack)

- Reintroduce memory poison handling support for memory that is not
backed by struct page in the nvgrace-gpu variant driver (Ankit Agrawal)

- Replace dma_buf_phys_vec with phys_vec to avoid duplicate structure
and semantics (Leon Romanovsky)

- Add missing upstream bridge locking across PCI function reset,
resolving an assertion failure when secondary bus reset is used to
provide that reset (Anthony Pighin)

- Fixes to hisi_acc vfio-pci variant driver to resolve corner case
issues related to resets, repeated migration, and error injection
scenarios (Longfang Liu, Weili Qian)

- Restrict vfio selftest builds to arm64 and x86_64, resolving
compiler warnings on 32-bit archs (Ted Logan)

- Un-deprecate the fsl-mc vfio bus driver as a new maintainer has
stepped up (Ioana Ciornei)"

* tag 'vfio-v7.0-rc1' of https://github.com/awilliam/linux-vfio:
vfio/fsl-mc: add myself as maintainer
vfio: selftests: only build tests on arm64 and x86_64
hisi_acc_vfio_pci: fix the queue parameter anomaly issue
hisi_acc_vfio_pci: resolve duplicate migration states
hisi_acc_vfio_pci: update status after RAS error
hisi_acc_vfio_pci: fix VF reset timeout issue
vfio/pci: Lock upstream bridge for vfio_pci_core_disable()
types: reuse common phys_vec type instead of DMABUF open-coded variant
vfio/nvgrace-gpu: register device memory for poison handling
mm: add stubs for PFNMAP memory failure registration functions
vfio: selftests: Drop IOMMU mapping size assertions for VFIO_TYPE1_IOMMU
vfio: selftests: Add vfio_dma_mapping_mmio_test
vfio: selftests: Align BAR mmaps for efficient IOMMU mapping
vfio: selftests: Centralize IOMMU mode name definitions
vfio/mdev: update outdated comment

+411 -67
+2 -1
MAINTAINERS
··· 27606 27606 F: tools/testing/selftests/vfio/ 27607 27607 27608 27608 VFIO FSL-MC DRIVER 27609 + M: Ioana Ciornei <ioana.ciornei@nxp.com> 27609 27610 L: kvm@vger.kernel.org 27610 - S: Obsolete 27611 + S: Maintained 27611 27612 F: drivers/vfio/fsl-mc/ 27612 27613 27613 27614 VFIO HISILICON PCI DRIVER
+3 -3
drivers/dma-buf/dma-buf-mapping.c
··· 33 33 } 34 34 35 35 static unsigned int calc_sg_nents(struct dma_iova_state *state, 36 - struct dma_buf_phys_vec *phys_vec, 37 - size_t nr_ranges, size_t size) 36 + struct phys_vec *phys_vec, size_t nr_ranges, 37 + size_t size) 38 38 { 39 39 unsigned int nents = 0; 40 40 size_t i; ··· 91 91 */ 92 92 struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach, 93 93 struct p2pdma_provider *provider, 94 - struct dma_buf_phys_vec *phys_vec, 94 + struct phys_vec *phys_vec, 95 95 size_t nr_ranges, size_t size, 96 96 enum dma_data_direction dir) 97 97 {
+1 -1
drivers/iommu/iommufd/io_pagetable.h
··· 202 202 203 203 struct iopt_pages_dmabuf { 204 204 struct dma_buf_attachment *attach; 205 - struct dma_buf_phys_vec phys; 205 + struct phys_vec phys; 206 206 /* Always PAGE_SIZE aligned */ 207 207 unsigned long start; 208 208 struct list_head tracker;
+2 -3
drivers/iommu/iommufd/iommufd_private.h
··· 20 20 struct iommu_option; 21 21 struct iommufd_device; 22 22 struct dma_buf_attachment; 23 - struct dma_buf_phys_vec; 24 23 25 24 struct iommufd_sw_msi_map { 26 25 struct list_head sw_msi_item; ··· 717 718 void iommufd_test_exit(void); 718 719 bool iommufd_selftest_is_mock_dev(struct device *dev); 719 720 int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 720 - struct dma_buf_phys_vec *phys); 721 + struct phys_vec *phys); 721 722 #else 722 723 static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, 723 724 unsigned int ioas_id, ··· 741 742 } 742 743 static inline int 743 744 iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 744 - struct dma_buf_phys_vec *phys) 745 + struct phys_vec *phys) 745 746 { 746 747 return -EOPNOTSUPP; 747 748 }
+2 -2
drivers/iommu/iommufd/pages.c
··· 1078 1078 } 1079 1079 1080 1080 struct pfn_reader_dmabuf { 1081 - struct dma_buf_phys_vec phys; 1081 + struct phys_vec phys; 1082 1082 unsigned long start_offset; 1083 1083 }; 1084 1084 ··· 1461 1461 */ 1462 1462 static int 1463 1463 sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 1464 - struct dma_buf_phys_vec *phys) 1464 + struct phys_vec *phys) 1465 1465 { 1466 1466 typeof(&vfio_pci_dma_buf_iommufd_map) fn; 1467 1467 int rc;
+1 -1
drivers/iommu/iommufd/selftest.c
··· 2002 2002 }; 2003 2003 2004 2004 int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 2005 - struct dma_buf_phys_vec *phys) 2005 + struct phys_vec *phys) 2006 2006 { 2007 2007 struct iommufd_test_dma_buf *priv = attachment->dmabuf->priv; 2008 2008
+1 -4
drivers/vfio/fsl-mc/Kconfig
··· 2 2 depends on FSL_MC_BUS 3 3 4 4 config VFIO_FSL_MC 5 - tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices (DEPRECATED)" 5 + tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" 6 6 select EVENTFD 7 7 help 8 - The vfio-fsl-mc driver is deprecated and will be removed in a 9 - future kernel release. 10 - 11 8 Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc 12 9 (Management Complex) devices. This is required to passthrough 13 10 fsl-mc bus devices using the VFIO framework.
-2
drivers/vfio/fsl-mc/vfio_fsl_mc.c
··· 531 531 struct device *dev = &mc_dev->dev; 532 532 int ret; 533 533 534 - dev_err_once(dev, "DEPRECATION: vfio-fsl-mc is deprecated and will be removed in a future kernel release\n"); 535 - 536 534 vdev = vfio_alloc_device(vfio_fsl_mc_device, vdev, dev, 537 535 &vfio_fsl_mc_ops); 538 536 if (IS_ERR(vdev))
+1 -1
drivers/vfio/mdev/mdev_sysfs.c
··· 156 156 struct mdev_type *type = to_mdev_type(kobj); 157 157 158 158 pr_debug("Releasing group %s\n", kobj->name); 159 - /* Pairs with the get in add_mdev_supported_type() */ 159 + /* Pairs with the get in mdev_type_add() */ 160 160 put_device(type->parent->dev); 161 161 } 162 162
+27 -3
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
··· 426 426 ret = qm_get_vft(vf_qm, &vf_qm->qp_base); 427 427 if (ret <= 0) { 428 428 dev_err(dev, "failed to get vft qp nums\n"); 429 - return ret; 429 + return ret < 0 ? ret : -EINVAL; 430 430 } 431 431 432 432 if (ret != vf_data->qp_num) { ··· 1188 1188 return 0; 1189 1189 } 1190 1190 1191 + static void hisi_acc_vf_pci_reset_prepare(struct pci_dev *pdev) 1192 + { 1193 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev); 1194 + struct hisi_qm *qm = hisi_acc_vdev->pf_qm; 1195 + struct device *dev = &qm->pdev->dev; 1196 + u32 delay = 0; 1197 + 1198 + /* All reset requests need to be queued for processing */ 1199 + while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { 1200 + msleep(1); 1201 + if (++delay > QM_RESET_WAIT_TIMEOUT) { 1202 + dev_err(dev, "reset prepare failed\n"); 1203 + return; 1204 + } 1205 + } 1206 + 1207 + hisi_acc_vdev->set_reset_flag = true; 1208 + } 1209 + 1191 1210 static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev) 1192 1211 { 1193 1212 struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev); 1213 + struct hisi_qm *qm = hisi_acc_vdev->pf_qm; 1194 1214 1195 - if (hisi_acc_vdev->core_device.vdev.migration_flags != 1196 - VFIO_MIGRATION_STOP_COPY) 1215 + if (hisi_acc_vdev->set_reset_flag) 1216 + clear_bit(QM_RESETTING, &qm->misc_ctl); 1217 + 1218 + if (!hisi_acc_vdev->core_device.vdev.mig_ops) 1197 1219 return; 1198 1220 1199 1221 mutex_lock(&hisi_acc_vdev->state_mutex); ··· 1569 1547 } 1570 1548 hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; 1571 1549 hisi_acc_vdev->dev_opened = true; 1550 + hisi_acc_vdev->match_done = 0; 1572 1551 mutex_unlock(&hisi_acc_vdev->open_mutex); 1573 1552 } 1574 1553 ··· 1757 1734 MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table); 1758 1735 1759 1736 static const struct pci_error_handlers hisi_acc_vf_err_handlers = { 1737 + .reset_prepare = hisi_acc_vf_pci_reset_prepare, 1760 1738 .reset_done = hisi_acc_vf_pci_aer_reset_done, 1761 1739 .error_detected = 
vfio_pci_core_aer_err_detected, 1762 1740 };
+2
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
··· 27 27 28 28 #define ERROR_CHECK_TIMEOUT 100 29 29 #define CHECK_DELAY_TIME 100 30 + #define QM_RESET_WAIT_TIMEOUT 60000 30 31 31 32 #define QM_SQC_VFT_BASE_SHIFT_V2 28 32 33 #define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) ··· 129 128 struct hisi_acc_vf_core_device { 130 129 struct vfio_pci_core_device core_device; 131 130 u8 match_done; 131 + bool set_reset_flag; 132 132 /* 133 133 * io_base is only valid when dev_opened is true, 134 134 * which is protected by open_mutex.
+110 -5
drivers/vfio/pci/nvgrace-gpu/main.c
··· 9 9 #include <linux/jiffies.h> 10 10 #include <linux/pci-p2pdma.h> 11 11 #include <linux/pm_runtime.h> 12 + #include <linux/memory-failure.h> 12 13 13 14 /* 14 15 * The device memory usable to the workloads running in the VM is cached ··· 50 49 void *memaddr; 51 50 void __iomem *ioaddr; 52 51 }; /* Base virtual address of the region */ 52 + struct pfn_address_space pfn_address_space; 53 53 }; 54 54 55 55 struct nvgrace_gpu_pci_core_device { ··· 90 88 return NULL; 91 89 } 92 90 91 + static int pfn_memregion_offset(struct nvgrace_gpu_pci_core_device *nvdev, 92 + unsigned int index, 93 + unsigned long pfn, 94 + pgoff_t *pfn_offset_in_region) 95 + { 96 + struct mem_region *region; 97 + unsigned long start_pfn, num_pages; 98 + 99 + region = nvgrace_gpu_memregion(index, nvdev); 100 + if (!region) 101 + return -EINVAL; 102 + 103 + start_pfn = PHYS_PFN(region->memphys); 104 + num_pages = region->memlength >> PAGE_SHIFT; 105 + 106 + if (pfn < start_pfn || pfn >= start_pfn + num_pages) 107 + return -EFAULT; 108 + 109 + *pfn_offset_in_region = pfn - start_pfn; 110 + 111 + return 0; 112 + } 113 + 114 + static inline 115 + struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma); 116 + 117 + static int nvgrace_gpu_pfn_to_vma_pgoff(struct vm_area_struct *vma, 118 + unsigned long pfn, 119 + pgoff_t *pgoff) 120 + { 121 + struct nvgrace_gpu_pci_core_device *nvdev; 122 + unsigned int index = 123 + vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); 124 + pgoff_t vma_offset_in_region = vma->vm_pgoff & 125 + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); 126 + pgoff_t pfn_offset_in_region; 127 + int ret; 128 + 129 + nvdev = vma_to_nvdev(vma); 130 + if (!nvdev) 131 + return -ENOENT; 132 + 133 + ret = pfn_memregion_offset(nvdev, index, pfn, &pfn_offset_in_region); 134 + if (ret) 135 + return ret; 136 + 137 + /* Ensure PFN is not before VMA's start within the region */ 138 + if (pfn_offset_in_region < vma_offset_in_region) 139 + return -EFAULT; 140 + 141 + 
/* Calculate offset from VMA start */ 142 + *pgoff = vma->vm_pgoff + 143 + (pfn_offset_in_region - vma_offset_in_region); 144 + 145 + return 0; 146 + } 147 + 148 + static int 149 + nvgrace_gpu_vfio_pci_register_pfn_range(struct vfio_device *core_vdev, 150 + struct mem_region *region) 151 + { 152 + unsigned long pfn, nr_pages; 153 + 154 + pfn = PHYS_PFN(region->memphys); 155 + nr_pages = region->memlength >> PAGE_SHIFT; 156 + 157 + region->pfn_address_space.node.start = pfn; 158 + region->pfn_address_space.node.last = pfn + nr_pages - 1; 159 + region->pfn_address_space.mapping = core_vdev->inode->i_mapping; 160 + region->pfn_address_space.pfn_to_vma_pgoff = nvgrace_gpu_pfn_to_vma_pgoff; 161 + 162 + return register_pfn_address_space(&region->pfn_address_space); 163 + } 164 + 93 165 static int nvgrace_gpu_open_device(struct vfio_device *core_vdev) 94 166 { 95 167 struct vfio_pci_core_device *vdev = ··· 190 114 * memory mapping. 191 115 */ 192 116 ret = vfio_pci_core_setup_barmap(vdev, 0); 193 - if (ret) { 194 - vfio_pci_core_disable(vdev); 195 - return ret; 117 + if (ret) 118 + goto error_exit; 119 + 120 + if (nvdev->resmem.memlength) { 121 + ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->resmem); 122 + if (ret && ret != -EOPNOTSUPP) 123 + goto error_exit; 196 124 } 197 125 198 - vfio_pci_core_finish_enable(vdev); 126 + ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->usemem); 127 + if (ret && ret != -EOPNOTSUPP) 128 + goto register_mem_failed; 199 129 130 + vfio_pci_core_finish_enable(vdev); 200 131 return 0; 132 + 133 + register_mem_failed: 134 + if (nvdev->resmem.memlength) 135 + unregister_pfn_address_space(&nvdev->resmem.pfn_address_space); 136 + error_exit: 137 + vfio_pci_core_disable(vdev); 138 + return ret; 201 139 } 202 140 203 141 static void nvgrace_gpu_close_device(struct vfio_device *core_vdev) ··· 219 129 struct nvgrace_gpu_pci_core_device *nvdev = 220 130 container_of(core_vdev, struct nvgrace_gpu_pci_core_device, 221 
131 core_device.vdev); 132 + 133 + if (nvdev->resmem.memlength) 134 + unregister_pfn_address_space(&nvdev->resmem.pfn_address_space); 135 + 136 + unregister_pfn_address_space(&nvdev->usemem.pfn_address_space); 222 137 223 138 /* Unmap the mapping to the device memory cached region */ 224 139 if (nvdev->usemem.memaddr) { ··· 341 246 .huge_fault = nvgrace_gpu_vfio_pci_huge_fault, 342 247 #endif 343 248 }; 249 + 250 + static inline 251 + struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma) 252 + { 253 + /* Check if this VMA belongs to us */ 254 + if (vma->vm_ops != &nvgrace_gpu_vfio_pci_mmap_ops) 255 + return NULL; 256 + 257 + return vma->vm_private_data; 258 + } 344 259 345 260 static int nvgrace_gpu_mmap(struct vfio_device *core_vdev, 346 261 struct vm_area_struct *vma) ··· 889 784 static int nvgrace_get_dmabuf_phys(struct vfio_pci_core_device *core_vdev, 890 785 struct p2pdma_provider **provider, 891 786 unsigned int region_index, 892 - struct dma_buf_phys_vec *phys_vec, 787 + struct phys_vec *phys_vec, 893 788 struct vfio_region_dma_range *dma_ranges, 894 789 size_t nr_ranges) 895 790 {
+13 -4
drivers/vfio/pci/vfio_pci_core.c
··· 588 588 589 589 void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) 590 590 { 591 + struct pci_dev *bridge; 591 592 struct pci_dev *pdev = vdev->pdev; 592 593 struct vfio_pci_dummy_resource *dummy_res, *tmp; 593 594 struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp; ··· 695 694 * We can not use the "try" reset interface here, which will 696 695 * overwrite the previously restored configuration information. 697 696 */ 698 - if (vdev->reset_works && pci_dev_trylock(pdev)) { 699 - if (!__pci_reset_function_locked(pdev)) 700 - vdev->needs_reset = false; 701 - pci_dev_unlock(pdev); 697 + if (vdev->reset_works) { 698 + bridge = pci_upstream_bridge(pdev); 699 + if (bridge && !pci_dev_trylock(bridge)) 700 + goto out_restore_state; 701 + if (pci_dev_trylock(pdev)) { 702 + if (!__pci_reset_function_locked(pdev)) 703 + vdev->needs_reset = false; 704 + pci_dev_unlock(pdev); 705 + } 706 + if (bridge) 707 + pci_dev_unlock(bridge); 702 708 } 703 709 710 + out_restore_state: 704 711 pci_restore_state(pdev); 705 712 out: 706 713 pci_disable_device(pdev);
+4 -4
drivers/vfio/pci/vfio_pci_dmabuf.c
··· 14 14 struct vfio_pci_core_device *vdev; 15 15 struct list_head dmabufs_elm; 16 16 size_t size; 17 - struct dma_buf_phys_vec *phys_vec; 17 + struct phys_vec *phys_vec; 18 18 struct p2pdma_provider *provider; 19 19 u32 nr_ranges; 20 20 u8 revoked : 1; ··· 106 106 * will fail if it is currently revoked 107 107 */ 108 108 int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 109 - struct dma_buf_phys_vec *phys) 109 + struct phys_vec *phys) 110 110 { 111 111 struct vfio_pci_dma_buf *priv; 112 112 ··· 128 128 } 129 129 EXPORT_SYMBOL_FOR_MODULES(vfio_pci_dma_buf_iommufd_map, "iommufd"); 130 130 131 - int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec, 131 + int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec, 132 132 struct vfio_region_dma_range *dma_ranges, 133 133 size_t nr_ranges, phys_addr_t start, 134 134 phys_addr_t len) ··· 160 160 int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev, 161 161 struct p2pdma_provider **provider, 162 162 unsigned int region_index, 163 - struct dma_buf_phys_vec *phys_vec, 163 + struct phys_vec *phys_vec, 164 164 struct vfio_region_dma_range *dma_ranges, 165 165 size_t nr_ranges) 166 166 {
+1 -1
include/linux/dma-buf-mapping.h
··· 9 9 10 10 struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach, 11 11 struct p2pdma_provider *provider, 12 - struct dma_buf_phys_vec *phys_vec, 12 + struct phys_vec *phys_vec, 13 13 size_t nr_ranges, size_t size, 14 14 enum dma_data_direction dir); 15 15 void dma_buf_free_sgt(struct dma_buf_attachment *attach, struct sg_table *sgt,
-10
include/linux/dma-buf.h
··· 520 520 }; 521 521 522 522 /** 523 - * struct dma_buf_phys_vec - describe continuous chunk of memory 524 - * @paddr: physical address of that chunk 525 - * @len: Length of this chunk 526 - */ 527 - struct dma_buf_phys_vec { 528 - phys_addr_t paddr; 529 - size_t len; 530 - }; 531 - 532 - /** 533 523 * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters 534 524 * @name: export-info name 535 525 *
+11 -2
include/linux/memory-failure.h
··· 4 4 5 5 #include <linux/interval_tree.h> 6 6 7 - struct pfn_address_space; 8 - 9 7 struct pfn_address_space { 10 8 struct interval_tree_node node; 11 9 struct address_space *mapping; ··· 11 13 unsigned long pfn, pgoff_t *pgoff); 12 14 }; 13 15 16 + #ifdef CONFIG_MEMORY_FAILURE 14 17 int register_pfn_address_space(struct pfn_address_space *pfn_space); 15 18 void unregister_pfn_address_space(struct pfn_address_space *pfn_space); 19 + #else 20 + static inline int register_pfn_address_space(struct pfn_address_space *pfn_space) 21 + { 22 + return -EOPNOTSUPP; 23 + } 24 + 25 + static inline void unregister_pfn_address_space(struct pfn_address_space *pfn_space) 26 + { 27 + } 28 + #endif /* CONFIG_MEMORY_FAILURE */ 16 29 17 30 #endif /* _LINUX_MEMORY_FAILURE_H */
+6 -7
include/linux/vfio_pci_core.h
··· 28 28 struct vfio_pci_core_device; 29 29 struct vfio_pci_region; 30 30 struct p2pdma_provider; 31 - struct dma_buf_phys_vec; 32 31 struct dma_buf_attachment; 33 32 34 33 struct vfio_pci_eventfd { ··· 61 62 int (*get_dmabuf_phys)(struct vfio_pci_core_device *vdev, 62 63 struct p2pdma_provider **provider, 63 64 unsigned int region_index, 64 - struct dma_buf_phys_vec *phys_vec, 65 + struct phys_vec *phys_vec, 65 66 struct vfio_region_dma_range *dma_ranges, 66 67 size_t nr_ranges); 67 68 }; 68 69 69 70 #if IS_ENABLED(CONFIG_VFIO_PCI_DMABUF) 70 - int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec, 71 + int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec, 71 72 struct vfio_region_dma_range *dma_ranges, 72 73 size_t nr_ranges, phys_addr_t start, 73 74 phys_addr_t len); 74 75 int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev, 75 76 struct p2pdma_provider **provider, 76 77 unsigned int region_index, 77 - struct dma_buf_phys_vec *phys_vec, 78 + struct phys_vec *phys_vec, 78 79 struct vfio_region_dma_range *dma_ranges, 79 80 size_t nr_ranges); 80 81 #else 81 82 static inline int 82 - vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec, 83 + vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec, 83 84 struct vfio_region_dma_range *dma_ranges, 84 85 size_t nr_ranges, phys_addr_t start, 85 86 phys_addr_t len) ··· 88 89 } 89 90 static inline int vfio_pci_core_get_dmabuf_phys( 90 91 struct vfio_pci_core_device *vdev, struct p2pdma_provider **provider, 91 - unsigned int region_index, struct dma_buf_phys_vec *phys_vec, 92 + unsigned int region_index, struct phys_vec *phys_vec, 92 93 struct vfio_region_dma_range *dma_ranges, size_t nr_ranges) 93 94 { 94 95 return -EOPNOTSUPP; ··· 235 236 } 236 237 237 238 int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, 238 - struct dma_buf_phys_vec *phys); 239 + struct phys_vec *phys); 239 240 240 241 #endif /* VFIO_PCI_CORE_H */
+10
tools/testing/selftests/vfio/Makefile
··· 1 + ARCH ?= $(shell uname -m) 2 + 3 + ifeq (,$(filter $(ARCH),arm64 x86_64)) 4 + # Do nothing on unsupported architectures 5 + include ../lib.mk 6 + else 7 + 1 8 CFLAGS = $(KHDR_INCLUDES) 2 9 TEST_GEN_PROGS += vfio_dma_mapping_test 10 + TEST_GEN_PROGS += vfio_dma_mapping_mmio_test 3 11 TEST_GEN_PROGS += vfio_iommufd_setup_test 4 12 TEST_GEN_PROGS += vfio_pci_device_test 5 13 TEST_GEN_PROGS += vfio_pci_device_init_perf_test ··· 35 27 -include $(TEST_DEP_FILES) 36 28 37 29 EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES) 30 + 31 + endif
+9
tools/testing/selftests/vfio/lib/include/libvfio.h
··· 23 23 const char *vfio_selftests_get_bdf(int *argc, char *argv[]); 24 24 char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs); 25 25 26 + /* 27 + * Reserve virtual address space of size at an address satisfying 28 + * (vaddr % align) == offset. 29 + * 30 + * Returns the reserved vaddr. The caller is responsible for unmapping 31 + * the returned region. 32 + */ 33 + void *mmap_reserve(size_t size, size_t align, size_t offset); 34 + 26 35 #endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
+6
tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
··· 61 61 62 62 struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges); 63 63 64 + #define MODE_VFIO_TYPE1_IOMMU "vfio_type1_iommu" 65 + #define MODE_VFIO_TYPE1V2_IOMMU "vfio_type1v2_iommu" 66 + #define MODE_IOMMUFD_COMPAT_TYPE1 "iommufd_compat_type1" 67 + #define MODE_IOMMUFD_COMPAT_TYPE1V2 "iommufd_compat_type1v2" 68 + #define MODE_IOMMUFD "iommufd" 69 + 64 70 /* 65 71 * Generator for VFIO selftests fixture variants that replicate across all 66 72 * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
+6 -6
tools/testing/selftests/vfio/lib/iommu.c
··· 20 20 #include "../../../kselftest.h" 21 21 #include <libvfio.h> 22 22 23 - const char *default_iommu_mode = "iommufd"; 23 + const char *default_iommu_mode = MODE_IOMMUFD; 24 24 25 25 /* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */ 26 26 static const struct iommu_mode iommu_modes[] = { 27 27 { 28 - .name = "vfio_type1_iommu", 28 + .name = MODE_VFIO_TYPE1_IOMMU, 29 29 .container_path = "/dev/vfio/vfio", 30 30 .iommu_type = VFIO_TYPE1_IOMMU, 31 31 }, 32 32 { 33 - .name = "vfio_type1v2_iommu", 33 + .name = MODE_VFIO_TYPE1V2_IOMMU, 34 34 .container_path = "/dev/vfio/vfio", 35 35 .iommu_type = VFIO_TYPE1v2_IOMMU, 36 36 }, 37 37 { 38 - .name = "iommufd_compat_type1", 38 + .name = MODE_IOMMUFD_COMPAT_TYPE1, 39 39 .container_path = "/dev/iommu", 40 40 .iommu_type = VFIO_TYPE1_IOMMU, 41 41 }, 42 42 { 43 - .name = "iommufd_compat_type1v2", 43 + .name = MODE_IOMMUFD_COMPAT_TYPE1V2, 44 44 .container_path = "/dev/iommu", 45 45 .iommu_type = VFIO_TYPE1v2_IOMMU, 46 46 }, 47 47 { 48 - .name = "iommufd", 48 + .name = MODE_IOMMUFD, 49 49 }, 50 50 }; 51 51
+25
tools/testing/selftests/vfio/lib/libvfio.c
··· 2 2 3 3 #include <stdio.h> 4 4 #include <stdlib.h> 5 + #include <sys/mman.h> 6 + 7 + #include <linux/align.h> 5 8 6 9 #include "../../../kselftest.h" 7 10 #include <libvfio.h> ··· 78 75 int nr_bdfs; 79 76 80 77 return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0]; 78 + } 79 + 80 + void *mmap_reserve(size_t size, size_t align, size_t offset) 81 + { 82 + void *map_base, *map_align; 83 + size_t delta; 84 + 85 + VFIO_ASSERT_GT(align, offset); 86 + delta = align - offset; 87 + 88 + map_base = mmap(NULL, size + align, PROT_NONE, 89 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 90 + VFIO_ASSERT_NE(map_base, MAP_FAILED); 91 + 92 + map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta); 93 + 94 + if (map_align > map_base) 95 + VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0); 96 + 97 + VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0); 98 + 99 + return map_align; 81 100 }
+23 -1
tools/testing/selftests/vfio/lib/vfio_pci_device.c
··· 11 11 #include <sys/ioctl.h> 12 12 #include <sys/mman.h> 13 13 14 + #include <linux/align.h> 14 15 #include <linux/iommufd.h> 16 + #include <linux/kernel.h> 15 17 #include <linux/limits.h> 18 + #include <linux/log2.h> 16 19 #include <linux/mman.h> 17 20 #include <linux/overflow.h> 21 + #include <linux/sizes.h> 18 22 #include <linux/types.h> 19 23 #include <linux/vfio.h> 20 24 ··· 127 123 static void vfio_pci_bar_map(struct vfio_pci_device *device, int index) 128 124 { 129 125 struct vfio_pci_bar *bar = &device->bars[index]; 126 + size_t align, size; 130 127 int prot = 0; 128 + void *vaddr; 131 129 132 130 VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); 133 131 VFIO_ASSERT_NULL(bar->vaddr); 134 132 VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP); 133 + VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size)); 135 134 136 135 if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ) 137 136 prot |= PROT_READ; 138 137 if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE) 139 138 prot |= PROT_WRITE; 140 139 141 - bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED, 140 + size = bar->info.size; 141 + 142 + /* 143 + * Align BAR mmaps to improve page fault granularity during potential 144 + * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the 145 + * largest hugepage size across any architecture, so no benefit from 146 + * larger alignment. BARs smaller than 1G will be aligned by their 147 + * power-of-two size, guaranteeing sufficient alignment for smaller 148 + * hugepages, if present. 149 + */ 150 + align = min_t(size_t, size, SZ_1G); 151 + 152 + vaddr = mmap_reserve(size, align, 0); 153 + bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED, 142 154 device->fd, bar->info.offset); 143 155 VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED); 156 + 157 + madvise(bar->vaddr, size, MADV_HUGEPAGE); 144 158 } 145 159 146 160 static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
+143
tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <stdio.h> 3 + #include <sys/mman.h> 4 + #include <unistd.h> 5 + 6 + #include <uapi/linux/types.h> 7 + #include <linux/pci_regs.h> 8 + #include <linux/sizes.h> 9 + #include <linux/vfio.h> 10 + 11 + #include <libvfio.h> 12 + 13 + #include "../kselftest_harness.h" 14 + 15 + static const char *device_bdf; 16 + 17 + static struct vfio_pci_bar *largest_mapped_bar(struct vfio_pci_device *device) 18 + { 19 + u32 flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; 20 + struct vfio_pci_bar *largest = NULL; 21 + u64 bar_size = 0; 22 + 23 + for (int i = 0; i < PCI_STD_NUM_BARS; i++) { 24 + struct vfio_pci_bar *bar = &device->bars[i]; 25 + 26 + if (!bar->vaddr) 27 + continue; 28 + 29 + /* 30 + * iommu_map() maps with READ|WRITE, so require the same 31 + * abilities for the underlying VFIO region. 32 + */ 33 + if ((bar->info.flags & flags) != flags) 34 + continue; 35 + 36 + if (bar->info.size > bar_size) { 37 + bar_size = bar->info.size; 38 + largest = bar; 39 + } 40 + } 41 + 42 + return largest; 43 + } 44 + 45 + FIXTURE(vfio_dma_mapping_mmio_test) { 46 + struct iommu *iommu; 47 + struct vfio_pci_device *device; 48 + struct iova_allocator *iova_allocator; 49 + struct vfio_pci_bar *bar; 50 + }; 51 + 52 + FIXTURE_VARIANT(vfio_dma_mapping_mmio_test) { 53 + const char *iommu_mode; 54 + }; 55 + 56 + #define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \ 57 + FIXTURE_VARIANT_ADD(vfio_dma_mapping_mmio_test, _iommu_mode) { \ 58 + .iommu_mode = #_iommu_mode, \ 59 + } 60 + 61 + FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); 62 + 63 + #undef FIXTURE_VARIANT_ADD_IOMMU_MODE 64 + 65 + FIXTURE_SETUP(vfio_dma_mapping_mmio_test) 66 + { 67 + self->iommu = iommu_init(variant->iommu_mode); 68 + self->device = vfio_pci_device_init(device_bdf, self->iommu); 69 + self->iova_allocator = iova_allocator_init(self->iommu); 70 + self->bar = largest_mapped_bar(self->device); 71 + 72 + if (!self->bar) 73 + SKIP(return, "No mappable BAR found 
on device %s", device_bdf); 74 + } 75 + 76 + FIXTURE_TEARDOWN(vfio_dma_mapping_mmio_test) 77 + { 78 + iova_allocator_cleanup(self->iova_allocator); 79 + vfio_pci_device_cleanup(self->device); 80 + iommu_cleanup(self->iommu); 81 + } 82 + 83 + static void do_mmio_map_test(struct iommu *iommu, 84 + struct iova_allocator *iova_allocator, 85 + void *vaddr, size_t size) 86 + { 87 + struct dma_region region = { 88 + .vaddr = vaddr, 89 + .size = size, 90 + .iova = iova_allocator_alloc(iova_allocator, size), 91 + }; 92 + 93 + /* 94 + * NOTE: Check for iommufd compat success once it lands. Native iommufd 95 + * will never support this. 96 + */ 97 + if (!strcmp(iommu->mode->name, MODE_VFIO_TYPE1V2_IOMMU) || 98 + !strcmp(iommu->mode->name, MODE_VFIO_TYPE1_IOMMU)) { 99 + iommu_map(iommu, &region); 100 + iommu_unmap(iommu, &region); 101 + } else { 102 + VFIO_ASSERT_NE(__iommu_map(iommu, &region), 0); 103 + VFIO_ASSERT_NE(__iommu_unmap(iommu, &region, NULL), 0); 104 + } 105 + } 106 + 107 + TEST_F(vfio_dma_mapping_mmio_test, map_full_bar) 108 + { 109 + do_mmio_map_test(self->iommu, self->iova_allocator, 110 + self->bar->vaddr, self->bar->info.size); 111 + } 112 + 113 + TEST_F(vfio_dma_mapping_mmio_test, map_partial_bar) 114 + { 115 + if (self->bar->info.size < 2 * getpagesize()) 116 + SKIP(return, "BAR too small (size=0x%llx)", self->bar->info.size); 117 + 118 + do_mmio_map_test(self->iommu, self->iova_allocator, 119 + self->bar->vaddr, getpagesize()); 120 + } 121 + 122 + /* Test IOMMU mapping of BAR mmap with intentionally poor vaddr alignment. 
*/ 123 + TEST_F(vfio_dma_mapping_mmio_test, map_bar_misaligned) 124 + { 125 + /* Limit size to bound test time for large BARs */ 126 + size_t size = min_t(size_t, self->bar->info.size, SZ_1G); 127 + void *vaddr; 128 + 129 + vaddr = mmap_reserve(size, SZ_1G, getpagesize()); 130 + vaddr = mmap(vaddr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, 131 + self->device->fd, self->bar->info.offset); 132 + VFIO_ASSERT_NE(vaddr, MAP_FAILED); 133 + 134 + do_mmio_map_test(self->iommu, self->iova_allocator, vaddr, size); 135 + 136 + VFIO_ASSERT_EQ(munmap(vaddr, size), 0); 137 + } 138 + 139 + int main(int argc, char *argv[]) 140 + { 141 + device_bdf = vfio_selftests_get_bdf(&argc, argv); 142 + return test_harness_run(argc, argv); 143 + }
+2 -6
tools/testing/selftests/vfio/vfio_dma_mapping_test.c
··· 161 161 if (rc == -EOPNOTSUPP) 162 162 goto unmap; 163 163 164 - /* 165 - * IOMMUFD compatibility-mode does not support huge mappings when 166 - * using VFIO_TYPE1_IOMMU. 167 - */ 168 - if (!strcmp(variant->iommu_mode, "iommufd_compat_type1")) 169 - mapping_size = SZ_4K; 164 + if (self->iommu->mode->iommu_type == VFIO_TYPE1_IOMMU) 165 + goto unmap; 170 166 171 167 ASSERT_EQ(0, rc); 172 168 printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova);