Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

vfio: selftests: Align BAR mmaps for efficient IOMMU mapping

Update vfio_pci_bar_map() to align BAR mmaps for efficient huge page
mappings. The manual mmap alignment can be removed once mmap(!MAP_FIXED)
on vfio device fds improves to automatically return well-aligned
addresses.

Also add MADV_HUGEPAGE, which encourages the kernel to use huge pages
(e.g. when /sys/kernel/mm/transparent_hugepage/enabled is set to "madvise").

Drop MAP_FILE from mmap(). It is an ignored compatibility flag.

Signed-off-by: Alex Mastro <amastro@fb.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Tested-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20260114-map-mmio-test-v3-2-44e036d95e64@fb.com
Signed-off-by: Alex Williamson <alex@shazbot.org>

authored by

Alex Mastro and committed by
Alex Williamson
557dbdf6 03b7c2d7

+57 -1
+9
tools/testing/selftests/vfio/lib/include/libvfio.h
··· 23 23 const char *vfio_selftests_get_bdf(int *argc, char *argv[]); 24 24 char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs); 25 25 26 + /* 27 + * Reserve size bytes of virtual address space at an address satisfying 28 + * (vaddr % align) == offset. 29 + * 30 + * Returns the reserved vaddr. The caller is responsible for unmapping 31 + * the returned region. 32 + */ 33 + void *mmap_reserve(size_t size, size_t align, size_t offset); 34 + 26 35 #endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
+25
tools/testing/selftests/vfio/lib/libvfio.c
··· 2 2 3 3 #include <stdio.h> 4 4 #include <stdlib.h> 5 + #include <sys/mman.h> 6 + 7 + #include <linux/align.h> 5 8 6 9 #include "../../../kselftest.h" 7 10 #include <libvfio.h> ··· 78 75 int nr_bdfs; 79 76 80 77 return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0]; 78 + } 79 + 80 + void *mmap_reserve(size_t size, size_t align, size_t offset) 81 + { 82 + void *map_base, *map_align; 83 + size_t delta; 84 + 85 + VFIO_ASSERT_GT(align, offset); 86 + delta = align - offset; 87 + 88 + map_base = mmap(NULL, size + align, PROT_NONE, 89 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 90 + VFIO_ASSERT_NE(map_base, MAP_FAILED); 91 + 92 + map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta); 93 + 94 + if (map_align > map_base) 95 + VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0); 96 + 97 + VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0); 98 + 99 + return map_align; 81 100 }
+23 -1
tools/testing/selftests/vfio/lib/vfio_pci_device.c
··· 11 11 #include <sys/ioctl.h> 12 12 #include <sys/mman.h> 13 13 14 + #include <linux/align.h> 14 15 #include <linux/iommufd.h> 16 + #include <linux/kernel.h> 15 17 #include <linux/limits.h> 18 + #include <linux/log2.h> 16 19 #include <linux/mman.h> 17 20 #include <linux/overflow.h> 21 + #include <linux/sizes.h> 18 22 #include <linux/types.h> 19 23 #include <linux/vfio.h> 20 24 ··· 127 123 static void vfio_pci_bar_map(struct vfio_pci_device *device, int index) 128 124 { 129 125 struct vfio_pci_bar *bar = &device->bars[index]; 126 + size_t align, size; 130 127 int prot = 0; 128 + void *vaddr; 131 129 132 130 VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); 133 131 VFIO_ASSERT_NULL(bar->vaddr); 134 132 VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP); 133 + VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size)); 135 134 136 135 if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ) 137 136 prot |= PROT_READ; 138 137 if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE) 139 138 prot |= PROT_WRITE; 140 139 141 - bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED, 140 + size = bar->info.size; 141 + 142 + /* 143 + * Align BAR mmaps to improve page fault granularity during potential 144 + * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the 145 + * largest hugepage size across any architecture, so no benefit from 146 + * larger alignment. BARs smaller than 1G will be aligned by their 147 + * power-of-two size, guaranteeing sufficient alignment for smaller 148 + * hugepages, if present. 149 + */ 150 + align = min_t(size_t, size, SZ_1G); 151 + 152 + vaddr = mmap_reserve(size, align, 0); 153 + bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED, 142 154 device->fd, bar->info.offset); 143 155 VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED); 156 + 157 + madvise(bar->vaddr, size, MADV_HUGEPAGE); 144 158 } 145 159 146 160 static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)