Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

PCI/P2PDMA: Separate the mmap() support from the core logic

Currently the P2PDMA code requires a pgmap and a struct page to
function. This was serving three important purposes:

- DMA API compatibility, where scatterlist required a struct page as
input

- Life cycle management, the percpu_ref is used to prevent UAF during
device hot unplug

- A way to get the P2P provider data through the pci_p2pdma_pagemap

The DMA API now has a new flow, and has gained phys_addr_t support, so
it no longer needs struct pages to perform P2P mapping.

Lifecycle management can be delegated to the user, DMABUF for instance
has a suitable invalidation protocol that does not require struct page.

Finding the P2P provider data can also be managed by the caller
without needing to look it up from the phys_addr.

Split the P2PDMA code into two layers. The optional upper layer,
effectively, provides a way to mmap() P2P memory into a VMA by
providing struct page, pgmap, a genalloc and sysfs.

The lower layer provides the actual P2P infrastructure and is wrapped
up in a new struct p2pdma_provider. Rework the mmap layer to use new
p2pdma_provider based APIs.

Drivers that do not want to put P2P memory into VMAs can allocate a
struct p2pdma_provider after probe() starts and free it before
remove() completes. When DMA mapping the driver must convey the struct
p2pdma_provider to the DMA mapping code along with a phys_addr of the
MMIO BAR slice to map. The driver must ensure that no DMA mapping
outlives the lifetime of the struct p2pdma_provider.

The intended target of this new API layer is DMABUF. There is usually
only a single p2pdma_provider for a DMABUF exporter. Most drivers can
establish the p2pdma_provider during probe, access the single instance
during DMABUF attach and use that to drive the DMA mapping.

DMABUF provides an invalidation mechanism that can guarantee all DMA
is halted and the DMA mappings are undone prior to destroying the
struct p2pdma_provider. This ensures there is no UAF through DMABUFs
that are lingering past driver removal.

The new p2pdma_provider layer cannot be used to create P2P memory that
can be mapped into VMAs, be used with pin_user_pages(), O_DIRECT, and
so on. These use cases must still use the mmap() layer. The
p2pdma_provider layer is principally for DMABUF-like use cases where
DMABUF natively manages the life cycle and access instead of
vmas/pin_user_pages()/struct page.

In addition, remove the bus_off field from pci_p2pdma_map_state since
it duplicates information already available in the pgmap structure.
The bus_offset is only used in one location (pci_p2pdma_bus_addr_map)
and is always identical to pgmap->bus_offset.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Alex Mastro <amastro@fb.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Acked-by: Ankit Agrawal <ankita@nvidia.com>
Link: https://lore.kernel.org/r/20251120-dmabuf-vfio-v9-1-d7f71607f371@nvidia.com
Signed-off-by: Alex Williamson <alex@shazbot.org>

authored by

Leon Romanovsky and committed by
Alex Williamson
f58ef9d1 6146a0f1

+37 -25
+23 -20
drivers/pci/p2pdma.c
··· 28 28 }; 29 29 30 30 struct pci_p2pdma_pagemap { 31 - struct pci_dev *provider; 32 - u64 bus_offset; 33 31 struct dev_pagemap pgmap; 32 + struct p2pdma_provider mem; 34 33 }; 35 34 36 35 static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap) ··· 203 204 { 204 205 struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page)); 205 206 /* safe to dereference while a reference is held to the percpu ref */ 206 - struct pci_p2pdma *p2pdma = 207 - rcu_dereference_protected(pgmap->provider->p2pdma, 1); 207 + struct pci_p2pdma *p2pdma = rcu_dereference_protected( 208 + to_pci_dev(pgmap->mem.owner)->p2pdma, 1); 208 209 struct percpu_ref *ref; 209 210 210 211 gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page), ··· 269 270 270 271 static void pci_p2pdma_unmap_mappings(void *data) 271 272 { 272 - struct pci_dev *pdev = data; 273 + struct pci_p2pdma_pagemap *p2p_pgmap = data; 273 274 274 275 /* 275 276 * Removing the alloc attribute from sysfs will call 276 277 * unmap_mapping_range() on the inode, teardown any existing userspace 277 278 * mappings and prevent new ones from being created. 
278 279 */ 279 - sysfs_remove_file_from_group(&pdev->dev.kobj, &p2pmem_alloc_attr.attr, 280 + sysfs_remove_file_from_group(&p2p_pgmap->mem.owner->kobj, 281 + &p2pmem_alloc_attr.attr, 280 282 p2pmem_group.name); 281 283 } 282 284 ··· 328 328 pgmap->nr_range = 1; 329 329 pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; 330 330 pgmap->ops = &p2pdma_pgmap_ops; 331 - 332 - p2p_pgmap->provider = pdev; 333 - p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) - 334 - pci_resource_start(pdev, bar); 331 + p2p_pgmap->mem.owner = &pdev->dev; 332 + p2p_pgmap->mem.bus_offset = 333 + pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar); 335 334 336 335 addr = devm_memremap_pages(&pdev->dev, pgmap); 337 336 if (IS_ERR(addr)) { ··· 339 340 } 340 341 341 342 error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings, 342 - pdev); 343 + p2p_pgmap); 343 344 if (error) 344 345 goto pages_free; 345 346 ··· 971 972 } 972 973 EXPORT_SYMBOL_GPL(pci_p2pmem_publish); 973 974 974 - static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, 975 - struct device *dev) 975 + static enum pci_p2pdma_map_type 976 + pci_p2pdma_map_type(struct p2pdma_provider *provider, struct device *dev) 976 977 { 977 978 enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED; 978 - struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; 979 + struct pci_dev *pdev = to_pci_dev(provider->owner); 979 980 struct pci_dev *client; 980 981 struct pci_p2pdma *p2pdma; 981 982 int dist; 982 983 983 - if (!provider->p2pdma) 984 + if (!pdev->p2pdma) 984 985 return PCI_P2PDMA_MAP_NOT_SUPPORTED; 985 986 986 987 if (!dev_is_pci(dev)) ··· 989 990 client = to_pci_dev(dev); 990 991 991 992 rcu_read_lock(); 992 - p2pdma = rcu_dereference(provider->p2pdma); 993 + p2pdma = rcu_dereference(pdev->p2pdma); 993 994 994 995 if (p2pdma) 995 996 type = xa_to_value(xa_load(&p2pdma->map_types, ··· 997 998 rcu_read_unlock(); 998 999 999 1000 if (type == PCI_P2PDMA_MAP_UNKNOWN) 1000 - return 
calc_map_type_and_dist(provider, client, &dist, true); 1001 + return calc_map_type_and_dist(pdev, client, &dist, true); 1001 1002 1002 1003 return type; 1003 1004 } ··· 1005 1006 void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, 1006 1007 struct device *dev, struct page *page) 1007 1008 { 1008 - state->pgmap = page_pgmap(page); 1009 - state->map = pci_p2pdma_map_type(state->pgmap, dev); 1010 - state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset; 1009 + struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(page_pgmap(page)); 1010 + 1011 + if (state->mem == &p2p_pgmap->mem) 1012 + return; 1013 + 1014 + state->mem = &p2p_pgmap->mem; 1015 + state->map = pci_p2pdma_map_type(&p2p_pgmap->mem, dev); 1011 1016 } 1012 1017 1013 1018 /**
+14 -5
include/linux/pci-p2pdma.h
··· 16 16 struct block_device; 17 17 struct scatterlist; 18 18 19 + /** 20 + * struct p2pdma_provider 21 + * 22 + * A p2pdma provider is a range of MMIO address space available to the CPU. 23 + */ 24 + struct p2pdma_provider { 25 + struct device *owner; 26 + u64 bus_offset; 27 + }; 28 + 19 29 #ifdef CONFIG_PCI_P2PDMA 20 30 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, 21 31 u64 offset); ··· 149 139 }; 150 140 151 141 struct pci_p2pdma_map_state { 152 - struct dev_pagemap *pgmap; 142 + struct p2pdma_provider *mem; 153 143 enum pci_p2pdma_map_type map; 154 - u64 bus_off; 155 144 }; 145 + 156 146 157 147 /* helper for pci_p2pdma_state(), do not use directly */ 158 148 void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, ··· 172 162 struct page *page) 173 163 { 174 164 if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { 175 - if (state->pgmap != page_pgmap(page)) 176 - __pci_p2pdma_update_state(state, dev, page); 165 + __pci_p2pdma_update_state(state, dev, page); 177 166 return state->map; 178 167 } 179 168 return PCI_P2PDMA_MAP_NONE; ··· 190 181 pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) 191 182 { 192 183 WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); 193 - return paddr + state->bus_off; 184 + return paddr + state->mem->bus_offset; 194 185 } 195 186 196 187 #endif /* _LINUX_PCI_P2P_H */