Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

PCI/P2PDMA: Refactor to separate core P2P functionality from memory allocation

Refactor the PCI P2PDMA subsystem to separate the core peer-to-peer DMA
functionality from the optional memory allocation layer. This creates a
two-tier architecture:

The core layer provides P2P mapping functionality for physical addresses
based on PCI device MMIO BARs and integrates with the DMA API for
mapping operations. This layer is required for all P2PDMA users.

The optional upper layer provides memory allocation capabilities
including gen_pool allocator, struct page support, and sysfs interface
for user space access.

This separation allows subsystems like DMABUF to use only the core P2P
mapping functionality without the overhead of memory allocation features
they don't need. The core functionality is now available through the
new pcim_p2pdma_provider() function, which returns a pointer to a
p2pdma_provider structure.

Tested-by: Alex Mastro <amastro@fb.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Acked-by: Ankit Agrawal <ankita@nvidia.com>
Link: https://lore.kernel.org/r/20251120-dmabuf-vfio-v9-3-d7f71607f371@nvidia.com
Signed-off-by: Alex Williamson <alex@shazbot.org>

authored by

Leon Romanovsky and committed by
Alex Williamson
372d6d1b d4504262

+132 -32
+121 -32
drivers/pci/p2pdma.c
··· 25 25 struct gen_pool *pool; 26 26 bool p2pmem_published; 27 27 struct xarray map_types; 28 + struct p2pdma_provider mem[PCI_STD_NUM_BARS]; 28 29 }; 29 30 30 31 struct pci_p2pdma_pagemap { 31 32 struct dev_pagemap pgmap; 32 - struct p2pdma_provider mem; 33 + struct p2pdma_provider *mem; 33 34 }; 34 35 35 36 static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap) ··· 205 204 struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page)); 206 205 /* safe to dereference while a reference is held to the percpu ref */ 207 206 struct pci_p2pdma *p2pdma = rcu_dereference_protected( 208 - to_pci_dev(pgmap->mem.owner)->p2pdma, 1); 207 + to_pci_dev(pgmap->mem->owner)->p2pdma, 1); 209 208 struct percpu_ref *ref; 210 209 211 210 gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page), ··· 228 227 229 228 /* Flush and disable pci_alloc_p2p_mem() */ 230 229 pdev->p2pdma = NULL; 231 - synchronize_rcu(); 230 + if (p2pdma->pool) 231 + synchronize_rcu(); 232 + xa_destroy(&p2pdma->map_types); 233 + 234 + if (!p2pdma->pool) 235 + return; 232 236 233 237 gen_pool_destroy(p2pdma->pool); 234 238 sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); 235 - xa_destroy(&p2pdma->map_types); 236 239 } 237 240 238 - static int pci_p2pdma_setup(struct pci_dev *pdev) 241 + /** 242 + * pcim_p2pdma_init - Initialise peer-to-peer DMA providers 243 + * @pdev: The PCI device to enable P2PDMA for 244 + * 245 + * This function initializes the peer-to-peer DMA infrastructure 246 + * for a PCI device. It allocates and sets up the necessary data 247 + * structures to support P2PDMA operations, including mapping type 248 + * tracking. 
249 + */ 250 + int pcim_p2pdma_init(struct pci_dev *pdev) 239 251 { 240 - int error = -ENOMEM; 241 252 struct pci_p2pdma *p2p; 253 + int i, ret; 254 + 255 + p2p = rcu_dereference_protected(pdev->p2pdma, 1); 256 + if (p2p) 257 + return 0; 242 258 243 259 p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL); 244 260 if (!p2p) 245 261 return -ENOMEM; 246 262 247 263 xa_init(&p2p->map_types); 264 + /* 265 + * Iterate over all standard PCI BARs and record only those that 266 + * correspond to MMIO regions. Skip non-memory resources (e.g. I/O 267 + * port BARs) since they cannot be used for peer-to-peer (P2P) 268 + * transactions. 269 + */ 270 + for (i = 0; i < PCI_STD_NUM_BARS; i++) { 271 + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) 272 + continue; 248 273 249 - p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); 250 - if (!p2p->pool) 251 - goto out; 274 + p2p->mem[i].owner = &pdev->dev; 275 + p2p->mem[i].bus_offset = 276 + pci_bus_address(pdev, i) - pci_resource_start(pdev, i); 277 + } 252 278 253 - error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); 254 - if (error) 255 - goto out_pool_destroy; 256 - 257 - error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); 258 - if (error) 259 - goto out_pool_destroy; 279 + ret = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); 280 + if (ret) 281 + goto out_p2p; 260 282 261 283 rcu_assign_pointer(pdev->p2pdma, p2p); 262 284 return 0; 263 285 264 - out_pool_destroy: 265 - gen_pool_destroy(p2p->pool); 266 - out: 286 + out_p2p: 267 287 devm_kfree(&pdev->dev, p2p); 268 - return error; 288 + return ret; 289 + } 290 + EXPORT_SYMBOL_GPL(pcim_p2pdma_init); 291 + 292 + /** 293 + * pcim_p2pdma_provider - Get peer-to-peer DMA provider 294 + * @pdev: The PCI device to enable P2PDMA for 295 + * @bar: BAR index to get provider 296 + * 297 + * This function gets peer-to-peer DMA provider for a PCI device. 
The lifetime 298 + * of the provider (and of course the MMIO) is bound to the lifetime of the 299 + * driver. A driver calling this function must ensure that all references to the 300 + * provider, and any DMA mappings created for any MMIO, are all cleaned up 301 + * before the driver remove() completes. 302 + * 303 + * Since P2P is almost always shared with a second driver this means some system 304 + * to notify, invalidate and revoke the MMIO's DMA must be in place to use this 305 + * function. For example a revoke can be built using DMABUF. 306 + */ 307 + struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, int bar) 308 + { 309 + struct pci_p2pdma *p2p; 310 + 311 + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) 312 + return NULL; 313 + 314 + p2p = rcu_dereference_protected(pdev->p2pdma, 1); 315 + if (WARN_ON(!p2p)) 316 + /* Someone forgot to call to pcim_p2pdma_init() before */ 317 + return NULL; 318 + 319 + return &p2p->mem[bar]; 320 + } 321 + EXPORT_SYMBOL_GPL(pcim_p2pdma_provider); 322 + 323 + static int pci_p2pdma_setup_pool(struct pci_dev *pdev) 324 + { 325 + struct pci_p2pdma *p2pdma; 326 + int ret; 327 + 328 + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); 329 + if (p2pdma->pool) 330 + /* We already setup pools, do nothing, */ 331 + return 0; 332 + 333 + p2pdma->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); 334 + if (!p2pdma->pool) 335 + return -ENOMEM; 336 + 337 + ret = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); 338 + if (ret) 339 + goto out_pool_destroy; 340 + 341 + return 0; 342 + 343 + out_pool_destroy: 344 + gen_pool_destroy(p2pdma->pool); 345 + p2pdma->pool = NULL; 346 + return ret; 269 347 } 270 348 271 349 static void pci_p2pdma_unmap_mappings(void *data) ··· 356 276 * unmap_mapping_range() on the inode, teardown any existing userspace 357 277 * mappings and prevent new ones from being created. 
358 278 */ 359 - sysfs_remove_file_from_group(&p2p_pgmap->mem.owner->kobj, 279 + sysfs_remove_file_from_group(&p2p_pgmap->mem->owner->kobj, 360 280 &p2pmem_alloc_attr.attr, 361 281 p2pmem_group.name); 362 282 } ··· 375 295 u64 offset) 376 296 { 377 297 struct pci_p2pdma_pagemap *p2p_pgmap; 298 + struct p2pdma_provider *mem; 378 299 struct dev_pagemap *pgmap; 379 300 struct pci_p2pdma *p2pdma; 380 301 void *addr; ··· 393 312 if (size + offset > pci_resource_len(pdev, bar)) 394 313 return -EINVAL; 395 314 396 - if (!pdev->p2pdma) { 397 - error = pci_p2pdma_setup(pdev); 398 - if (error) 399 - return error; 400 - } 315 + error = pcim_p2pdma_init(pdev); 316 + if (error) 317 + return error; 318 + 319 + error = pci_p2pdma_setup_pool(pdev); 320 + if (error) 321 + return error; 322 + 323 + mem = pcim_p2pdma_provider(pdev, bar); 324 + /* 325 + * We checked validity of BAR prior to call 326 + * to pcim_p2pdma_provider. It should never return NULL. 327 + */ 328 + if (WARN_ON(!mem)) 329 + return -EINVAL; 401 330 402 331 p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL); 403 332 if (!p2p_pgmap) ··· 419 328 pgmap->nr_range = 1; 420 329 pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; 421 330 pgmap->ops = &p2pdma_pgmap_ops; 422 - p2p_pgmap->mem.owner = &pdev->dev; 423 - p2p_pgmap->mem.bus_offset = 424 - pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar); 331 + p2p_pgmap->mem = mem; 425 332 426 333 addr = devm_memremap_pages(&pdev->dev, pgmap); 427 334 if (IS_ERR(addr)) { ··· 1096 1007 { 1097 1008 struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(page_pgmap(page)); 1098 1009 1099 - if (state->mem == &p2p_pgmap->mem) 1010 + if (state->mem == p2p_pgmap->mem) 1100 1011 return; 1101 1012 1102 - state->mem = &p2p_pgmap->mem; 1103 - state->map = pci_p2pdma_map_type(&p2p_pgmap->mem, dev); 1013 + state->mem = p2p_pgmap->mem; 1014 + state->map = pci_p2pdma_map_type(p2p_pgmap->mem, dev); 1104 1015 } 1105 1016 1106 1017 /**
+11
include/linux/pci-p2pdma.h
··· 27 27 }; 28 28 29 29 #ifdef CONFIG_PCI_P2PDMA 30 + int pcim_p2pdma_init(struct pci_dev *pdev); 31 + struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, int bar); 30 32 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, 31 33 u64 offset); 32 34 int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, ··· 46 44 ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, 47 45 bool use_p2pdma); 48 46 #else /* CONFIG_PCI_P2PDMA */ 47 + static inline int pcim_p2pdma_init(struct pci_dev *pdev) 48 + { 49 + return -EOPNOTSUPP; 50 + } 51 + static inline struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, 52 + int bar) 53 + { 54 + return NULL; 55 + } 49 56 static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, 50 57 size_t size, u64 offset) 51 58 {