Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

gpu/drm/nouveau: enable THP support for GPU memory migration

Enable MIGRATE_VMA_SELECT_COMPOUND support in the nouveau driver to take
advantage of THP zone device migration capabilities.

Update migration and eviction code paths to handle compound page sizes
appropriately, improving memory bandwidth utilization and reducing
migration overhead for large GPU memory allocations.

[balbirs@nvidia.com: fix sparse error]
Link: https://lkml.kernel.org/r/20251115003333.3516870-1-balbirs@nvidia.com
Link: https://lkml.kernel.org/r/20251001065707.920170-17-balbirs@nvidia.com
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Balbir Singh and committed by Andrew Morton
c3228747 271a7b2e

+231 -84
+225 -81
drivers/gpu/drm/nouveau/nouveau_dmem.c
··· 50 50 */ 51 51 #define DMEM_CHUNK_SIZE (2UL << 20) 52 52 #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) 53 + #define NR_CHUNKS (128) 53 54 54 55 enum nouveau_aper { 55 56 NOUVEAU_APER_VIRT, ··· 84 83 struct list_head chunks; 85 84 struct mutex mutex; 86 85 struct page *free_pages; 86 + struct folio *free_folios; 87 87 spinlock_t lock; 88 + }; 89 + 90 + struct nouveau_dmem_dma_info { 91 + dma_addr_t dma_addr; 92 + size_t size; 88 93 }; 89 94 90 95 static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page) ··· 122 115 struct nouveau_dmem *dmem = chunk->drm->dmem; 123 116 124 117 spin_lock(&dmem->lock); 125 - page->zone_device_data = dmem->free_pages; 126 - dmem->free_pages = page; 118 + if (folio_order(folio)) { 119 + page->zone_device_data = dmem->free_folios; 120 + dmem->free_folios = folio; 121 + } else { 122 + page->zone_device_data = dmem->free_pages; 123 + dmem->free_pages = page; 124 + } 127 125 128 126 WARN_ON(!chunk->callocated); 129 127 chunk->callocated--; ··· 152 140 } 153 141 } 154 142 155 - static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page *spage, 156 - struct page *dpage, dma_addr_t *dma_addr) 143 + static int nouveau_dmem_copy_folio(struct nouveau_drm *drm, 144 + struct folio *sfolio, struct folio *dfolio, 145 + struct nouveau_dmem_dma_info *dma_info) 157 146 { 158 147 struct device *dev = drm->dev->dev; 148 + struct page *dpage = folio_page(dfolio, 0); 149 + struct page *spage = folio_page(sfolio, 0); 159 150 160 - lock_page(dpage); 151 + folio_lock(dfolio); 161 152 162 - *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); 163 - if (dma_mapping_error(dev, *dma_addr)) 153 + dma_info->dma_addr = dma_map_page(dev, dpage, 0, page_size(dpage), 154 + DMA_BIDIRECTIONAL); 155 + dma_info->size = page_size(dpage); 156 + if (dma_mapping_error(dev, dma_info->dma_addr)) 164 157 return -EIO; 165 158 166 - if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr, 167 - 
NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) { 168 - dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 159 + if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(sfolio), 160 + NOUVEAU_APER_HOST, dma_info->dma_addr, 161 + NOUVEAU_APER_VRAM, 162 + nouveau_dmem_page_addr(spage))) { 163 + dma_unmap_page(dev, dma_info->dma_addr, page_size(dpage), 164 + DMA_BIDIRECTIONAL); 169 165 return -EIO; 170 166 } 171 167 ··· 186 166 struct nouveau_dmem *dmem = drm->dmem; 187 167 struct nouveau_fence *fence; 188 168 struct nouveau_svmm *svmm; 189 - struct page *spage, *dpage; 190 - unsigned long src = 0, dst = 0; 191 - dma_addr_t dma_addr = 0; 169 + struct page *dpage; 192 170 vm_fault_t ret = 0; 171 + int err; 193 172 struct migrate_vma args = { 194 173 .vma = vmf->vma, 195 - .start = vmf->address, 196 - .end = vmf->address + PAGE_SIZE, 197 - .src = &src, 198 - .dst = &dst, 199 174 .pgmap_owner = drm->dev, 200 175 .fault_page = vmf->page, 201 - .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE, 176 + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | 177 + MIGRATE_VMA_SELECT_COMPOUND, 178 + .src = NULL, 179 + .dst = NULL, 202 180 }; 181 + unsigned int order, nr; 182 + struct folio *sfolio, *dfolio; 183 + struct nouveau_dmem_dma_info dma_info; 203 184 185 + sfolio = page_folio(vmf->page); 186 + order = folio_order(sfolio); 187 + nr = 1 << order; 188 + 189 + /* 190 + * Handle partial unmap faults, where the folio is large, but 191 + * the pmd is split. 
192 + */ 193 + if (vmf->pte) { 194 + order = 0; 195 + nr = 1; 196 + } 197 + 198 + if (order) 199 + args.flags |= MIGRATE_VMA_SELECT_COMPOUND; 200 + 201 + args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order)); 202 + args.vma = vmf->vma; 203 + args.end = args.start + (PAGE_SIZE << order); 204 + args.src = kcalloc(nr, sizeof(*args.src), GFP_KERNEL); 205 + args.dst = kcalloc(nr, sizeof(*args.dst), GFP_KERNEL); 206 + 207 + if (!args.src || !args.dst) { 208 + ret = VM_FAULT_OOM; 209 + goto err; 210 + } 204 211 /* 205 212 * FIXME what we really want is to find some heuristic to migrate more 206 213 * than just one page on CPU fault. When such fault happens it is very ··· 238 191 if (!args.cpages) 239 192 return 0; 240 193 241 - spage = migrate_pfn_to_page(src); 242 - if (!spage || !(src & MIGRATE_PFN_MIGRATE)) 194 + if (order) 195 + dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER | __GFP_ZERO, 196 + order, vmf->vma, vmf->address), 0); 197 + else 198 + dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, 199 + vmf->address); 200 + if (!dpage) { 201 + ret = VM_FAULT_OOM; 243 202 goto done; 203 + } 244 204 245 - dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, vmf->address); 246 - if (!dpage) 247 - goto done; 205 + args.dst[0] = migrate_pfn(page_to_pfn(dpage)); 206 + if (order) 207 + args.dst[0] |= MIGRATE_PFN_COMPOUND; 208 + dfolio = page_folio(dpage); 248 209 249 - dst = migrate_pfn(page_to_pfn(dpage)); 250 - 251 - svmm = spage->zone_device_data; 210 + svmm = folio_zone_device_data(sfolio); 252 211 mutex_lock(&svmm->mutex); 253 212 nouveau_svmm_invalidate(svmm, args.start, args.end); 254 - ret = nouveau_dmem_copy_one(drm, spage, dpage, &dma_addr); 213 + err = nouveau_dmem_copy_folio(drm, sfolio, dfolio, &dma_info); 255 214 mutex_unlock(&svmm->mutex); 256 - if (ret) { 215 + if (err) { 257 216 ret = VM_FAULT_SIGBUS; 258 217 goto done; 259 218 } ··· 267 214 nouveau_fence_new(&fence, dmem->migrate.chan); 268 215 migrate_vma_pages(&args); 269 216 
nouveau_dmem_fence_done(&fence); 270 - dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 217 + dma_unmap_page(drm->dev->dev, dma_info.dma_addr, PAGE_SIZE, 218 + DMA_BIDIRECTIONAL); 271 219 done: 272 220 migrate_vma_finalize(&args); 221 + err: 222 + kfree(args.src); 223 + kfree(args.dst); 273 224 return ret; 225 + } 226 + 227 + static void nouveau_dmem_folio_split(struct folio *head, struct folio *tail) 228 + { 229 + if (tail == NULL) 230 + return; 231 + tail->pgmap = head->pgmap; 232 + tail->mapping = head->mapping; 233 + folio_set_zone_device_data(tail, folio_zone_device_data(head)); 274 234 } 275 235 276 236 static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { 277 237 .folio_free = nouveau_dmem_folio_free, 278 238 .migrate_to_ram = nouveau_dmem_migrate_to_ram, 239 + .folio_split = nouveau_dmem_folio_split, 279 240 }; 280 241 281 242 static int 282 - nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) 243 + nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage, 244 + bool is_large) 283 245 { 284 246 struct nouveau_dmem_chunk *chunk; 285 247 struct resource *res; 286 248 struct page *page; 287 249 void *ptr; 288 - unsigned long i, pfn_first; 250 + unsigned long i, pfn_first, pfn; 289 251 int ret; 290 252 291 253 chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); ··· 310 242 } 311 243 312 244 /* Allocate unused physical address space for device private pages. 
*/ 313 - res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE, 245 + res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE * NR_CHUNKS, 314 246 "nouveau_dmem"); 315 247 if (IS_ERR(res)) { 316 248 ret = PTR_ERR(res); ··· 343 275 pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT; 344 276 page = pfn_to_page(pfn_first); 345 277 spin_lock(&drm->dmem->lock); 346 - for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) { 347 - page->zone_device_data = drm->dmem->free_pages; 348 - drm->dmem->free_pages = page; 278 + 279 + pfn = pfn_first; 280 + for (i = 0; i < NR_CHUNKS; i++) { 281 + int j; 282 + 283 + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) || !is_large) { 284 + for (j = 0; j < DMEM_CHUNK_NPAGES - 1; j++, pfn++) { 285 + page = pfn_to_page(pfn); 286 + page->zone_device_data = drm->dmem->free_pages; 287 + drm->dmem->free_pages = page; 288 + } 289 + } else { 290 + page = pfn_to_page(pfn); 291 + page->zone_device_data = drm->dmem->free_folios; 292 + drm->dmem->free_folios = page_folio(page); 293 + pfn += DMEM_CHUNK_NPAGES; 294 + } 349 295 } 350 - *ppage = page; 296 + 297 + /* Move to next page */ 298 + if (is_large) { 299 + *ppage = &drm->dmem->free_folios->page; 300 + drm->dmem->free_folios = (*ppage)->zone_device_data; 301 + } else { 302 + *ppage = drm->dmem->free_pages; 303 + drm->dmem->free_pages = (*ppage)->zone_device_data; 304 + } 305 + 351 306 chunk->callocated++; 352 307 spin_unlock(&drm->dmem->lock); 353 308 354 - NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", 355 - DMEM_CHUNK_SIZE >> 20); 309 + NV_INFO(drm, "DMEM: registered %ldMB of %sdevice memory %lx %lx\n", 310 + NR_CHUNKS * DMEM_CHUNK_SIZE >> 20, is_large ? 
"THP " : "", pfn_first, 311 + nouveau_dmem_page_addr(page)); 356 312 357 313 return 0; 358 314 ··· 391 299 } 392 300 393 301 static struct page * 394 - nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) 302 + nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_large) 395 303 { 396 304 struct nouveau_dmem_chunk *chunk; 397 305 struct page *page = NULL; 306 + struct folio *folio = NULL; 398 307 int ret; 308 + unsigned int order = 0; 399 309 400 310 spin_lock(&drm->dmem->lock); 401 - if (drm->dmem->free_pages) { 311 + if (is_large && drm->dmem->free_folios) { 312 + folio = drm->dmem->free_folios; 313 + page = &folio->page; 314 + drm->dmem->free_folios = page->zone_device_data; 315 + chunk = nouveau_page_to_chunk(&folio->page); 316 + chunk->callocated++; 317 + spin_unlock(&drm->dmem->lock); 318 + order = ilog2(DMEM_CHUNK_NPAGES); 319 + } else if (!is_large && drm->dmem->free_pages) { 402 320 page = drm->dmem->free_pages; 403 321 drm->dmem->free_pages = page->zone_device_data; 404 322 chunk = nouveau_page_to_chunk(page); 405 323 chunk->callocated++; 406 324 spin_unlock(&drm->dmem->lock); 325 + folio = page_folio(page); 407 326 } else { 408 327 spin_unlock(&drm->dmem->lock); 409 - ret = nouveau_dmem_chunk_alloc(drm, &page); 328 + ret = nouveau_dmem_chunk_alloc(drm, &page, is_large); 410 329 if (ret) 411 330 return NULL; 331 + folio = page_folio(page); 332 + if (is_large) 333 + order = ilog2(DMEM_CHUNK_NPAGES); 412 334 } 413 335 414 - zone_device_page_init(page, 0); 336 + zone_device_folio_init(folio, order); 415 337 return page; 416 338 } 417 339 ··· 476 370 { 477 371 unsigned long i, npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT; 478 372 unsigned long *src_pfns, *dst_pfns; 479 - dma_addr_t *dma_addrs; 373 + struct nouveau_dmem_dma_info *dma_info; 480 374 struct nouveau_fence *fence; 481 375 482 376 src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); 483 377 dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | 
__GFP_NOFAIL); 484 - dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); 378 + dma_info = kvcalloc(npages, sizeof(*dma_info), GFP_KERNEL | __GFP_NOFAIL); 485 379 486 380 migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, 487 381 npages); ··· 489 383 for (i = 0; i < npages; i++) { 490 384 if (src_pfns[i] & MIGRATE_PFN_MIGRATE) { 491 385 struct page *dpage; 386 + struct folio *folio = page_folio( 387 + migrate_pfn_to_page(src_pfns[i])); 388 + unsigned int order = folio_order(folio); 492 389 493 - /* 494 - * _GFP_NOFAIL because the GPU is going away and there 495 - * is nothing sensible we can do if we can't copy the 496 - * data back. 497 - */ 498 - dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL); 390 + if (src_pfns[i] & MIGRATE_PFN_COMPOUND) { 391 + dpage = folio_page( 392 + folio_alloc( 393 + GFP_HIGHUSER_MOVABLE, order), 0); 394 + } else { 395 + /* 396 + * _GFP_NOFAIL because the GPU is going away and there 397 + * is nothing sensible we can do if we can't copy the 398 + * data back. 
399 + */ 400 + dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL); 401 + } 402 + 499 403 dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)); 500 - nouveau_dmem_copy_one(chunk->drm, 501 - migrate_pfn_to_page(src_pfns[i]), dpage, 502 - &dma_addrs[i]); 404 + nouveau_dmem_copy_folio(chunk->drm, 405 + page_folio(migrate_pfn_to_page(src_pfns[i])), 406 + page_folio(dpage), 407 + &dma_info[i]); 503 408 } 504 409 } 505 410 ··· 521 404 kvfree(src_pfns); 522 405 kvfree(dst_pfns); 523 406 for (i = 0; i < npages; i++) 524 - dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); 525 - kvfree(dma_addrs); 407 + dma_unmap_page(chunk->drm->dev->dev, dma_info[i].dma_addr, 408 + dma_info[i].size, DMA_BIDIRECTIONAL); 409 + kvfree(dma_info); 526 410 } 527 411 528 412 void ··· 726 608 727 609 static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, 728 610 struct nouveau_svmm *svmm, unsigned long src, 729 - dma_addr_t *dma_addr, u64 *pfn) 611 + struct nouveau_dmem_dma_info *dma_info, u64 *pfn) 730 612 { 731 613 struct device *dev = drm->dev->dev; 732 614 struct page *dpage, *spage; 733 615 unsigned long paddr; 616 + bool is_large = false; 617 + unsigned long mpfn; 734 618 735 619 spage = migrate_pfn_to_page(src); 736 620 if (!(src & MIGRATE_PFN_MIGRATE)) 737 621 goto out; 738 622 739 - dpage = nouveau_dmem_page_alloc_locked(drm); 623 + is_large = src & MIGRATE_PFN_COMPOUND; 624 + dpage = nouveau_dmem_page_alloc_locked(drm, is_large); 740 625 if (!dpage) 741 626 goto out; 742 627 743 628 paddr = nouveau_dmem_page_addr(dpage); 744 629 if (spage) { 745 - *dma_addr = dma_map_page(dev, spage, 0, page_size(spage), 630 + dma_info->dma_addr = dma_map_page(dev, spage, 0, page_size(spage), 746 631 DMA_BIDIRECTIONAL); 747 - if (dma_mapping_error(dev, *dma_addr)) 632 + dma_info->size = page_size(spage); 633 + if (dma_mapping_error(dev, dma_info->dma_addr)) 748 634 goto out_free_page; 749 - if (drm->dmem->migrate.copy_func(drm, 1, 750 - NOUVEAU_APER_VRAM, paddr, 
NOUVEAU_APER_HOST, *dma_addr)) 635 + if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(page_folio(spage)), 636 + NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, 637 + dma_info->dma_addr)) 751 638 goto out_dma_unmap; 752 639 } else { 753 - *dma_addr = DMA_MAPPING_ERROR; 640 + dma_info->dma_addr = DMA_MAPPING_ERROR; 754 641 if (drm->dmem->migrate.clear_func(drm, page_size(dpage), 755 642 NOUVEAU_APER_VRAM, paddr)) 756 643 goto out_free_page; ··· 766 643 ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT); 767 644 if (src & MIGRATE_PFN_WRITE) 768 645 *pfn |= NVIF_VMM_PFNMAP_V0_W; 769 - return migrate_pfn(page_to_pfn(dpage)); 646 + mpfn = migrate_pfn(page_to_pfn(dpage)); 647 + if (folio_order(page_folio(dpage))) 648 + mpfn |= MIGRATE_PFN_COMPOUND; 649 + return mpfn; 770 650 771 651 out_dma_unmap: 772 - dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 652 + dma_unmap_page(dev, dma_info->dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 773 653 out_free_page: 774 654 nouveau_dmem_page_free_locked(drm, dpage); 775 655 out: ··· 782 656 783 657 static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, 784 658 struct nouveau_svmm *svmm, struct migrate_vma *args, 785 - dma_addr_t *dma_addrs, u64 *pfns) 659 + struct nouveau_dmem_dma_info *dma_info, u64 *pfns) 786 660 { 787 661 struct nouveau_fence *fence; 788 662 unsigned long addr = args->start, nr_dma = 0, i; 663 + unsigned long order = 0; 789 664 790 - for (i = 0; addr < args->end; i++) { 665 + for (i = 0; addr < args->end; ) { 666 + struct folio *folio; 667 + 791 668 args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm, 792 - args->src[i], dma_addrs + nr_dma, pfns + i); 793 - if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma])) 669 + args->src[i], dma_info + nr_dma, pfns + i); 670 + if (!args->dst[i]) { 671 + i++; 672 + addr += PAGE_SIZE; 673 + continue; 674 + } 675 + if (!dma_mapping_error(drm->dev->dev, dma_info[nr_dma].dma_addr)) 794 676 nr_dma++; 795 - addr += PAGE_SIZE; 677 + folio = 
page_folio(migrate_pfn_to_page(args->dst[i])); 678 + order = folio_order(folio); 679 + i += 1 << order; 680 + addr += (1 << order) * PAGE_SIZE; 796 681 } 797 682 798 683 nouveau_fence_new(&fence, drm->dmem->migrate.chan); 799 684 migrate_vma_pages(args); 800 685 nouveau_dmem_fence_done(&fence); 801 - nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i); 686 + nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i, order); 802 687 803 688 while (nr_dma--) { 804 - dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE, 805 - DMA_BIDIRECTIONAL); 689 + dma_unmap_page(drm->dev->dev, dma_info[nr_dma].dma_addr, 690 + dma_info[nr_dma].size, DMA_BIDIRECTIONAL); 806 691 } 807 692 migrate_vma_finalize(args); 808 693 } ··· 826 689 unsigned long end) 827 690 { 828 691 unsigned long npages = (end - start) >> PAGE_SHIFT; 829 - unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); 830 - dma_addr_t *dma_addrs; 692 + unsigned long max = npages; 831 693 struct migrate_vma args = { 832 694 .vma = vma, 833 695 .start = start, 834 696 .pgmap_owner = drm->dev, 835 - .flags = MIGRATE_VMA_SELECT_SYSTEM, 697 + .flags = MIGRATE_VMA_SELECT_SYSTEM 698 + | MIGRATE_VMA_SELECT_COMPOUND, 836 699 }; 837 700 unsigned long i; 838 701 u64 *pfns; 839 702 int ret = -ENOMEM; 703 + struct nouveau_dmem_dma_info *dma_info; 840 704 841 - if (drm->dmem == NULL) 842 - return -ENODEV; 705 + if (drm->dmem == NULL) { 706 + ret = -ENODEV; 707 + goto out; 708 + } 709 + 710 + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 711 + if (max > (unsigned long)HPAGE_PMD_NR) 712 + max = (unsigned long)HPAGE_PMD_NR; 843 713 844 714 args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL); 845 715 if (!args.src) ··· 855 711 if (!args.dst) 856 712 goto out_free_src; 857 713 858 - dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL); 859 - if (!dma_addrs) 714 + dma_info = kmalloc_array(max, sizeof(*dma_info), GFP_KERNEL); 715 + if (!dma_info) 860 716 goto out_free_dst; 861 717 862 718 pfns = 
nouveau_pfns_alloc(max); ··· 874 730 goto out_free_pfns; 875 731 876 732 if (args.cpages) 877 - nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_addrs, 733 + nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_info, 878 734 pfns); 879 735 args.start = args.end; 880 736 } ··· 883 739 out_free_pfns: 884 740 nouveau_pfns_free(pfns); 885 741 out_free_dma: 886 - kfree(dma_addrs); 742 + kfree(dma_info); 887 743 out_free_dst: 888 744 kfree(args.dst); 889 745 out_free_src:
+4 -2
drivers/gpu/drm/nouveau/nouveau_svm.c
··· 921 921 922 922 void 923 923 nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, 924 - unsigned long addr, u64 *pfns, unsigned long npages) 924 + unsigned long addr, u64 *pfns, unsigned long npages, 925 + unsigned int page_shift) 925 926 { 926 927 struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); 927 928 928 929 args->p.addr = addr; 929 - args->p.size = npages << PAGE_SHIFT; 930 + args->p.size = npages << page_shift; 931 + args->p.page = page_shift; 930 932 931 933 mutex_lock(&svmm->mutex); 932 934
+2 -1
drivers/gpu/drm/nouveau/nouveau_svm.h
··· 33 33 u64 *nouveau_pfns_alloc(unsigned long npages); 34 34 void nouveau_pfns_free(u64 *pfns); 35 35 void nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, 36 - unsigned long addr, u64 *pfns, unsigned long npages); 36 + unsigned long addr, u64 *pfns, unsigned long npages, 37 + unsigned int page_shift); 37 38 #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ 38 39 static inline void nouveau_svm_init(struct nouveau_drm *drm) {} 39 40 static inline void nouveau_svm_fini(struct nouveau_drm *drm) {}