Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/ttm: rework pipelined eviction fence handling

Until now, TTM stored a single pipelined eviction fence, which means
drivers had to use a single entity for these evictions.

To lift this requirement, this commit allows up to 8 entities to
be used.

Ideally a dma_resv object would have been used as a container of
the eviction fences, but the locking rules make it complex.
All dma_resv objects share the same ww_class, so locking another
one after ww_acquire_done() triggers the "Attempting to lock more
mutexes after ww_acquire_done." error.

One alternative considered was to introduce a second ww_class for
specific resv objects to hold a single "transient" lock (= the resv
lock would only be held for a short period, without taking any other
locks).

The other option is to statically reserve a fence array, and
extend the existing code to deal with N fences instead of 1.
This is the approach taken here.

The driver is still responsible for reserving the correct number
of fence slots.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Link: https://lore.kernel.org/r/20251121101315.3585-20-pierre-eric.pelloux-prayer@amd.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>

authored by

Pierre-Eric Pelloux-Prayer and committed by
Christian König
ddf055b8 6f53bcb4

+104 -57
+6 -5
drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
··· 652 652 int err; 653 653 654 654 man = ttm_manager_type(priv->ttm_dev, mem_type); 655 - man->move = dma_fence_get_stub(); 655 + man->eviction_fences[0] = dma_fence_get_stub(); 656 656 657 657 bo = ttm_bo_kunit_init(test, test->priv, size, NULL); 658 658 bo->type = bo_type; ··· 669 669 KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size); 670 670 671 671 ttm_bo_fini(bo); 672 - dma_fence_put(man->move); 672 + dma_fence_put(man->eviction_fences[0]); 673 673 } 674 674 675 675 static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = { ··· 733 733 734 734 spin_lock_init(&fence_lock); 735 735 man = ttm_manager_type(priv->ttm_dev, fst_mem); 736 - man->move = alloc_mock_fence(test); 736 + man->eviction_fences[0] = alloc_mock_fence(test); 737 737 738 - task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal"); 738 + task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal"); 739 739 if (IS_ERR(task)) 740 740 KUNIT_FAIL(test, "Couldn't create move fence signal task\n"); 741 741 ··· 743 743 err = ttm_bo_validate(bo, placement_val, &ctx_val); 744 744 dma_resv_unlock(bo->base.resv); 745 745 746 - dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT); 746 + dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT); 747 + man->eviction_fences[0] = NULL; 747 748 748 749 KUNIT_EXPECT_EQ(test, err, 0); 749 750 KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
+3 -2
drivers/gpu/drm/ttm/tests/ttm_resource_test.c
··· 207 207 struct ttm_resource_test_priv *priv = test->priv; 208 208 struct ttm_resource_manager *man; 209 209 size_t size = SZ_16K; 210 + int i; 210 211 211 212 man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL); 212 213 KUNIT_ASSERT_NOT_NULL(test, man); ··· 217 216 KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev); 218 217 KUNIT_ASSERT_EQ(test, man->size, size); 219 218 KUNIT_ASSERT_EQ(test, man->usage, 0); 220 - KUNIT_ASSERT_NULL(test, man->move); 221 - KUNIT_ASSERT_NOT_NULL(test, &man->move_lock); 219 + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) 220 + KUNIT_ASSERT_NULL(test, man->eviction_fences[i]); 222 221 223 222 for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i) 224 223 KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
+24 -23
drivers/gpu/drm/ttm/ttm_bo.c
··· 659 659 EXPORT_SYMBOL(ttm_bo_unpin); 660 660 661 661 /* 662 - * Add the last move fence to the BO as kernel dependency and reserve a new 663 - * fence slot. 662 + * Add the pipelined eviction fencesto the BO as kernel dependency and reserve new 663 + * fence slots. 664 664 */ 665 - static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, 666 - struct ttm_resource_manager *man, 667 - bool no_wait_gpu) 665 + static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo, 666 + struct ttm_resource_manager *man, 667 + bool no_wait_gpu) 668 668 { 669 669 struct dma_fence *fence; 670 - int ret; 670 + int i; 671 671 672 - spin_lock(&man->move_lock); 673 - fence = dma_fence_get(man->move); 674 - spin_unlock(&man->move_lock); 672 + spin_lock(&man->eviction_lock); 673 + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) { 674 + fence = man->eviction_fences[i]; 675 + if (!fence) 676 + continue; 675 677 676 - if (!fence) 677 - return 0; 678 - 679 - if (no_wait_gpu) { 680 - ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY; 681 - dma_fence_put(fence); 682 - return ret; 678 + if (no_wait_gpu) { 679 + if (!dma_fence_is_signaled(fence)) { 680 + spin_unlock(&man->eviction_lock); 681 + return -EBUSY; 682 + } 683 + } else { 684 + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); 685 + } 683 686 } 687 + spin_unlock(&man->eviction_lock); 684 688 685 - dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); 686 - 687 - ret = dma_resv_reserve_fences(bo->base.resv, 1); 688 - dma_fence_put(fence); 689 - return ret; 689 + /* TODO: this call should be removed. 
*/ 690 + return dma_resv_reserve_fences(bo->base.resv, 1); 690 691 } 691 692 692 693 /** ··· 720 719 int i, ret; 721 720 722 721 ticket = dma_resv_locking_ctx(bo->base.resv); 723 - ret = dma_resv_reserve_fences(bo->base.resv, 1); 722 + ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES); 724 723 if (unlikely(ret)) 725 724 return ret; 726 725 ··· 759 758 return ret; 760 759 } 761 760 762 - ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu); 761 + ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu); 763 762 if (unlikely(ret)) { 764 763 ttm_resource_free(bo, res); 765 764 if (ret == -EBUSY)
+31 -7
drivers/gpu/drm/ttm/ttm_bo_util.c
··· 258 258 ret = dma_resv_trylock(&fbo->base.base._resv); 259 259 WARN_ON(!ret); 260 260 261 - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); 261 + ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES); 262 262 if (ret) { 263 263 dma_resv_unlock(&fbo->base.base._resv); 264 264 kfree(fbo); ··· 646 646 { 647 647 struct ttm_device *bdev = bo->bdev; 648 648 struct ttm_resource_manager *from; 649 + struct dma_fence *tmp; 650 + int i; 649 651 650 652 from = ttm_manager_type(bdev, bo->resource->mem_type); 651 653 652 654 /** 653 655 * BO doesn't have a TTM we need to bind/unbind. Just remember 654 - * this eviction and free up the allocation 656 + * this eviction and free up the allocation. 657 + * The fence will be saved in the first free slot or in the slot 658 + * already used to store a fence from the same context. Since 659 + * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for 660 + * evictions we should always find a slot to use. 655 661 */ 656 - spin_lock(&from->move_lock); 657 - if (!from->move || dma_fence_is_later(fence, from->move)) { 658 - dma_fence_put(from->move); 659 - from->move = dma_fence_get(fence); 662 + spin_lock(&from->eviction_lock); 663 + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) { 664 + tmp = from->eviction_fences[i]; 665 + if (!tmp) 666 + break; 667 + if (fence->context != tmp->context) 668 + continue; 669 + if (dma_fence_is_later(fence, tmp)) { 670 + dma_fence_put(tmp); 671 + break; 672 + } 673 + goto unlock; 660 674 } 661 - spin_unlock(&from->move_lock); 675 + if (i < TTM_NUM_MOVE_FENCES) { 676 + from->eviction_fences[i] = dma_fence_get(fence); 677 + } else { 678 + WARN(1, "not enough fence slots for all fence contexts"); 679 + spin_unlock(&from->eviction_lock); 680 + dma_fence_wait(fence, false); 681 + goto end; 682 + } 662 683 684 + unlock: 685 + spin_unlock(&from->eviction_lock); 686 + end: 663 687 ttm_resource_free(bo, &bo->resource); 664 688 } 665 689
+19 -12
drivers/gpu/drm/ttm/ttm_resource.c
··· 524 524 { 525 525 unsigned i; 526 526 527 - spin_lock_init(&man->move_lock); 528 527 man->bdev = bdev; 529 528 man->size = size; 530 529 man->usage = 0; 531 530 532 531 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) 533 532 INIT_LIST_HEAD(&man->lru[i]); 534 - man->move = NULL; 533 + spin_lock_init(&man->eviction_lock); 534 + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) 535 + man->eviction_fences[i] = NULL; 535 536 } 536 537 EXPORT_SYMBOL(ttm_resource_manager_init); 537 538 ··· 553 552 .no_wait_gpu = false, 554 553 }; 555 554 struct dma_fence *fence; 556 - int ret; 555 + int ret, i; 557 556 558 557 do { 559 558 ret = ttm_bo_evict_first(bdev, man, &ctx); ··· 563 562 if (ret && ret != -ENOENT) 564 563 return ret; 565 564 566 - spin_lock(&man->move_lock); 567 - fence = dma_fence_get(man->move); 568 - spin_unlock(&man->move_lock); 565 + ret = 0; 569 566 570 - if (fence) { 571 - ret = dma_fence_wait(fence, false); 572 - dma_fence_put(fence); 573 - if (ret) 574 - return ret; 567 + spin_lock(&man->eviction_lock); 568 + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) { 569 + fence = man->eviction_fences[i]; 570 + if (fence && !dma_fence_is_signaled(fence)) { 571 + dma_fence_get(fence); 572 + spin_unlock(&man->eviction_lock); 573 + ret = dma_fence_wait(fence, false); 574 + dma_fence_put(fence); 575 + if (ret) 576 + return ret; 577 + spin_lock(&man->eviction_lock); 578 + } 575 579 } 580 + spin_unlock(&man->eviction_lock); 576 581 577 - return 0; 582 + return ret; 578 583 } 579 584 EXPORT_SYMBOL(ttm_resource_manager_evict_all); 580 585
+21 -8
include/drm/ttm/ttm_resource.h
··· 52 52 struct scatterlist; 53 53 54 54 /** 55 + * define TTM_NUM_MOVE_FENCES - How many entities can be used for evictions 56 + * 57 + * Pipelined evictions can be spread on multiple entities. This 58 + * is the max number of entities that can be used by the driver 59 + * for that purpose. 60 + */ 61 + #define TTM_NUM_MOVE_FENCES 8 62 + 63 + /** 55 64 * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses 56 65 */ 57 66 enum ttm_lru_item_type { ··· 190 181 * @size: Size of the managed region. 191 182 * @bdev: ttm device this manager belongs to 192 183 * @func: structure pointer implementing the range manager. See above 193 - * @move_lock: lock for move fence 194 - * @move: The fence of the last pipelined move operation. 184 + * @eviction_lock: lock for eviction fences 185 + * @eviction_fences: The fences of the last pipelined move operation. 195 186 * @lru: The lru list for this memory type. 196 187 * 197 188 * This structure is used to identify and manage memory types for a device. ··· 205 196 struct ttm_device *bdev; 206 197 uint64_t size; 207 198 const struct ttm_resource_manager_func *func; 208 - spinlock_t move_lock; 209 199 210 - /* 211 - * Protected by @move_lock. 200 + /* This is very similar to a dma_resv object, but locking rules make 201 + * it difficult to use one in this context. 212 202 */ 213 - struct dma_fence *move; 203 + spinlock_t eviction_lock; 204 + struct dma_fence *eviction_fences[TTM_NUM_MOVE_FENCES]; 214 205 215 206 /* 216 207 * Protected by the bdev->lru_lock. ··· 431 422 static inline void 432 423 ttm_resource_manager_cleanup(struct ttm_resource_manager *man) 433 424 { 434 - dma_fence_put(man->move); 435 - man->move = NULL; 425 + int i; 426 + 427 + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) { 428 + dma_fence_put(man->eviction_fences[i]); 429 + man->eviction_fences[i] = NULL; 430 + } 436 431 } 437 432 438 433 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);