drm/amdgpu: statically assign gart windows to ttm entities

+3 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

···
742 742 * translation. Avoid this by doing the invalidation from the SDMA
743 743 * itself at least for GART.
744 744 */
745 - mutex_lock(&adev->mman.gtt_window_lock);
745 + mutex_lock(&adev->mman.default_entity.lock);
746 746 r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
747 747 AMDGPU_FENCE_OWNER_UNDEFINED,
748 748 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
···
755 755 job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
756 756 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
757 757 fence = amdgpu_job_submit(job);
758 - mutex_unlock(&adev->mman.gtt_window_lock);
758 + mutex_unlock(&adev->mman.default_entity.lock);
759 759
760 760 dma_fence_wait(fence, false);
761 761 dma_fence_put(fence);
···
763 763 return;
764 764
765 765 error_alloc:
766 - mutex_unlock(&adev->mman.gtt_window_lock);
766 + mutex_unlock(&adev->mman.default_entity.lock);
767 767 dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
768 768 }
769 769

+45 -19

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

···
228 228
229 229 *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
230 230
231 - *addr = adev->gmc.gart_start;
232 - *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
233 - AMDGPU_GPU_PAGE_SIZE;
231 + *addr = amdgpu_compute_gart_address(&adev->gmc, entity, window);
234 232 *addr += offset;
235 233
236 234 num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
···
246 248 src_addr += job->ibs[0].gpu_addr;
247 249
248 250 dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
249 - dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
251 + dst_addr += (entity->gart_window_offs[window] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
250 252 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
251 253 dst_addr, num_bytes, 0);
252 254
···
311 313 amdgpu_res_first(src->mem, src->offset, size, &src_mm);
312 314 amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
313 315
314 - mutex_lock(&adev->mman.gtt_window_lock);
316 + mutex_lock(&entity->lock);
315 317 while (src_mm.remaining) {
316 318 uint64_t from, to, cur_size, tiling_flags;
317 319 uint32_t num_type, data_format, max_com, write_compress_disable;
···
366 368 amdgpu_res_next(&dst_mm, cur_size);
367 369 }
368 370 error:
369 - mutex_unlock(&adev->mman.gtt_window_lock);
371 + mutex_unlock(&entity->lock);
370 372 *f = fence;
371 373 return r;
372 374 }
···
1578 1580 if (r)
1579 1581 goto out;
1580 1582
1581 - mutex_lock(&adev->mman.gtt_window_lock);
1583 + mutex_lock(&adev->mman.default_entity.lock);
1582 1584 amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
1583 1585 src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
1584 1586 src_mm.start;
···
1590 1592 PAGE_SIZE, 0);
1591 1593
1592 1594 fence = amdgpu_ttm_job_submit(adev, job, num_dw);
1593 - mutex_unlock(&adev->mman.gtt_window_lock);
1595 + mutex_unlock(&adev->mman.default_entity.lock);
1594 1596
1595 1597 if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
1596 1598 r = -ETIMEDOUT;
···
2011 2013 adev->rmmio_remap.bo = NULL;
2012 2014 }
2013 2015
2016 + static int amdgpu_ttm_buffer_entity_init(struct amdgpu_ttm_buffer_entity *entity,
2017 + int starting_gart_window,
2018 + u32 num_gart_windows)
2019 + {
2020 + int i;
2021 +
2022 + mutex_init(&entity->lock);
2023 +
2024 + if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows)
2025 + return starting_gart_window;
2026 +
2027 + for (i = 0; i < num_gart_windows; i++) {
2028 + entity->gart_window_offs[i] =
2029 + (u64)starting_gart_window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
2030 + AMDGPU_GPU_PAGE_SIZE;
2031 + starting_gart_window++;
2032 + }
2033 +
2034 + return starting_gart_window;
2035 + }
2036 +
2014 2037 /*
2015 2038 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
2016 2039 * gtt/vram related fields.
···
2045 2026 {
2046 2027 uint64_t gtt_size;
2047 2028 int r;
2048 -
2049 - mutex_init(&adev->mman.gtt_window_lock);
2050 2029
2051 2030 dma_set_max_seg_size(adev->dev, UINT_MAX);
2052 2031 /* No others user of address space so set it to 0 */
···
2319 2302 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
2320 2303 {
2321 2304 struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
2305 + u32 used_windows;
2322 2306 uint64_t size;
2323 2307 int r;
2324 2308
···
2363 2345 drm_sched_entity_destroy(&adev->mman.clear_entity.base);
2364 2346 goto error_free_entity;
2365 2347 }
2348 +
2349 + /* Statically assign GART windows to each entity. */
2350 + used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.default_entity, 0, 0);
2351 + used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.move_entity,
2352 + used_windows, 2);
2353 + used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.clear_entity,
2354 + used_windows, 1);
2366 2355 } else {
2367 2356 drm_sched_entity_destroy(&adev->mman.default_entity.base);
2368 2357 drm_sched_entity_destroy(&adev->mman.clear_entity.base);
···
2528 2503 struct dma_fence **fence)
2529 2504 {
2530 2505 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2506 + struct amdgpu_ttm_buffer_entity *entity;
2531 2507 struct amdgpu_res_cursor cursor;
2532 2508 u64 addr;
2533 2509 int r = 0;
···
2539 2513 if (!fence)
2540 2514 return -EINVAL;
2541 2515
2516 + entity = &adev->mman.clear_entity;
2542 2517 *fence = dma_fence_get_stub();
2543 2518
2544 2519 amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
2545 2520
2546 - mutex_lock(&adev->mman.gtt_window_lock);
2521 + mutex_lock(&entity->lock);
2547 2522 while (cursor.remaining) {
2548 2523 struct dma_fence *next = NULL;
2549 2524 u64 size;
···
2557 2530 /* Never clear more than 256MiB at once to avoid timeouts */
2558 2531 size = min(cursor.size, 256ULL << 20);
2559 2532
2560 - r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity,
2561 - &bo->tbo, bo->tbo.resource, &cursor,
2562 - 1, false, &size, &addr);
2533 + r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
2534 + 0, false, &size, &addr);
2563 2535 if (r)
2564 2536 goto err;
2565 2537
2566 - r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv,
2538 + r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv,
2567 2539 &next, true,
2568 2540 AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
2569 2541 if (r)
···
2574 2548 amdgpu_res_next(&cursor, size);
2575 2549 }
2576 2550 err:
2577 - mutex_unlock(&adev->mman.gtt_window_lock);
2551 + mutex_unlock(&entity->lock);
2578 2552
2579 2553 return r;
2580 2554 }
···
2599 2573
2600 2574 amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
2601 2575
2602 - mutex_lock(&adev->mman.gtt_window_lock);
2576 + mutex_lock(&entity->lock);
2603 2577 while (dst.remaining) {
2604 2578 struct dma_fence *next;
2605 2579 uint64_t cur_size, to;
···
2608 2582 cur_size = min(dst.size, 256ULL << 20);
2609 2583
2610 2584 r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
2611 - 1, false, &cur_size, &to);
2585 + 0, false, &cur_size, &to);
2612 2586 if (r)
2613 2587 goto error;
2614 2588
···
2624 2598 amdgpu_res_next(&dst, cur_size);
2625 2599 }
2626 2600 error:
2627 - mutex_unlock(&adev->mman.gtt_window_lock);
2601 + mutex_unlock(&entity->lock);
2628 2602 if (f)
2629 2603 *f = dma_fence_get(fence);
2630 2604 dma_fence_put(fence);

+18 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

···
29 29 #include <drm/ttm/ttm_placement.h>
30 30 #include "amdgpu_vram_mgr.h"
31 31 #include "amdgpu_hmm.h"
32 + #include "amdgpu_gmc.h"
32 33
33 34 #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
34 35 #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
···
40 39 #define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
41 40
42 41 #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
43 - #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
42 + #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 3
44 43
45 44 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
46 45 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
···
55 54
56 55 struct amdgpu_ttm_buffer_entity {
57 56 struct drm_sched_entity base;
57 + struct mutex lock;
58 + u64 gart_window_offs[2];
58 59 };
59 60
60 61 struct amdgpu_mman {
···
70 67 struct amdgpu_ring *buffer_funcs_ring;
71 68 bool buffer_funcs_enabled;
72 69
73 - struct mutex gtt_window_lock;
74 -
70 + /* @default_entity: for workarounds, has no gart windows */
75 71 struct amdgpu_ttm_buffer_entity default_entity;
76 72 struct amdgpu_ttm_buffer_entity clear_entity;
77 73 struct amdgpu_ttm_buffer_entity move_entity;
···
206 204 return -EPERM;
207 205 }
208 206 #endif
207 +
208 + /**
209 + * amdgpu_compute_gart_address() - Returns GART address of an entity's window
210 + * @gmc: The &struct amdgpu_gmc instance to use
211 + * @entity: The &struct amdgpu_ttm_buffer_entity owning the GART window
212 + * @index: The window to use (must be 0 or 1)
213 + */
214 + static inline u64 amdgpu_compute_gart_address(struct amdgpu_gmc *gmc,
215 + struct amdgpu_ttm_buffer_entity *entity,
216 + int index)
217 + {
218 + return gmc->gart_start + entity->gart_window_offs[index];
219 + }
209 220
210 221 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
211 222 int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,

+6 -6

drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

···
59 59 void *cpu_addr;
60 60 int r;
61 61
62 - /* use gart window 0 */
63 - *gart_addr = adev->gmc.gart_start;
62 + *gart_addr = amdgpu_compute_gart_address(&adev->gmc, entity, 0);
64 63
65 64 num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
66 65 num_bytes = npages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
···
77 78 src_addr += job->ibs[0].gpu_addr;
78 79
79 80 dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
81 + dst_addr += (entity->gart_window_offs[0] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
80 82 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
81 83 dst_addr, num_bytes, 0);
82 84
···
116 116 * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
117 117 * the last sdma finish fence which is returned to check copy memory is done.
118 118 *
119 - * Context: Process context, takes and releases gtt_window_lock
119 + * Context: Process context
120 120 *
121 121 * Return:
122 122 * 0 - OK, otherwise error code
···
136 136 u64 size;
137 137 int r;
138 138
139 - entity = &adev->mman.default_entity;
139 + entity = &adev->mman.move_entity;
140 140
141 - mutex_lock(&adev->mman.gtt_window_lock);
141 + mutex_lock(&entity->lock);
142 142
143 143 while (npages) {
144 144 size = min(GTT_MAX_PAGES, npages);
···
175 175 }
176 176
177 177 out_unlock:
178 - mutex_unlock(&adev->mman.gtt_window_lock);
178 + mutex_unlock(&entity->lock);
179 179
180 180 return r;
181 181 }

Configure Feed

Configure Feed