Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: Enable GFX11 usermode queue oversubscription

Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.

This change is accompanied by changes in MES and is applicable for
MES_API_VERSION >= 2.

v3:
- Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
- Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
- Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
- Cleanup/fix create_queue wptr_bo error handling
v4:
- Add MES version shift/mask defines to amdgpu_mes.h
- Change version check from MES_VERSION to MES_API_VERSION
- Add check in kfd_ioctl_create_queue before wptr bo pin/GART map to
ensure the bo is no larger than a single page.

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Graham Sider and committed by
Alex Deucher
e77a541f ff83e6e7

+125 -8
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 286 286 void **kptr, uint64_t *size); 287 287 void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); 288 288 289 + int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); 290 + 289 291 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, 290 292 struct dma_fence **ef); 291 293 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+48
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 2113 2113 return ret; 2114 2114 } 2115 2115 2116 + /** 2117 + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count 2118 + * @adev: Device to which allocated BO belongs 2119 + * @bo: Buffer object to be mapped 2120 + * 2121 + * Before return, bo reference count is incremented. To release the reference and unpin/ 2122 + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. 2123 + */ 2124 + int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) 2125 + { 2126 + int ret; 2127 + 2128 + ret = amdgpu_bo_reserve(bo, true); 2129 + if (ret) { 2130 + pr_err("Failed to reserve bo. ret %d\n", ret); 2131 + goto err_reserve_bo_failed; 2132 + } 2133 + 2134 + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); 2135 + if (ret) { 2136 + pr_err("Failed to pin bo. ret %d\n", ret); 2137 + goto err_pin_bo_failed; 2138 + } 2139 + 2140 + ret = amdgpu_ttm_alloc_gart(&bo->tbo); 2141 + if (ret) { 2142 + pr_err("Failed to bind bo to GART. ret %d\n", ret); 2143 + goto err_map_bo_gart_failed; 2144 + } 2145 + 2146 + amdgpu_amdkfd_remove_eviction_fence( 2147 + bo, bo->kfd_bo->process_info->eviction_fence); 2148 + 2149 + amdgpu_bo_unreserve(bo); 2150 + 2151 + bo = amdgpu_bo_ref(bo); 2152 + 2153 + return 0; 2154 + 2155 + err_map_bo_gart_failed: 2156 + amdgpu_bo_unpin(bo); 2157 + err_pin_bo_failed: 2158 + amdgpu_bo_unreserve(bo); 2159 + err_reserve_bo_failed: 2160 + 2161 + return ret; 2162 + } 2163 + 2116 2164 /** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access 2117 2165 * 2118 2166 * @mem: Buffer object to be mapped for CPU access
+7
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
··· 33 33 #define AMDGPU_MES_MAX_GFX_PIPES 2 34 34 #define AMDGPU_MES_MAX_SDMA_PIPES 2 35 35 36 + #define AMDGPU_MES_API_VERSION_SHIFT 12 37 + #define AMDGPU_MES_FEAT_VERSION_SHIFT 24 38 + 39 + #define AMDGPU_MES_VERSION_MASK 0x00000fff 40 + #define AMDGPU_MES_API_VERSION_MASK 0x00fff000 41 + #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 42 + 36 43 enum amdgpu_mes_priority_level { 37 44 AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, 38 45 AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
+43 -2
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
··· 299 299 struct kfd_process_device *pdd; 300 300 struct queue_properties q_properties; 301 301 uint32_t doorbell_offset_in_process = 0; 302 + struct amdgpu_bo *wptr_bo = NULL; 302 303 303 304 memset(&q_properties, 0, sizeof(struct queue_properties)); 304 305 ··· 327 326 goto err_bind_process; 328 327 } 329 328 329 + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work 330 + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) 331 + */ 332 + if (dev->shared_resources.enable_mes && 333 + ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) 334 + >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { 335 + struct amdgpu_bo_va_mapping *wptr_mapping; 336 + struct amdgpu_vm *wptr_vm; 337 + 338 + wptr_vm = drm_priv_to_vm(pdd->drm_priv); 339 + err = amdgpu_bo_reserve(wptr_vm->root.bo, false); 340 + if (err) 341 + goto err_wptr_map_gart; 342 + 343 + wptr_mapping = amdgpu_vm_bo_lookup_mapping( 344 + wptr_vm, args->write_pointer_address >> PAGE_SHIFT); 345 + amdgpu_bo_unreserve(wptr_vm->root.bo); 346 + if (!wptr_mapping) { 347 + pr_err("Failed to lookup wptr bo\n"); 348 + err = -EINVAL; 349 + goto err_wptr_map_gart; 350 + } 351 + 352 + wptr_bo = wptr_mapping->bo_va->base.bo; 353 + if (wptr_bo->tbo.base.size > PAGE_SIZE) { 354 + pr_err("Requested GART mapping for wptr bo larger than one page\n"); 355 + err = -EINVAL; 356 + goto err_wptr_map_gart; 357 + } 358 + 359 + err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo); 360 + if (err) { 361 + pr_err("Failed to map wptr bo to GART\n"); 362 + goto err_wptr_map_gart; 363 + } 364 + } 365 + 330 366 pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", 331 367 p->pasid, 332 368 dev->id); 333 369 334 - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL, 335 - &doorbell_offset_in_process); 370 + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo, 371 + NULL, NULL, NULL, &doorbell_offset_in_process); 336 372 if 
(err != 0) 337 373 goto err_create_queue; 338 374 ··· 401 363 return 0; 402 364 403 365 err_create_queue: 366 + if (wptr_bo) 367 + amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); 368 + err_wptr_map_gart: 404 369 err_bind_process: 405 370 err_pdd: 406 371 mutex_unlock(&p->mutex);
+8 -1
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 177 177 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 178 178 struct mes_add_queue_input queue_input; 179 179 int r, queue_type; 180 + uint64_t wptr_addr_off; 180 181 181 182 if (dqm->is_hws_hang) 182 183 return -EIO; ··· 197 196 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 198 197 queue_input.doorbell_offset = q->properties.doorbell_off; 199 198 queue_input.mqd_addr = q->gart_mqd_addr; 200 - queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 199 + 200 + if (q->wptr_bo) { 201 + wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va; 202 + queue_input.wptr_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; 203 + } else 204 + queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 205 + 201 206 queue_input.paging = false; 202 207 queue_input.tba_addr = qpd->tba_addr; 203 208 queue_input.tma_addr = qpd->tma_addr;
+2
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
··· 377 377 m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8); 378 378 m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); 379 379 m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 380 + m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); 381 + m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); 380 382 m->sdmax_rlcx_doorbell_offset = 381 383 q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; 382 384
+3
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 571 571 void *gang_ctx_bo; 572 572 uint64_t gang_ctx_gpu_addr; 573 573 void *gang_ctx_cpu_ptr; 574 + 575 + struct amdgpu_bo *wptr_bo; 574 576 }; 575 577 576 578 enum KFD_MQD_TYPE { ··· 1208 1206 struct file *f, 1209 1207 struct queue_properties *properties, 1210 1208 unsigned int *qid, 1209 + struct amdgpu_bo *wptr_bo, 1211 1210 const struct kfd_criu_queue_priv_data *q_data, 1212 1211 const void *restore_mqd, 1213 1212 const void *restore_ctl_stack,
+12 -5
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 180 180 static int init_user_queue(struct process_queue_manager *pqm, 181 181 struct kfd_dev *dev, struct queue **q, 182 182 struct queue_properties *q_properties, 183 - struct file *f, unsigned int qid) 183 + struct file *f, struct amdgpu_bo *wptr_bo, 184 + unsigned int qid) 184 185 { 185 186 int retval; 186 187 ··· 211 210 goto cleanup; 212 211 } 213 212 memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); 213 + (*q)->wptr_bo = wptr_bo; 214 214 } 215 215 216 216 pr_debug("PQM After init queue"); ··· 228 226 struct file *f, 229 227 struct queue_properties *properties, 230 228 unsigned int *qid, 229 + struct amdgpu_bo *wptr_bo, 231 230 const struct kfd_criu_queue_priv_data *q_data, 232 231 const void *restore_mqd, 233 232 const void *restore_ctl_stack, ··· 291 288 * allocate_sdma_queue() in create_queue() has the 292 289 * corresponding check logic. 293 290 */ 294 - retval = init_user_queue(pqm, dev, &q, properties, f, *qid); 291 + retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); 295 292 if (retval != 0) 296 293 goto err_create_queue; 297 294 pqn->q = q; ··· 312 309 goto err_create_queue; 313 310 } 314 311 315 - retval = init_user_queue(pqm, dev, &q, properties, f, *qid); 312 + retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); 316 313 if (retval != 0) 317 314 goto err_create_queue; 318 315 pqn->q = q; ··· 438 435 pdd->qpd.num_gws = 0; 439 436 } 440 437 441 - if (dev->shared_resources.enable_mes) 438 + if (dev->shared_resources.enable_mes) { 442 439 amdgpu_amdkfd_free_gtt_mem(dev->adev, 443 440 pqn->q->gang_ctx_bo); 441 + if (pqn->q->wptr_bo) 442 + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); 443 + 444 + } 444 445 kfd_procfs_del_queue(pqn->q); 445 446 uninit_queue(pqn->q); 446 447 } ··· 851 844 852 845 print_queue_properties(&qp); 853 846 854 - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack, 847 + ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, 
NULL, q_data, mqd, ctl_stack, 855 848 NULL); 856 849 if (ret) { 857 850 pr_err("Failed to create new queue err:%d\n", ret);