Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: add mes unmap legacy queue routine

Because the MES KIQ has been taken over by the MES scheduler, the driver
can't directly use the MES KIQ to unmap queues. The driver has to use the
MES scheduler API to unmap legacy queues.

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Jack Xiao and committed by
Alex Deucher
18ee4ce6 14ab2924

+527 -152
+5 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 367 367 368 368 /* create MQD for KIQ */ 369 369 ring = &adev->gfx.kiq.ring; 370 - if (!ring->mqd_obj) { 370 + if (!adev->enable_mes_kiq && !ring->mqd_obj) { 371 371 /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must 372 372 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD 373 373 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for ··· 464 464 { 465 465 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 466 466 struct amdgpu_ring *kiq_ring = &kiq->ring; 467 - int i, r; 467 + int i, r = 0; 468 468 469 469 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 470 470 return -EINVAL; ··· 479 479 for (i = 0; i < adev->gfx.num_compute_rings; i++) 480 480 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], 481 481 RESET_QUEUES, 0, 0); 482 - r = amdgpu_ring_test_helper(kiq_ring); 482 + 483 + if (adev->gfx.kiq.ring.sched.ready) 484 + r = amdgpu_ring_test_helper(kiq_ring); 483 485 spin_unlock(&adev->gfx.kiq.ring_lock); 484 486 485 487 return r;
+213 -124
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
··· 150 150 idr_init(&adev->mes.queue_id_idr); 151 151 ida_init(&adev->mes.doorbell_ida); 152 152 spin_lock_init(&adev->mes.queue_id_lock); 153 - mutex_init(&adev->mes.mutex); 153 + mutex_init(&adev->mes.mutex_hidden); 154 154 155 155 adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; 156 156 adev->mes.vmid_mask_mmhub = 0xffffff00; ··· 166 166 for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) 167 167 adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; 168 168 169 - for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) 170 - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; 169 + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { 170 + if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0)) 171 + adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; 172 + else 173 + adev->mes.sdma_hqd_mask[i] = 0xfc; 174 + } 171 175 172 176 for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) 173 177 adev->mes.agreegated_doorbells[i] = 0xffffffff; ··· 211 207 idr_destroy(&adev->mes.gang_id_idr); 212 208 idr_destroy(&adev->mes.queue_id_idr); 213 209 ida_destroy(&adev->mes.doorbell_ida); 214 - mutex_destroy(&adev->mes.mutex); 210 + mutex_destroy(&adev->mes.mutex_hidden); 215 211 return r; 216 212 } 217 213 ··· 223 219 idr_destroy(&adev->mes.gang_id_idr); 224 220 idr_destroy(&adev->mes.queue_id_idr); 225 221 ida_destroy(&adev->mes.doorbell_ida); 226 - mutex_destroy(&adev->mes.mutex); 222 + mutex_destroy(&adev->mes.mutex_hidden); 223 + } 224 + 225 + static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) 226 + { 227 + amdgpu_bo_free_kernel(&q->mqd_obj, 228 + &q->mqd_gpu_addr, 229 + &q->mqd_cpu_ptr); 227 230 } 228 231 229 232 int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, ··· 239 228 struct amdgpu_mes_process *process; 240 229 int r; 241 230 242 - mutex_lock(&adev->mes.mutex); 243 - 244 231 /* allocate the mes process buffer */ 245 232 process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); 246 233 if (!process) { 247 234 DRM_ERROR("no more memory to create mes 
process\n"); 248 - mutex_unlock(&adev->mes.mutex); 249 235 return -ENOMEM; 250 236 } 251 237 ··· 252 244 if (!process->doorbell_bitmap) { 253 245 DRM_ERROR("failed to allocate doorbell bitmap\n"); 254 246 kfree(process); 255 - mutex_unlock(&adev->mes.mutex); 256 247 return -ENOMEM; 257 - } 258 - 259 - /* add the mes process to idr list */ 260 - r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, 261 - GFP_KERNEL); 262 - if (r < 0) { 263 - DRM_ERROR("failed to lock pasid=%d\n", pasid); 264 - goto clean_up_memory; 265 248 } 266 249 267 250 /* allocate the process context bo and map it */ ··· 263 264 &process->proc_ctx_cpu_ptr); 264 265 if (r) { 265 266 DRM_ERROR("failed to allocate process context bo\n"); 266 - goto clean_up_pasid; 267 + goto clean_up_memory; 267 268 } 268 269 memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); 270 + 271 + /* 272 + * Avoid taking any other locks under MES lock to avoid circular 273 + * lock dependencies. 274 + */ 275 + amdgpu_mes_lock(&adev->mes); 276 + 277 + /* add the mes process to idr list */ 278 + r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, 279 + GFP_KERNEL); 280 + if (r < 0) { 281 + DRM_ERROR("failed to lock pasid=%d\n", pasid); 282 + goto clean_up_ctx; 283 + } 269 284 270 285 /* allocate the starting doorbell index of the process */ 271 286 r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index); 272 287 if (r < 0) { 273 288 DRM_ERROR("failed to allocate doorbell for process\n"); 274 - goto clean_up_ctx; 289 + goto clean_up_pasid; 275 290 } 276 291 277 292 DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index); ··· 296 283 process->process_quantum = adev->mes.default_process_quantum; 297 284 process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); 298 285 299 - mutex_unlock(&adev->mes.mutex); 286 + amdgpu_mes_unlock(&adev->mes); 300 287 return 0; 301 288 289 + clean_up_pasid: 290 + idr_remove(&adev->mes.pasid_idr, pasid); 291 + 
amdgpu_mes_unlock(&adev->mes); 302 292 clean_up_ctx: 303 293 amdgpu_bo_free_kernel(&process->proc_ctx_bo, 304 294 &process->proc_ctx_gpu_addr, 305 295 &process->proc_ctx_cpu_ptr); 306 - clean_up_pasid: 307 - idr_remove(&adev->mes.pasid_idr, pasid); 308 296 clean_up_memory: 309 297 kfree(process->doorbell_bitmap); 310 298 kfree(process); 311 - mutex_unlock(&adev->mes.mutex); 312 299 return r; 313 300 } 314 301 ··· 321 308 unsigned long flags; 322 309 int r; 323 310 324 - mutex_lock(&adev->mes.mutex); 311 + /* 312 + * Avoid taking any other locks under MES lock to avoid circular 313 + * lock dependencies. 314 + */ 315 + amdgpu_mes_lock(&adev->mes); 325 316 326 317 process = idr_find(&adev->mes.pasid_idr, pasid); 327 318 if (!process) { 328 319 DRM_WARN("pasid %d doesn't exist\n", pasid); 329 - mutex_unlock(&adev->mes.mutex); 320 + amdgpu_mes_unlock(&adev->mes); 330 321 return; 331 322 } 332 323 333 - /* free all gangs in the process */ 324 + /* Remove all queues from hardware */ 334 325 list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { 335 - /* free all queues in the gang */ 336 326 list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { 337 327 spin_lock_irqsave(&adev->mes.queue_id_lock, flags); 338 328 idr_remove(&adev->mes.queue_id_idr, queue->queue_id); ··· 348 332 &queue_input); 349 333 if (r) 350 334 DRM_WARN("failed to remove hardware queue\n"); 351 - 352 - list_del(&queue->list); 353 - kfree(queue); 354 335 } 355 336 356 337 idr_remove(&adev->mes.gang_id_idr, gang->gang_id); 338 + } 339 + 340 + amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); 341 + idr_remove(&adev->mes.pasid_idr, pasid); 342 + amdgpu_mes_unlock(&adev->mes); 343 + 344 + /* free all memory allocated by the process */ 345 + list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { 346 + /* free all queues in the gang */ 347 + list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { 348 + amdgpu_mes_queue_free_mqd(queue); 349 + 
list_del(&queue->list); 350 + kfree(queue); 351 + } 357 352 amdgpu_bo_free_kernel(&gang->gang_ctx_bo, 358 353 &gang->gang_ctx_gpu_addr, 359 354 &gang->gang_ctx_cpu_ptr); 360 355 list_del(&gang->list); 361 356 kfree(gang); 357 + 362 358 } 363 - 364 - amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); 365 - 366 - idr_remove(&adev->mes.pasid_idr, pasid); 367 359 amdgpu_bo_free_kernel(&process->proc_ctx_bo, 368 360 &process->proc_ctx_gpu_addr, 369 361 &process->proc_ctx_cpu_ptr); 370 362 kfree(process->doorbell_bitmap); 371 363 kfree(process); 372 - 373 - mutex_unlock(&adev->mes.mutex); 374 364 } 375 365 376 366 int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, ··· 387 365 struct amdgpu_mes_gang *gang; 388 366 int r; 389 367 390 - mutex_lock(&adev->mes.mutex); 391 - 392 - process = idr_find(&adev->mes.pasid_idr, pasid); 393 - if (!process) { 394 - DRM_ERROR("pasid %d doesn't exist\n", pasid); 395 - mutex_unlock(&adev->mes.mutex); 396 - return -EINVAL; 397 - } 398 - 399 368 /* allocate the mes gang buffer */ 400 369 gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); 401 370 if (!gang) { 402 - mutex_unlock(&adev->mes.mutex); 403 371 return -ENOMEM; 404 372 } 405 - 406 - /* add the mes gang to idr list */ 407 - r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, 408 - GFP_KERNEL); 409 - if (r < 0) { 410 - kfree(gang); 411 - mutex_unlock(&adev->mes.mutex); 412 - return r; 413 - } 414 - 415 - gang->gang_id = r; 416 - *gang_id = r; 417 373 418 374 /* allocate the gang context bo and map it to cpu space */ 419 375 r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, ··· 401 401 &gang->gang_ctx_cpu_ptr); 402 402 if (r) { 403 403 DRM_ERROR("failed to allocate process context bo\n"); 404 - goto clean_up; 404 + goto clean_up_mem; 405 405 } 406 406 memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); 407 + 408 + /* 409 + * Avoid taking any other locks under MES lock to avoid circular 410 + * lock dependencies. 
411 + */ 412 + amdgpu_mes_lock(&adev->mes); 413 + 414 + process = idr_find(&adev->mes.pasid_idr, pasid); 415 + if (!process) { 416 + DRM_ERROR("pasid %d doesn't exist\n", pasid); 417 + r = -EINVAL; 418 + goto clean_up_ctx; 419 + } 420 + 421 + /* add the mes gang to idr list */ 422 + r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, 423 + GFP_KERNEL); 424 + if (r < 0) { 425 + DRM_ERROR("failed to allocate idr for gang\n"); 426 + goto clean_up_ctx; 427 + } 428 + 429 + gang->gang_id = r; 430 + *gang_id = r; 407 431 408 432 INIT_LIST_HEAD(&gang->queue_list); 409 433 gang->process = process; ··· 438 414 gang->inprocess_gang_priority = gprops->inprocess_gang_priority; 439 415 list_add_tail(&gang->list, &process->gang_list); 440 416 441 - mutex_unlock(&adev->mes.mutex); 417 + amdgpu_mes_unlock(&adev->mes); 442 418 return 0; 443 419 444 - clean_up: 445 - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); 420 + clean_up_ctx: 421 + amdgpu_mes_unlock(&adev->mes); 422 + amdgpu_bo_free_kernel(&gang->gang_ctx_bo, 423 + &gang->gang_ctx_gpu_addr, 424 + &gang->gang_ctx_cpu_ptr); 425 + clean_up_mem: 446 426 kfree(gang); 447 - mutex_unlock(&adev->mes.mutex); 448 427 return r; 449 428 } 450 429 ··· 455 428 { 456 429 struct amdgpu_mes_gang *gang; 457 430 458 - mutex_lock(&adev->mes.mutex); 431 + /* 432 + * Avoid taking any other locks under MES lock to avoid circular 433 + * lock dependencies. 
434 + */ 435 + amdgpu_mes_lock(&adev->mes); 459 436 460 437 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 461 438 if (!gang) { 462 439 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 463 - mutex_unlock(&adev->mes.mutex); 440 + amdgpu_mes_unlock(&adev->mes); 464 441 return -EINVAL; 465 442 } 466 443 467 444 if (!list_empty(&gang->queue_list)) { 468 445 DRM_ERROR("queue list is not empty\n"); 469 - mutex_unlock(&adev->mes.mutex); 446 + amdgpu_mes_unlock(&adev->mes); 470 447 return -EBUSY; 471 448 } 472 449 473 450 idr_remove(&adev->mes.gang_id_idr, gang->gang_id); 451 + list_del(&gang->list); 452 + amdgpu_mes_unlock(&adev->mes); 453 + 474 454 amdgpu_bo_free_kernel(&gang->gang_ctx_bo, 475 455 &gang->gang_ctx_gpu_addr, 476 456 &gang->gang_ctx_cpu_ptr); 477 - list_del(&gang->list); 457 + 478 458 kfree(gang); 479 459 480 - mutex_unlock(&adev->mes.mutex); 481 460 return 0; 482 461 } 483 462 ··· 495 462 struct mes_suspend_gang_input input; 496 463 int r, pasid; 497 464 498 - mutex_lock(&adev->mes.mutex); 465 + /* 466 + * Avoid taking any other locks under MES lock to avoid circular 467 + * lock dependencies. 468 + */ 469 + amdgpu_mes_lock(&adev->mes); 499 470 500 471 idp = &adev->mes.pasid_idr; 501 472 ··· 512 475 } 513 476 } 514 477 515 - mutex_unlock(&adev->mes.mutex); 478 + amdgpu_mes_unlock(&adev->mes); 516 479 return 0; 517 480 } 518 481 ··· 524 487 struct mes_resume_gang_input input; 525 488 int r, pasid; 526 489 527 - mutex_lock(&adev->mes.mutex); 490 + /* 491 + * Avoid taking any other locks under MES lock to avoid circular 492 + * lock dependencies. 
493 + */ 494 + amdgpu_mes_lock(&adev->mes); 528 495 529 496 idp = &adev->mes.pasid_idr; 530 497 ··· 541 500 } 542 501 } 543 502 544 - mutex_unlock(&adev->mes.mutex); 503 + amdgpu_mes_unlock(&adev->mes); 545 504 return 0; 546 505 } 547 506 548 - static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, 507 + static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, 549 508 struct amdgpu_mes_queue *q, 550 509 struct amdgpu_mes_queue_properties *p) 551 510 { 552 511 struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; 553 512 u32 mqd_size = mqd_mgr->mqd_size; 554 - struct amdgpu_mqd_prop mqd_prop = {0}; 555 513 int r; 556 514 557 515 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, ··· 562 522 return r; 563 523 } 564 524 memset(q->mqd_cpu_ptr, 0, mqd_size); 525 + 526 + r = amdgpu_bo_reserve(q->mqd_obj, false); 527 + if (unlikely(r != 0)) 528 + goto clean_up; 529 + 530 + return 0; 531 + 532 + clean_up: 533 + amdgpu_bo_free_kernel(&q->mqd_obj, 534 + &q->mqd_gpu_addr, 535 + &q->mqd_cpu_ptr); 536 + return r; 537 + } 538 + 539 + static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, 540 + struct amdgpu_mes_queue *q, 541 + struct amdgpu_mes_queue_properties *p) 542 + { 543 + struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; 544 + struct amdgpu_mqd_prop mqd_prop = {0}; 565 545 566 546 mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; 567 547 mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; ··· 595 535 mqd_prop.hqd_queue_priority = p->hqd_queue_priority; 596 536 mqd_prop.hqd_active = false; 597 537 598 - r = amdgpu_bo_reserve(q->mqd_obj, false); 599 - if (unlikely(r != 0)) 600 - goto clean_up; 601 - 602 538 mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); 603 539 604 540 amdgpu_bo_unreserve(q->mqd_obj); 605 - return 0; 606 - 607 - clean_up: 608 - amdgpu_bo_free_kernel(&q->mqd_obj, 609 - &q->mqd_gpu_addr, 610 - &q->mqd_cpu_ptr); 611 - return r; 612 - } 613 - 614 - static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) 615 - { 
616 - amdgpu_bo_free_kernel(&q->mqd_obj, 617 - &q->mqd_gpu_addr, 618 - &q->mqd_cpu_ptr); 619 541 } 620 542 621 543 int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, ··· 610 568 unsigned long flags; 611 569 int r; 612 570 613 - mutex_lock(&adev->mes.mutex); 571 + /* allocate the mes queue buffer */ 572 + queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); 573 + if (!queue) { 574 + DRM_ERROR("Failed to allocate memory for queue\n"); 575 + return -ENOMEM; 576 + } 577 + 578 + /* Allocate the queue mqd */ 579 + r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); 580 + if (r) 581 + goto clean_up_memory; 582 + 583 + /* 584 + * Avoid taking any other locks under MES lock to avoid circular 585 + * lock dependencies. 586 + */ 587 + amdgpu_mes_lock(&adev->mes); 614 588 615 589 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 616 590 if (!gang) { 617 591 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 618 - mutex_unlock(&adev->mes.mutex); 619 - return -EINVAL; 620 - } 621 - 622 - /* allocate the mes queue buffer */ 623 - queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); 624 - if (!queue) { 625 - mutex_unlock(&adev->mes.mutex); 626 - return -ENOMEM; 592 + r = -EINVAL; 593 + goto clean_up_mqd; 627 594 } 628 595 629 596 /* add the mes gang to idr list */ ··· 641 590 GFP_ATOMIC); 642 591 if (r < 0) { 643 592 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 644 - goto clean_up_memory; 593 + goto clean_up_mqd; 645 594 } 646 595 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 647 596 *queue_id = queue->queue_id = r; ··· 654 603 goto clean_up_queue_id; 655 604 656 605 /* initialize the queue mqd */ 657 - r = amdgpu_mes_queue_init_mqd(adev, queue, qprops); 658 - if (r) 659 - goto clean_up_doorbell; 606 + amdgpu_mes_queue_init_mqd(adev, queue, qprops); 660 607 661 608 /* add hw queue to mes */ 662 609 queue_input.process_id = gang->process->pasid; 663 - queue_input.page_table_base_addr = gang->process->pd_gpu_addr; 610 + 611 
+ queue_input.page_table_base_addr = 612 + adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - 613 + adev->gmc.vram_start; 614 + 664 615 queue_input.process_va_start = 0; 665 616 queue_input.process_va_end = 666 617 (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; ··· 682 629 if (r) { 683 630 DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", 684 631 qprops->doorbell_off); 685 - goto clean_up_mqd; 632 + goto clean_up_doorbell; 686 633 } 687 634 688 635 DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " ··· 698 645 queue->gang = gang; 699 646 list_add_tail(&queue->list, &gang->queue_list); 700 647 701 - mutex_unlock(&adev->mes.mutex); 648 + amdgpu_mes_unlock(&adev->mes); 702 649 return 0; 703 650 704 - clean_up_mqd: 705 - amdgpu_mes_queue_free_mqd(queue); 706 651 clean_up_doorbell: 707 652 amdgpu_mes_queue_doorbell_free(adev, gang->process, 708 653 qprops->doorbell_off); ··· 708 657 spin_lock_irqsave(&adev->mes.queue_id_lock, flags); 709 658 idr_remove(&adev->mes.queue_id_idr, queue->queue_id); 710 659 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 660 + clean_up_mqd: 661 + amdgpu_mes_unlock(&adev->mes); 662 + amdgpu_mes_queue_free_mqd(queue); 711 663 clean_up_memory: 712 664 kfree(queue); 713 - mutex_unlock(&adev->mes.mutex); 714 665 return r; 715 666 } 716 667 ··· 724 671 struct mes_remove_queue_input queue_input; 725 672 int r; 726 673 727 - mutex_lock(&adev->mes.mutex); 674 + /* 675 + * Avoid taking any other locks under MES lock to avoid circular 676 + * lock dependencies. 
677 + */ 678 + amdgpu_mes_lock(&adev->mes); 728 679 729 680 /* remove the mes gang from idr list */ 730 681 spin_lock_irqsave(&adev->mes.queue_id_lock, flags); ··· 736 679 queue = idr_find(&adev->mes.queue_id_idr, queue_id); 737 680 if (!queue) { 738 681 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 739 - mutex_unlock(&adev->mes.mutex); 682 + amdgpu_mes_unlock(&adev->mes); 740 683 DRM_ERROR("queue id %d doesn't exist\n", queue_id); 741 684 return -EINVAL; 742 685 } ··· 756 699 DRM_ERROR("failed to remove hardware queue, queue id = %d\n", 757 700 queue_id); 758 701 759 - amdgpu_mes_queue_free_mqd(queue); 760 702 list_del(&queue->list); 761 703 amdgpu_mes_queue_doorbell_free(adev, gang->process, 762 704 queue->doorbell_off); 705 + amdgpu_mes_unlock(&adev->mes); 706 + 707 + amdgpu_mes_queue_free_mqd(queue); 763 708 kfree(queue); 764 - mutex_unlock(&adev->mes.mutex); 765 709 return 0; 710 + } 711 + 712 + int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, 713 + struct amdgpu_ring *ring, 714 + enum amdgpu_unmap_queues_action action, 715 + u64 gpu_addr, u64 seq) 716 + { 717 + struct mes_unmap_legacy_queue_input queue_input; 718 + int r; 719 + 720 + amdgpu_mes_lock(&adev->mes); 721 + 722 + queue_input.action = action; 723 + queue_input.queue_type = ring->funcs->type; 724 + queue_input.doorbell_offset = ring->doorbell_index; 725 + queue_input.pipe_id = ring->pipe; 726 + queue_input.queue_id = ring->queue; 727 + queue_input.trail_fence_addr = gpu_addr; 728 + queue_input.trail_fence_data = seq; 729 + 730 + r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); 731 + if (r) 732 + DRM_ERROR("failed to unmap legacy queue\n"); 733 + 734 + amdgpu_mes_unlock(&adev->mes); 735 + return r; 766 736 } 767 737 768 738 static void ··· 855 771 struct amdgpu_mes_queue_properties qprops = {0}; 856 772 int r, queue_id, pasid; 857 773 858 - mutex_lock(&adev->mes.mutex); 774 + /* 775 + * Avoid taking any other locks under MES lock to avoid circular 776 + * lock 
dependencies. 777 + */ 778 + amdgpu_mes_lock(&adev->mes); 859 779 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 860 780 if (!gang) { 861 781 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 862 - mutex_unlock(&adev->mes.mutex); 782 + amdgpu_mes_unlock(&adev->mes); 863 783 return -EINVAL; 864 784 } 865 785 pasid = gang->process->pasid; 866 786 867 787 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); 868 788 if (!ring) { 869 - mutex_unlock(&adev->mes.mutex); 789 + amdgpu_mes_unlock(&adev->mes); 870 790 return -ENOMEM; 871 791 } 872 792 ··· 911 823 912 824 dma_fence_wait(gang->process->vm->last_update, false); 913 825 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); 914 - mutex_unlock(&adev->mes.mutex); 826 + amdgpu_mes_unlock(&adev->mes); 915 827 916 828 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); 917 829 if (r) ··· 938 850 amdgpu_ring_fini(ring); 939 851 clean_up_memory: 940 852 kfree(ring); 941 - mutex_unlock(&adev->mes.mutex); 853 + amdgpu_mes_unlock(&adev->mes); 942 854 return r; 943 855 } 944 856 ··· 1174 1086 } 1175 1087 1176 1088 for (i = 0; i < ARRAY_SIZE(queue_types); i++) { 1177 - /* On sienna cichlid+, fw hasn't supported to map sdma queue. */ 1178 - if (adev->asic_type >= CHIP_SIENNA_CICHLID && 1179 - i == AMDGPU_RING_TYPE_SDMA) 1089 + /* On GFX v10.3, fw hasn't supported to map sdma queue. */ 1090 + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) && 1091 + adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) && 1092 + queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) 1180 1093 continue; 1181 1094 1182 1095 r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
+84 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
··· 56 56 struct amdgpu_mes { 57 57 struct amdgpu_device *adev; 58 58 59 - struct mutex mutex; 59 + struct mutex mutex_hidden; 60 60 61 61 struct idr pasid_idr; 62 62 struct idr gang_id_idr; ··· 109 109 uint32_t query_status_fence_offs; 110 110 uint64_t query_status_fence_gpu_addr; 111 111 uint64_t *query_status_fence_ptr; 112 + uint32_t saved_flags; 112 113 113 114 /* initialize kiq pipe */ 114 115 int (*kiq_hw_init)(struct amdgpu_device *adev); 116 + int (*kiq_hw_fini)(struct amdgpu_device *adev); 115 117 116 118 /* ip specific functions */ 117 119 const struct amdgpu_mes_funcs *funcs; ··· 200 198 uint64_t wptr_addr; 201 199 uint32_t queue_type; 202 200 uint32_t paging; 201 + uint32_t gws_base; 202 + uint32_t gws_size; 203 + uint64_t tba_addr; 204 + uint64_t tma_addr; 203 205 }; 204 206 205 207 struct mes_remove_queue_input { 206 208 uint32_t doorbell_offset; 207 209 uint64_t gang_context_addr; 210 + }; 211 + 212 + struct mes_unmap_legacy_queue_input { 213 + enum amdgpu_unmap_queues_action action; 214 + uint32_t queue_type; 215 + uint32_t doorbell_offset; 216 + uint32_t pipe_id; 217 + uint32_t queue_id; 218 + uint64_t trail_fence_addr; 219 + uint64_t trail_fence_data; 208 220 }; 209 221 210 222 struct mes_suspend_gang_input { ··· 240 224 int (*remove_hw_queue)(struct amdgpu_mes *mes, 241 225 struct mes_remove_queue_input *input); 242 226 227 + int (*unmap_legacy_queue)(struct amdgpu_mes *mes, 228 + struct mes_unmap_legacy_queue_input *input); 229 + 243 230 int (*suspend_gang)(struct amdgpu_mes *mes, 244 231 struct mes_suspend_gang_input *input); 245 232 ··· 251 232 }; 252 233 253 234 #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) 235 + #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) 254 236 255 237 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs); 256 238 ··· 274 254 struct amdgpu_mes_queue_properties *qprops, 275 255 int *queue_id); 276 256 int amdgpu_mes_remove_hw_queue(struct amdgpu_device 
*adev, int queue_id); 257 + 258 + int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, 259 + struct amdgpu_ring *ring, 260 + enum amdgpu_unmap_queues_action action, 261 + u64 gpu_addr, u64 seq); 277 262 278 263 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, 279 264 int queue_type, int idx, ··· 305 280 uint32_t doorbell_index, 306 281 unsigned int doorbell_id); 307 282 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); 283 + 284 + /* 285 + * MES lock can be taken in MMU notifiers. 286 + * 287 + * A bit more detail about why to set no-FS reclaim with MES lock: 288 + * 289 + * The purpose of the MMU notifier is to stop GPU access to memory so 290 + * that the Linux VM subsystem can move pages around safely. This is 291 + * done by preempting user mode queues for the affected process. When 292 + * MES is used, MES lock needs to be taken to preempt the queues. 293 + * 294 + * The MMU notifier callback entry point in the driver is 295 + * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from 296 + * there is: 297 + * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm -> 298 + * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues 299 + * 300 + * The last part of the chain is a function pointer where we take the 301 + * MES lock. 302 + * 303 + * The problem with taking locks in the MMU notifier is, that MMU 304 + * notifiers can be called in reclaim-FS context. That's where the 305 + * kernel frees up pages to make room for new page allocations under 306 + * memory pressure. While we are running in reclaim-FS context, we must 307 + * not trigger another memory reclaim operation because that would 308 + * recursively reenter the reclaim code and cause a deadlock. The 309 + * memalloc_nofs_save/restore calls guarantee that. 310 + * 311 + * In addition we also need to avoid lock dependencies on other locks taken 312 + * under the MES lock, for example reservation locks. 
Here is a possible 313 + * scenario of a deadlock: 314 + * Thread A: takes and holds reservation lock | triggers reclaim-FS | 315 + * MMU notifier | blocks trying to take MES lock 316 + * Thread B: takes and holds MES lock | blocks trying to take reservation lock 317 + * 318 + * In this scenario Thread B gets involved in a deadlock even without 319 + * triggering a reclaim-FS operation itself. 320 + * To fix this and break the lock dependency chain you'd need to either: 321 + * 1. protect reservation locks with memalloc_nofs_save/restore, or 322 + * 2. avoid taking reservation locks under the MES lock. 323 + * 324 + * Reservation locks are taken all over the kernel in different subsystems, we 325 + * have no control over them and their lock dependencies.So the only workable 326 + * solution is to avoid taking other locks under the MES lock. 327 + * As a result, make sure no reclaim-FS happens while holding this lock anywhere 328 + * to prevent deadlocks when an MMU notifier runs in reclaim-FS context. 329 + */ 330 + static inline void amdgpu_mes_lock(struct amdgpu_mes *mes) 331 + { 332 + mutex_lock(&mes->mutex_hidden); 333 + mes->saved_flags = memalloc_noreclaim_save(); 334 + } 335 + 336 + static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) 337 + { 338 + memalloc_noreclaim_restore(mes->saved_flags); 339 + mutex_unlock(&mes->mutex_hidden); 340 + } 308 341 #endif /* __AMDGPU_MES_H__ */
+6
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 3551 3551 enum amdgpu_unmap_queues_action action, 3552 3552 u64 gpu_addr, u64 seq) 3553 3553 { 3554 + struct amdgpu_device *adev = kiq_ring->adev; 3554 3555 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 3556 + 3557 + if (!adev->gfx.kiq.ring.sched.ready) { 3558 + amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); 3559 + return; 3560 + } 3555 3561 3556 3562 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3557 3563 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+6 -1
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
··· 274 274 /* For SRIOV run time, driver shouldn't access the register through MMIO 275 275 * Directly use kiq to do the vm invalidation instead 276 276 */ 277 - if (adev->gfx.kiq.ring.sched.ready && 277 + if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes && 278 278 (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { 279 279 struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; 280 280 const unsigned eng = 17; ··· 410 410 { 411 411 struct amdgpu_device *adev = ring->adev; 412 412 uint32_t reg; 413 + 414 + /* MES fw manages IH_VMID_x_LUT updating */ 415 + if (ring->is_mes_queue) 416 + return; 413 417 414 418 if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) 415 419 reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; ··· 807 803 } 808 804 809 805 amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); 806 + 810 807 r = adev->mmhub.funcs->gart_enable(adev); 811 808 if (r) 812 809 return r;
+147 -20
drivers/gpu/drm/amd/amdgpu/mes_api_def.h drivers/gpu/drm/amd/include/mes_api_def.h
··· 59 59 MES_SCH_API_PROGRAM_GDS = 12, 60 60 MES_SCH_API_SET_DEBUG_VMID = 13, 61 61 MES_SCH_API_MISC = 14, 62 + MES_SCH_API_UPDATE_ROOT_PAGE_TABLE = 15, 63 + MES_SCH_API_AMD_LOG = 16, 62 64 MES_SCH_API_MAX = 0xFF 63 65 }; 64 66 ··· 118 116 enum { MAX_VMID_MMHUB = 16 }; 119 117 120 118 enum MES_LOG_OPERATION { 121 - MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0 119 + MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0, 120 + MES_LOG_OPERATION_QUEUE_NEW_WORK = 1, 121 + MES_LOG_OPERATION_QUEUE_UNWAIT_SYNC_OBJECT = 2, 122 + MES_LOG_OPERATION_QUEUE_NO_MORE_WORK = 3, 123 + MES_LOG_OPERATION_QUEUE_WAIT_SYNC_OBJECT = 4, 124 + MES_LOG_OPERATION_QUEUE_INVALID = 0xF, 122 125 }; 123 126 124 127 enum MES_LOG_CONTEXT_STATE { ··· 131 124 MES_LOG_CONTEXT_STATE_RUNNING = 1, 132 125 MES_LOG_CONTEXT_STATE_READY = 2, 133 126 MES_LOG_CONTEXT_STATE_READY_STANDBY = 3, 127 + MES_LOG_CONTEXT_STATE_INVALID = 0xF, 134 128 }; 135 129 136 130 struct MES_LOG_CONTEXT_STATE_CHANGE { 137 131 void *h_context; 138 132 enum MES_LOG_CONTEXT_STATE new_context_state; 133 + }; 134 + 135 + struct MES_LOG_QUEUE_NEW_WORK { 136 + uint64_t h_queue; 137 + uint64_t reserved; 138 + }; 139 + 140 + struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT { 141 + uint64_t h_queue; 142 + uint64_t h_sync_object; 143 + }; 144 + 145 + struct MES_LOG_QUEUE_NO_MORE_WORK { 146 + uint64_t h_queue; 147 + uint64_t reserved; 148 + }; 149 + 150 + struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT { 151 + uint64_t h_queue; 152 + uint64_t h_sync_object; 139 153 }; 140 154 141 155 struct MES_LOG_ENTRY_HEADER { ··· 171 143 uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */ 172 144 uint32_t reserved_operation_type_bits; 173 145 union { 174 - struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change; 175 - uint64_t reserved_operation_data[2]; 146 + struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change; 147 + struct MES_LOG_QUEUE_NEW_WORK queue_new_work; 148 + struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT queue_unwait_sync_object; 149 + struct 
MES_LOG_QUEUE_NO_MORE_WORK queue_no_more_work; 150 + struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT queue_wait_sync_object; 151 + uint64_t all[2]; 176 152 }; 177 153 }; 178 154 179 155 struct MES_LOG_BUFFER { 180 156 struct MES_LOG_ENTRY_HEADER header; 181 157 struct MES_LOG_ENTRY_DATA entries[1]; 158 + }; 159 + 160 + enum MES_SWIP_TO_HWIP_DEF { 161 + MES_MAX_HWIP_SEGMENT = 6, 182 162 }; 183 163 184 164 union MESAPI_SET_HW_RESOURCES { ··· 199 163 uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES]; 200 164 uint32_t gfx_hqd_mask[MAX_GFX_PIPES]; 201 165 uint32_t sdma_hqd_mask[MAX_SDMA_PIPES]; 202 - uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS]; 166 + uint32_t aggregated_doorbells[AMD_PRIORITY_NUM_LEVELS]; 203 167 uint64_t g_sch_ctx_gpu_mc_ptr; 204 168 uint64_t query_status_fence_gpu_mc_ptr; 169 + uint32_t gc_base[MES_MAX_HWIP_SEGMENT]; 170 + uint32_t mmhub_base[MES_MAX_HWIP_SEGMENT]; 171 + uint32_t osssys_base[MES_MAX_HWIP_SEGMENT]; 205 172 struct MES_API_STATUS api_status; 206 173 union { 207 174 struct { 208 175 uint32_t disable_reset : 1; 209 - uint32_t reserved : 31; 176 + uint32_t use_different_vmid_compute : 1; 177 + uint32_t disable_mes_log : 1; 178 + uint32_t apply_mmhub_pgvm_invalidate_ack_loss_wa : 1; 179 + uint32_t apply_grbm_remote_register_dummy_read_wa : 1; 180 + uint32_t second_gfx_pipe_enabled : 1; 181 + uint32_t enable_level_process_quantum_check : 1; 182 + uint32_t apply_cwsr_program_all_vmid_sq_shader_tba_registers_wa : 1; 183 + uint32_t enable_mqd_active_poll : 1; 184 + uint32_t disable_timer_int : 1; 185 + uint32_t reserved : 22; 210 186 }; 211 187 uint32_t uint32_t_all; 212 188 }; ··· 243 195 uint32_t doorbell_offset; 244 196 uint64_t mqd_addr; 245 197 uint64_t wptr_addr; 198 + uint64_t h_context; 199 + uint64_t h_queue; 246 200 enum MES_QUEUE_TYPE queue_type; 247 201 uint32_t gds_base; 248 202 uint32_t gds_size; 249 203 uint32_t gws_base; 250 204 uint32_t gws_size; 251 205 uint32_t oa_mask; 206 + uint64_t trap_handler_addr; 207 + uint32_t 
vm_context_cntl; 252 208 253 209 struct { 254 210 uint32_t paging : 1; ··· 260 208 uint32_t program_gds : 1; 261 209 uint32_t is_gang_suspended : 1; 262 210 uint32_t is_tmz_queue : 1; 263 - uint32_t reserved : 24; 211 + uint32_t map_kiq_utility_queue : 1; 212 + uint32_t reserved : 23; 264 213 }; 265 214 struct MES_API_STATUS api_status; 266 215 }; ··· 276 223 uint64_t gang_context_addr; 277 224 278 225 struct { 279 - uint32_t unmap_legacy_gfx_queue : 1; 280 - uint32_t reserved : 31; 226 + uint32_t unmap_legacy_gfx_queue : 1; 227 + uint32_t unmap_kiq_utility_queue : 1; 228 + uint32_t preempt_legacy_gfx_queue : 1; 229 + uint32_t reserved : 29; 281 230 }; 282 - struct MES_API_STATUS api_status; 231 + struct MES_API_STATUS api_status; 232 + 233 + uint32_t pipe_id; 234 + uint32_t queue_id; 235 + 236 + uint64_t tf_addr; 237 + uint32_t tf_data; 283 238 }; 284 239 285 240 uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; ··· 382 321 383 322 union MESAPI__RESET { 384 323 struct { 385 - union MES_API_HEADER header; 324 + union MES_API_HEADER header; 386 325 387 326 struct { 388 - uint32_t reset_queue : 1; 389 - uint32_t reserved : 31; 327 + /* Only reset the queue given by doorbell_offset (not entire gang) */ 328 + uint32_t reset_queue_only : 1; 329 + /* Hang detection first then reset any queues that are hung */ 330 + uint32_t hang_detect_then_reset : 1; 331 + /* Only do hang detection (no reset) */ 332 + uint32_t hang_detect_only : 1; 333 + /* Rest HP and LP kernel queues not managed by MES */ 334 + uint32_t reset_legacy_gfx : 1; 335 + uint32_t reserved : 28; 390 336 }; 391 337 392 - uint64_t gang_context_addr; 393 - uint32_t doorbell_offset; /* valid only if reset_queue = true */ 394 - struct MES_API_STATUS api_status; 338 + uint64_t gang_context_addr; 339 + 340 + /* valid only if reset_queue_only = true */ 341 + uint32_t doorbell_offset; 342 + 343 + /* valid only if hang_detect_then_reset = true */ 344 + uint64_t doorbell_offset_addr; 345 + enum MES_QUEUE_TYPE 
queue_type; 346 + 347 + /* valid only if reset_legacy_gfx = true */ 348 + uint32_t pipe_id_lp; 349 + uint32_t queue_id_lp; 350 + uint32_t vmid_id_lp; 351 + uint64_t mqd_mc_addr_lp; 352 + uint32_t doorbell_offset_lp; 353 + uint64_t wptr_addr_lp; 354 + 355 + uint32_t pipe_id_hp; 356 + uint32_t queue_id_hp; 357 + uint32_t vmid_id_hp; 358 + uint64_t mqd_mc_addr_hp; 359 + uint32_t doorbell_offset_hp; 360 + uint64_t wptr_addr_hp; 361 + 362 + struct MES_API_STATUS api_status; 395 363 }; 396 364 397 365 uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; ··· 498 408 499 409 enum MESAPI_MISC_OPCODE { 500 410 MESAPI_MISC__MODIFY_REG, 411 + MESAPI_MISC__INV_GART, 412 + MESAPI_MISC__QUERY_STATUS, 501 413 MESAPI_MISC__MAX, 502 414 }; 503 415 ··· 512 420 513 421 enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 }; 514 422 423 + struct MODIFY_REG { 424 + enum MODIFY_REG_SUBCODE subcode; 425 + uint32_t reg_offset; 426 + uint32_t reg_value; 427 + }; 428 + 429 + struct INV_GART { 430 + uint64_t inv_range_va_start; 431 + uint64_t inv_range_size; 432 + }; 433 + 434 + struct QUERY_STATUS { 435 + uint32_t context_id; 436 + }; 437 + 515 438 union MESAPI__MISC { 516 439 struct { 517 440 union MES_API_HEADER header; ··· 534 427 struct MES_API_STATUS api_status; 535 428 536 429 union { 537 - struct { 538 - enum MODIFY_REG_SUBCODE subcode; 539 - uint32_t reg_offset; 540 - uint32_t reg_value; 541 - } modify_reg; 430 + struct MODIFY_REG modify_reg; 431 + struct INV_GART inv_gart; 432 + struct QUERY_STATUS query_status; 542 433 uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS]; 543 434 }; 544 435 }; 545 436 546 437 uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; 438 + }; 439 + 440 + union MESAPI__UPDATE_ROOT_PAGE_TABLE { 441 + struct { 442 + union MES_API_HEADER header; 443 + uint64_t page_table_base_addr; 444 + uint64_t process_context_addr; 445 + struct MES_API_STATUS api_status; 446 + }; 447 + 448 + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; 449 + }; 450 + 451 + union MESAPI_AMD_LOG { 
452 + struct { 453 + union MES_API_HEADER header; 454 + uint64_t p_buffer_memory; 455 + uint64_t p_buffer_size_used; 456 + struct MES_API_STATUS api_status; 457 + }; 458 + 459 + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; 547 460 }; 548 461 549 462 #pragma pack(pop)
+66 -3
drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
··· 133 133 { 134 134 struct amdgpu_device *adev = mes->adev; 135 135 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 136 + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; 137 + uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; 136 138 137 139 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 138 140 ··· 143 141 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 144 142 145 143 mes_add_queue_pkt.process_id = input->process_id; 146 - mes_add_queue_pkt.page_table_base_addr = 147 - input->page_table_base_addr - adev->gmc.vram_start; 144 + mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 148 145 mes_add_queue_pkt.process_va_start = input->process_va_start; 149 146 mes_add_queue_pkt.process_va_end = input->process_va_end; 150 147 mes_add_queue_pkt.process_quantum = input->process_quantum; ··· 160 159 mes_add_queue_pkt.queue_type = 161 160 convert_to_mes_queue_type(input->queue_type); 162 161 mes_add_queue_pkt.paging = input->paging; 162 + mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl; 163 + mes_add_queue_pkt.gws_base = input->gws_base; 164 + mes_add_queue_pkt.gws_size = input->gws_size; 165 + mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 163 166 164 167 mes_add_queue_pkt.api_status.api_completion_fence_addr = 165 168 mes->ring.fence_drv.gpu_addr; ··· 187 182 188 183 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 189 184 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 185 + 186 + mes_remove_queue_pkt.api_status.api_completion_fence_addr = 187 + mes->ring.fence_drv.gpu_addr; 188 + mes_remove_queue_pkt.api_status.api_completion_fence_value = 189 + ++mes->ring.fence_drv.sync_seq; 190 + 191 + return mes_v10_1_submit_pkt_and_poll_completion(mes, 192 + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); 193 + } 194 + 195 + static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, 196 + struct mes_unmap_legacy_queue_input *input) 197 + { 198 + union MESAPI__REMOVE_QUEUE 
mes_remove_queue_pkt; 199 + 200 + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 201 + 202 + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 203 + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 204 + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 205 + 206 + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 207 + mes_remove_queue_pkt.gang_context_addr = 0; 208 + 209 + mes_remove_queue_pkt.pipe_id = input->pipe_id; 210 + mes_remove_queue_pkt.queue_id = input->queue_id; 211 + 212 + if (input->action == PREEMPT_QUEUES_NO_UNMAP) { 213 + mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; 214 + mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; 215 + mes_remove_queue_pkt.tf_data = 216 + lower_32_bits(input->trail_fence_data); 217 + } else { 218 + if (input->queue_type == AMDGPU_RING_TYPE_GFX) 219 + mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1; 220 + else 221 + mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; 222 + } 190 223 191 224 mes_remove_queue_pkt.api_status.api_completion_fence_addr = 192 225 mes->ring.fence_drv.gpu_addr; ··· 297 254 mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; 298 255 299 256 for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) 300 - mes_set_hw_res_pkt.agreegated_doorbells[i] = 257 + mes_set_hw_res_pkt.aggregated_doorbells[i] = 301 258 mes->agreegated_doorbells[i]; 259 + 260 + for (i = 0; i < 5; i++) { 261 + mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; 262 + mes_set_hw_res_pkt.mmhub_base[i] = 263 + adev->reg_offset[MMHUB_HWIP][0][i]; 264 + mes_set_hw_res_pkt.osssys_base[i] = 265 + adev->reg_offset[OSSSYS_HWIP][0][i]; 266 + } 267 + 268 + mes_set_hw_res_pkt.disable_reset = 1; 269 + mes_set_hw_res_pkt.disable_mes_log = 1; 270 + mes_set_hw_res_pkt.use_different_vmid_compute = 1; 302 271 303 272 mes_set_hw_res_pkt.api_status.api_completion_fence_addr = 304 273 mes->ring.fence_drv.gpu_addr; ··· 324 269 static const struct amdgpu_mes_funcs 
mes_v10_1_funcs = { 325 270 .add_hw_queue = mes_v10_1_add_hw_queue, 326 271 .remove_hw_queue = mes_v10_1_remove_hw_queue, 272 + .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue, 327 273 .suspend_gang = mes_v10_1_suspend_gang, 328 274 .resume_gang = mes_v10_1_resume_gang, 329 275 }; ··· 1152 1096 DRM_ERROR("MES is busy\n"); 1153 1097 goto failure; 1154 1098 } 1099 + 1100 + /* 1101 + * Disable KIQ ring usage from the driver once MES is enabled. 1102 + * MES uses KIQ ring exclusively so driver cannot access KIQ ring 1103 + * with MES enabled. 1104 + */ 1105 + adev->gfx.kiq.ring.sched.ready = false; 1155 1106 1156 1107 return 0; 1157 1108