Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: Move gfx 9.4.3 and gfx 9.5 MQD to HBM

To further reduce queue switch latency, move the MQD to the VRAM domain.
The CP then accesses the MQD and control stack via the FB aperture, which
requires contiguous pages.

After the MQD is initialized, updated or restored, flush HDP to guarantee
that the data is written to HBM and the GPU cache is invalidated, so that
the CP reads the new MQD.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Philip Yang and committed by
Alex Deucher
d4a814f4 d62dec8c

+24 -2
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
··· 334 334 bp.size = size; 335 335 bp.byte_align = PAGE_SIZE; 336 336 bp.domain = domain; 337 - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; 337 + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 338 + AMDGPU_GEM_CREATE_CPU_GTT_USWC; 338 339 bp.type = ttm_bo_type_kernel; 339 340 bp.resv = NULL; 340 341 bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+22 -1
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
··· 109 109 m->cp_hqd_queue_priority = q->priority; 110 110 } 111 111 112 + static bool mqd_on_vram(struct amdgpu_device *adev) 113 + { 114 + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 115 + case IP_VERSION(9, 4, 3): 116 + case IP_VERSION(9, 5, 0): 117 + return true; 118 + default: 119 + return false; 120 + } 121 + } 122 + 112 123 static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node, 113 124 struct queue_properties *q) 114 125 { ··· 150 139 (ALIGN(q->ctl_stack_size, PAGE_SIZE) + 151 140 ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) * 152 141 NUM_XCC(node->xcc_mask), 153 - AMDGPU_GEM_DOMAIN_GTT, 142 + mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM : 143 + AMDGPU_GEM_DOMAIN_GTT, 154 144 &(mqd_mem_obj->mem), 155 145 &(mqd_mem_obj->gpu_addr), 156 146 (void *)&(mqd_mem_obj->cpu_ptr), true); ··· 751 739 *gart_addr = xcc_gart_addr; 752 740 } 753 741 } 742 + 743 + if (mqd_on_vram(mm->dev->adev)) 744 + amdgpu_device_flush_hdp(mm->dev->adev, NULL); 754 745 } 755 746 756 747 static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, ··· 790 775 m->pm4_target_xcc_in_xcp = q->pm4_target_xcc; 791 776 } 792 777 } 778 + 779 + if (mqd_on_vram(mm->dev->adev)) 780 + amdgpu_device_flush_hdp(mm->dev->adev, NULL); 793 781 } 794 782 795 783 static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, ··· 831 813 (uint8_t *)ctl_stack_src + xcc * mqd_ctl_stack_size, 832 814 mqd_ctl_stack_size); 833 815 } 816 + 817 + if (mqd_on_vram(mm->dev->adev)) 818 + amdgpu_device_flush_hdp(mm->dev->adev, NULL); 834 819 } 835 820 static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, 836 821 enum kfd_preempt_type type, unsigned int timeout,