Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: Support chain runlists of XNACK+/XNACK-

If the MEC firmware supports chaining runlists of XNACK+/XNACK- processes,
set the SQ_CONFIG1 chicken bit (DISABLE_XNACK_CHECK_IN_RETRY_DISABLE) and bit 28 of the SET_RESOURCES packet.

When the MEC/HWS supports it, KFD checks whether XNACK+ and XNACK- processes
are mixed in the runlist. If they are, the runlist enters over-subscription.

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Amber Lin and committed by
Alex Deucher
e3d0870a 9c16e157

+84 -12
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
··· 62 62 */ 63 63 #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL 64 64 65 + /* XNACK flags */ 66 + #define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0) 67 + 65 68 struct firmware; 66 69 67 70 enum amdgpu_memory_partition { ··· 304 301 struct amdgpu_xgmi xgmi; 305 302 struct amdgpu_irq_src ecc_irq; 306 303 int noretry; 304 + uint32_t xnack_flags; 307 305 308 306 uint32_t vmid0_page_table_block_size; 309 307 uint32_t vmid0_page_table_depth;
+31
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
··· 1273 1273 } 1274 1274 } 1275 1275 1276 + /* For ASICs that needs xnack chain and MEC version supports, set SG_CONFIG1 1277 + * DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit and inform KFD to set xnack_chain 1278 + * bit in SET_RESOURCES 1279 + */ 1280 + static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id) 1281 + { 1282 + uint32_t data; 1283 + 1284 + if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)) 1285 + return; 1286 + 1287 + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1); 1288 + data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1); 1289 + WREG32_SOC15(GC, xcc_id, regSQ_CONFIG1, data); 1290 + } 1291 + 1276 1292 static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev, 1277 1293 int xcc_id) 1278 1294 { ··· 1333 1317 1334 1318 gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id); 1335 1319 gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id); 1320 + gfx_v9_4_3_xcc_init_sq(adev, xcc_id); 1336 1321 } 1337 1322 1338 1323 static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) ··· 1345 1328 gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); 1346 1329 adev->gfx.config.db_debug2 = 1347 1330 RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); 1331 + 1332 + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1333 + /* ToDo: GC 9.4.4 */ 1334 + case IP_VERSION(9, 4, 3): 1335 + if (adev->gfx.mec_fw_version >= 184) 1336 + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; 1337 + break; 1338 + case IP_VERSION(9, 5, 0): 1339 + if (adev->gfx.mec_fw_version >= 23) 1340 + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; 1341 + break; 1342 + default: 1343 + break; 1344 + } 1348 1345 1349 1346 for (i = 0; i < num_xcc; i++) 1350 1347 gfx_v9_4_3_xcc_constants_init(adev, i);
+45 -11
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
··· 31 31 #define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0) 32 32 #define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1) 33 33 #define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2) 34 + #define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3) 34 35 35 36 static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, 36 37 unsigned int buffer_size_bytes) ··· 45 44 46 45 static void pm_calc_rlib_size(struct packet_manager *pm, 47 46 unsigned int *rlib_size, 48 - int *over_subscription) 47 + int *over_subscription, 48 + int xnack_conflict) 49 49 { 50 50 unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; 51 51 unsigned int map_queue_size; ··· 75 73 *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT; 76 74 if (gws_queue_count > 1) 77 75 *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT; 76 + if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)) 77 + *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT; 78 78 79 79 if (*over_subscription) 80 80 dev_dbg(dev, "Over subscribed runlist\n"); ··· 100 96 unsigned int **rl_buffer, 101 97 uint64_t *rl_gpu_buffer, 102 98 unsigned int *rl_buffer_size, 103 - int *is_over_subscription) 99 + int *is_over_subscription, 100 + int xnack_conflict) 104 101 { 105 102 struct kfd_node *node = pm->dqm->dev; 106 103 struct device *dev = node->adev->dev; ··· 110 105 if (WARN_ON(pm->allocated)) 111 106 return -EINVAL; 112 107 113 - pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); 108 + pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription, 109 + xnack_conflict); 114 110 115 111 mutex_lock(&pm->lock); 116 112 ··· 148 142 struct queue *q; 149 143 struct kernel_queue *kq; 150 144 int is_over_subscription; 145 + int xnack_enabled = -1; 146 + bool xnack_conflict = 0; 151 147 152 148 rl_wptr = retval = processes_mapped = 0; 153 149 150 + /* Check if processes set different xnack modes */ 151 + list_for_each_entry(cur, queues, list) { 152 + qpd = cur->qpd; 153 
+ if (xnack_enabled < 0) 154 + /* First process */ 155 + xnack_enabled = qpd->pqm->process->xnack_enabled; 156 + else if (qpd->pqm->process->xnack_enabled != xnack_enabled) { 157 + /* Found a process with a different xnack mode */ 158 + xnack_conflict = 1; 159 + break; 160 + } 161 + } 162 + 154 163 retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, 155 - &alloc_size_bytes, &is_over_subscription); 164 + &alloc_size_bytes, &is_over_subscription, 165 + xnack_conflict); 156 166 if (retval) 157 167 return retval; 158 168 ··· 178 156 dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n", 179 157 pm->dqm->processes_count, pm->dqm->active_queue_count); 180 158 159 + build_runlist_ib: 181 160 /* build the run list ib packet */ 182 161 list_for_each_entry(cur, queues, list) { 183 162 qpd = cur->qpd; 163 + /* group processes with the same xnack mode together */ 164 + if (qpd->pqm->process->xnack_enabled != xnack_enabled) 165 + continue; 184 166 /* build map process packet */ 185 167 if (processes_mapped >= pm->dqm->processes_count) { 186 168 dev_dbg(dev, "Not enough space left in runlist IB\n"); ··· 241 215 alloc_size_bytes); 242 216 } 243 217 } 218 + if (xnack_conflict) { 219 + /* pick up processes with the other xnack mode */ 220 + xnack_enabled = !xnack_enabled; 221 + xnack_conflict = 0; 222 + goto build_runlist_ib; 223 + } 244 224 245 225 dev_dbg(dev, "Finished map process and queues to runlist\n"); 246 226 247 227 if (is_over_subscription) { 248 228 if (!pm->is_over_subscription) 249 - dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s. Expect reduced ROCm performance.\n", 250 - is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ? 251 - " too many processes." : "", 252 - is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ? 253 - " too many queues." : "", 254 - is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ? 255 - " multiple processes using cooperative launch." 
: ""); 229 + dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n", 230 + is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ? 231 + " too many processes" : "", 232 + is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ? 233 + " too many queues" : "", 234 + is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ? 235 + " multiple processes using cooperative launch" : "", 236 + is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ? 237 + " xnack on/off processes mixed on gfx9" : ""); 256 238 257 239 retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], 258 240 *rl_gpu_addr,
+2
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
··· 203 203 queue_type__mes_set_resources__hsa_interface_queue_hiq; 204 204 packet->bitfields2.vmid_mask = res->vmid_mask; 205 205 packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; 206 + if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN) 207 + packet->bitfields2.enb_xnack_retry_disable_check = 1; 206 208 packet->bitfields7.oac_mask = res->oac_mask; 207 209 packet->bitfields8.gds_heap_base = res->gds_heap_base; 208 210 packet->bitfields8.gds_heap_size = res->gds_heap_size;
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
··· 63 63 struct { 64 64 uint32_t vmid_mask:16; 65 65 uint32_t unmap_latency:8; 66 - uint32_t reserved1:5; 66 + uint32_t reserved1:4; 67 + uint32_t enb_xnack_retry_disable_check:1; 67 68 enum mes_set_resources_queue_type_enum queue_type:3; 68 69 } bitfields2; 69 70 uint32_t ordinal2;