Merge tag 'amd-drm-next-6.15-2025-03-21' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

+11 -2

drivers/gpu/drm/amd/amdgpu/amdgpu.h

··· 1194 1194 bool debug_exp_resets; 1195 1195 bool debug_disable_gpu_ring_reset; 1196 1196 1197 - bool enforce_isolation[MAX_XCP]; 1198 - /* Added this mutex for cleaner shader isolation between GFX and compute processes */ 1197 + /* Protection for the following isolation structure */ 1199 1198 struct mutex enforce_isolation_mutex; 1199 + bool enforce_isolation[MAX_XCP]; 1200 + struct amdgpu_isolation { 1201 + void *owner; 1202 + struct dma_fence *spearhead; 1203 + struct amdgpu_sync active; 1204 + struct amdgpu_sync prev; 1205 + } isolation[MAX_XCP]; 1200 1206 1201 1207 struct amdgpu_init_level *init_lvl; 1202 1208 ··· 1488 1482 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev); 1489 1483 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, 1490 1484 struct dma_fence *gang); 1485 + struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, 1486 + struct amdgpu_ring *ring, 1487 + struct amdgpu_job *job); 1491 1488 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev); 1492 1489 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring); 1493 1490 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);

+23 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c

··· 391 391 { 392 392 struct aca_bank_node *node; 393 393 struct aca_bank *bank; 394 + int r; 394 395 395 396 if (!adev->cper.enabled) 396 397 return; ··· 403 402 404 403 /* UEs must be encoded into separate CPER entries */ 405 404 if (type == ACA_SMU_TYPE_UE) { 405 + struct aca_banks de_banks; 406 + 407 + aca_banks_init(&de_banks); 406 408 list_for_each_entry(node, &banks->list, node) { 407 409 bank = &node->bank; 408 - if (amdgpu_cper_generate_ue_record(adev, bank)) 409 - dev_warn(adev->dev, "fail to generate ue cper records\n"); 410 + if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) { 411 + r = aca_banks_add_bank(&de_banks, bank); 412 + if (r) 413 + dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r); 414 + } else { 415 + if (amdgpu_cper_generate_ue_record(adev, bank)) 416 + dev_warn(adev->dev, "fail to generate ue cper records\n"); 417 + } 410 418 } 419 + 420 + if (!list_empty(&de_banks.list)) { 421 + if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks)) 422 + dev_warn(adev->dev, "fail to generate de cper records\n"); 423 + } 424 + 425 + aca_banks_release(&de_banks); 411 426 } else { 412 427 /* 413 428 * SMU_TYPE_CE banks are combined into 1 CPER entry, ··· 557 540 ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); 558 541 if (ret) 559 542 return ret; 543 + 544 + /* DEs may be contained in CEs or UEs */ 545 + if (type != ACA_ERROR_TYPE_DEFERRED) 546 + aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data); 560 547 561 548 return aca_log_aca_error(handle, type, err_data); 562 549 }

+11 -5

drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h

··· 76 76 #define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ 77 77 #define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ 78 78 79 - #define ACA_BANK_ERR_CE_DE_DECODE(bank) \ 80 - ((ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \ 81 - ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) ? \ 82 - ACA_ERROR_TYPE_DEFERRED : \ 83 - ACA_ERROR_TYPE_CE) 79 + #define ACA_BANK_ERR_IS_DEFFERED(bank) \ 80 + (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \ 81 + ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) 82 + 83 + #define ACA_BANK_ERR_CE_DE_DECODE(bank) \ 84 + (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ 85 + ACA_ERROR_TYPE_CE) 86 + 87 + #define ACA_BANK_ERR_UE_DE_DECODE(bank) \ 88 + (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ 89 + ACA_ERROR_TYPE_UE) 84 90 85 91 enum aca_reg_idx { 86 92 ACA_REG_IDX_CTL = 0,

+4 -4

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

··· 491 491 if (ret) 492 492 return ret; 493 493 494 - return amdgpu_sync_fence(sync, vm->last_update); 494 + return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL); 495 495 } 496 496 497 497 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) ··· 1249 1249 1250 1250 (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); 1251 1251 1252 - (void)amdgpu_sync_fence(sync, bo_va->last_pt_update); 1252 + (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); 1253 1253 1254 1254 return 0; 1255 1255 } ··· 1273 1273 return ret; 1274 1274 } 1275 1275 1276 - return amdgpu_sync_fence(sync, bo_va->last_pt_update); 1276 + return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); 1277 1277 } 1278 1278 1279 1279 static int map_bo_to_gpuvm(struct kgd_mem *mem, ··· 2913 2913 } 2914 2914 dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, 2915 2915 DMA_RESV_USAGE_KERNEL, fence) { 2916 - ret = amdgpu_sync_fence(&sync_obj, fence); 2916 + ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL); 2917 2917 if (ret) { 2918 2918 pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); 2919 2919 goto validate_map_fail;

+8 -7

drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c

··· 455 455 return umin(rec_len, chunk); 456 456 } 457 457 458 - void amdgpu_cper_ring_write(struct amdgpu_ring *ring, 459 - void *src, int count) 458 + void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) 460 459 { 461 460 u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; 461 + int rec_cnt_dw = count >> 2; 462 462 u32 chunk, ent_sz; 463 463 u8 *s = (u8 *)src; 464 464 ··· 485 485 s += chunk; 486 486 } 487 487 488 + if (ring->count_dw < rec_cnt_dw) 489 + ring->count_dw = 0; 490 + 488 491 /* the buffer has overflowed, adjust rptr */ 489 492 if (((wptr_old < rptr) && (rptr <= ring->wptr)) || 490 493 ((ring->wptr < wptr_old) && (wptr_old < rptr)) || ··· 504 501 pos = rptr; 505 502 } while (!amdgpu_cper_is_hdr(ring, rptr)); 506 503 } 507 - mutex_unlock(&ring->adev->cper.ring_lock); 508 504 509 - if (ring->count_dw >= (count >> 2)) 510 - ring->count_dw -= (count >> 2); 511 - else 512 - ring->count_dw = 0; 505 + if (ring->count_dw >= rec_cnt_dw) 506 + ring->count_dw -= rec_cnt_dw; 507 + mutex_unlock(&ring->adev->cper.ring_lock); 513 508 } 514 509 515 510 static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)

+12 -8

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

··· 428 428 dma_fence_put(old); 429 429 } 430 430 431 - r = amdgpu_sync_fence(&p->sync, fence); 431 + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); 432 432 dma_fence_put(fence); 433 433 if (r) 434 434 return r; ··· 450 450 return r; 451 451 } 452 452 453 - r = amdgpu_sync_fence(&p->sync, fence); 453 + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); 454 454 dma_fence_put(fence); 455 455 return r; 456 456 } ··· 1111 1111 struct drm_gpu_scheduler *sched = entity->rq->sched; 1112 1112 struct amdgpu_ring *ring = to_amdgpu_ring(sched); 1113 1113 1114 - if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) 1114 + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) 1115 1115 return -EINVAL; 1116 1116 } 1117 1117 } ··· 1124 1124 if (r) 1125 1125 return r; 1126 1126 1127 - r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); 1127 + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, 1128 + GFP_KERNEL); 1128 1129 if (r) 1129 1130 return r; 1130 1131 ··· 1136 1135 if (r) 1137 1136 return r; 1138 1137 1139 - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); 1138 + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, 1139 + GFP_KERNEL); 1140 1140 if (r) 1141 1141 return r; 1142 1142 } ··· 1156 1154 if (r) 1157 1155 return r; 1158 1156 1159 - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); 1157 + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, 1158 + GFP_KERNEL); 1160 1159 if (r) 1161 1160 return r; 1162 1161 } ··· 1170 1167 if (r) 1171 1168 return r; 1172 1169 1173 - r = amdgpu_sync_fence(&p->sync, vm->last_update); 1170 + r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL); 1174 1171 if (r) 1175 1172 return r; 1176 1173 ··· 1251 1248 continue; 1252 1249 } 1253 1250 1254 - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); 1251 + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence, 1252 + GFP_KERNEL); 1255 1253 dma_fence_put(fence); 1256 1254 if (r) 1257 1255 return r;

+144 -13

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

··· 227 227 static DEVICE_ATTR(pcie_replay_count, 0444, 228 228 amdgpu_device_get_pcie_replay_count, NULL); 229 229 230 + static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) 231 + { 232 + int ret = 0; 233 + 234 + if (!amdgpu_sriov_vf(adev)) 235 + ret = sysfs_create_file(&adev->dev->kobj, 236 + &dev_attr_pcie_replay_count.attr); 237 + 238 + return ret; 239 + } 240 + 241 + static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev) 242 + { 243 + if (!amdgpu_sriov_vf(adev)) 244 + sysfs_remove_file(&adev->dev->kobj, 245 + &dev_attr_pcie_replay_count.attr); 246 + } 247 + 230 248 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, 231 249 struct bin_attribute *attr, char *buf, 232 250 loff_t ppos, size_t count) ··· 4190 4172 } 4191 4173 #endif 4192 4174 4193 - static const struct attribute *amdgpu_dev_attributes[] = { 4194 - &dev_attr_pcie_replay_count.attr, 4195 - NULL 4196 - }; 4197 - 4198 4175 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) 4199 4176 { 4200 4177 if (amdgpu_mcbp == 1) ··· 4294 4281 mutex_init(&adev->gfx.reset_sem_mutex); 4295 4282 /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ 4296 4283 mutex_init(&adev->enforce_isolation_mutex); 4284 + for (i = 0; i < MAX_XCP; ++i) { 4285 + adev->isolation[i].spearhead = dma_fence_get_stub(); 4286 + amdgpu_sync_create(&adev->isolation[i].active); 4287 + amdgpu_sync_create(&adev->isolation[i].prev); 4288 + } 4297 4289 mutex_init(&adev->gfx.kfd_sch_mutex); 4290 + mutex_init(&adev->gfx.workload_profile_mutex); 4291 + mutex_init(&adev->vcn.workload_profile_mutex); 4298 4292 4299 4293 amdgpu_device_init_apu_flags(adev); 4300 4294 ··· 4419 4399 if (r) 4420 4400 return r; 4421 4401 4422 - /* Get rid of things like offb */ 4423 - r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); 4424 - if (r) 4425 - return r; 4402 + /* 4403 + * No need to remove conflicting FBs for non-display class 
devices. 4404 + * This prevents the sysfb from being freed accidentally. 4405 + */ 4406 + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || 4407 + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { 4408 + /* Get rid of things like offb */ 4409 + r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); 4410 + if (r) 4411 + return r; 4412 + } 4426 4413 4427 4414 /* Enable TMZ based on IP_VERSION */ 4428 4415 amdgpu_gmc_tmz_set(adev); ··· 4640 4613 } else 4641 4614 adev->ucode_sysfs_en = true; 4642 4615 4643 - r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); 4616 + r = amdgpu_device_attr_sysfs_init(adev); 4644 4617 if (r) 4645 4618 dev_err(adev->dev, "Could not create amdgpu device attr\n"); 4646 4619 ··· 4777 4750 amdgpu_pm_sysfs_fini(adev); 4778 4751 if (adev->ucode_sysfs_en) 4779 4752 amdgpu_ucode_sysfs_fini(adev); 4780 - sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); 4753 + amdgpu_device_attr_sysfs_fini(adev); 4781 4754 amdgpu_fru_sysfs_fini(adev); 4782 4755 4783 4756 amdgpu_reg_state_sysfs_fini(adev); ··· 4804 4777 4805 4778 void amdgpu_device_fini_sw(struct amdgpu_device *adev) 4806 4779 { 4807 - int idx; 4780 + int i, idx; 4808 4781 bool px; 4809 4782 4810 4783 amdgpu_device_ip_fini(adev); ··· 4812 4785 amdgpu_ucode_release(&adev->firmware.gpu_info_fw); 4813 4786 adev->accel_working = false; 4814 4787 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); 4788 + for (i = 0; i < MAX_XCP; ++i) { 4789 + dma_fence_put(adev->isolation[i].spearhead); 4790 + amdgpu_sync_free(&adev->isolation[i].active); 4791 + amdgpu_sync_free(&adev->isolation[i].prev); 4792 + } 4815 4793 4816 4794 amdgpu_reset_fini(adev); 4817 4795 ··· 4831 4799 4832 4800 kfree(adev->fru_info); 4833 4801 adev->fru_info = NULL; 4802 + 4803 + kfree(adev->xcp_mgr); 4804 + adev->xcp_mgr = NULL; 4834 4805 4835 4806 px = amdgpu_device_supports_px(adev_to_drm(adev)); 4836 4807 ··· 5366 5331 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 
IP_VERSION(9, 4, 2) || 5367 5332 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || 5368 5333 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || 5334 + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || 5369 5335 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) 5370 5336 amdgpu_ras_resume(adev); 5371 5337 ··· 6939 6903 { 6940 6904 struct dma_fence *old = NULL; 6941 6905 6906 + dma_fence_get(gang); 6942 6907 do { 6943 6908 dma_fence_put(old); 6944 6909 old = amdgpu_device_get_gang(adev); 6945 6910 if (old == gang) 6946 6911 break; 6947 6912 6948 - if (!dma_fence_is_signaled(old)) 6913 + if (!dma_fence_is_signaled(old)) { 6914 + dma_fence_put(gang); 6949 6915 return old; 6916 + } 6950 6917 6951 6918 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, 6952 6919 old, gang) != old); 6953 6920 6921 + /* 6922 + * Drop it once for the exchanged reference in adev and once for the 6923 + * thread local reference acquired in amdgpu_device_get_gang(). 6924 + */ 6925 + dma_fence_put(old); 6954 6926 dma_fence_put(old); 6955 6927 return NULL; 6928 + } 6929 + 6930 + /** 6931 + * amdgpu_device_enforce_isolation - enforce HW isolation 6932 + * @adev: the amdgpu device pointer 6933 + * @ring: the HW ring the job is supposed to run on 6934 + * @job: the job which is about to be pushed to the HW ring 6935 + * 6936 + * Makes sure that only one client at a time can use the GFX block. 6937 + * Returns: The dependency to wait on before the job can be pushed to the HW. 6938 + * The function is called multiple times until NULL is returned. 
6939 + */ 6940 + struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, 6941 + struct amdgpu_ring *ring, 6942 + struct amdgpu_job *job) 6943 + { 6944 + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; 6945 + struct drm_sched_fence *f = job->base.s_fence; 6946 + struct dma_fence *dep; 6947 + void *owner; 6948 + int r; 6949 + 6950 + /* 6951 + * For now enforce isolation only for the GFX block since we only need 6952 + * the cleaner shader on those rings. 6953 + */ 6954 + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX && 6955 + ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 6956 + return NULL; 6957 + 6958 + /* 6959 + * All submissions where enforce isolation is false are handled as if 6960 + * they come from a single client. Use ~0l as the owner to distinguish it 6961 + * from kernel submissions where the owner is NULL. 6962 + */ 6963 + owner = job->enforce_isolation ? f->owner : (void *)~0l; 6964 + 6965 + mutex_lock(&adev->enforce_isolation_mutex); 6966 + 6967 + /* 6968 + * The "spearhead" submission is the first one which changes the 6969 + * ownership to its client. We always need to wait for it to be 6970 + * pushed to the HW before proceeding with anything. 6971 + */ 6972 + if (&f->scheduled != isolation->spearhead && 6973 + !dma_fence_is_signaled(isolation->spearhead)) { 6974 + dep = isolation->spearhead; 6975 + goto out_grab_ref; 6976 + } 6977 + 6978 + if (isolation->owner != owner) { 6979 + 6980 + /* 6981 + * Wait for any gang to be assembled before switching to a 6982 + * different owner or otherwise we could deadlock the 6983 + * submissions. 
6984 + */ 6985 + if (!job->gang_submit) { 6986 + dep = amdgpu_device_get_gang(adev); 6987 + if (!dma_fence_is_signaled(dep)) 6988 + goto out_return_dep; 6989 + dma_fence_put(dep); 6990 + } 6991 + 6992 + dma_fence_put(isolation->spearhead); 6993 + isolation->spearhead = dma_fence_get(&f->scheduled); 6994 + amdgpu_sync_move(&isolation->active, &isolation->prev); 6995 + trace_amdgpu_isolation(isolation->owner, owner); 6996 + isolation->owner = owner; 6997 + } 6998 + 6999 + /* 7000 + * Specifying the ring here helps to pipeline submissions even when 7001 + * isolation is enabled. If that is not desired for testing NULL can be 7002 + * used instead of the ring to enforce a CPU round trip while switching 7003 + * between clients. 7004 + */ 7005 + dep = amdgpu_sync_peek_fence(&isolation->prev, ring); 7006 + r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT); 7007 + if (r) 7008 + DRM_WARN("OOM tracking isolation\n"); 7009 + 7010 + out_grab_ref: 7011 + dma_fence_get(dep); 7012 + out_return_dep: 7013 + mutex_unlock(&adev->enforce_isolation_mutex); 7014 + return dep; 6956 7015 } 6957 7016 6958 7017 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)

+83 -27

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

··· 113 113 #include "amdgpu_isp.h" 114 114 #endif 115 115 116 - #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" 117 - MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); 116 + MODULE_FIRMWARE("amdgpu/ip_discovery.bin"); 117 + MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin"); 118 + MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin"); 119 + MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin"); 120 + MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin"); 121 + MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin"); 122 + MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin"); 118 123 119 124 #define mmIP_DISCOVERY_VERSION 0x16A00 120 125 #define mmRCC_CONFIG_MEMSIZE 0xde3 ··· 302 297 return ret; 303 298 } 304 299 305 - static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) 300 + static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, 301 + uint8_t *binary, 302 + const char *fw_name) 306 303 { 307 304 const struct firmware *fw; 308 - const char *fw_name; 309 305 int r; 310 - 311 - switch (amdgpu_discovery) { 312 - case 2: 313 - fw_name = FIRMWARE_IP_DISCOVERY; 314 - break; 315 - default: 316 - dev_warn(adev->dev, "amdgpu_discovery is not set properly\n"); 317 - return -EINVAL; 318 - } 319 306 320 307 r = request_firmware(&fw, fw_name, adev->dev); 321 308 if (r) { ··· 401 404 return 0; 402 405 } 403 406 407 + static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev) 408 + { 409 + if (amdgpu_discovery == 2) 410 + return "amdgpu/ip_discovery.bin"; 411 + 412 + switch (adev->asic_type) { 413 + case CHIP_VEGA10: 414 + return "amdgpu/vega10_ip_discovery.bin"; 415 + case CHIP_VEGA12: 416 + return "amdgpu/vega12_ip_discovery.bin"; 417 + case CHIP_RAVEN: 418 + if (adev->apu_flags & AMD_APU_IS_RAVEN2) 419 + return "amdgpu/raven2_ip_discovery.bin"; 420 + else if (adev->apu_flags & AMD_APU_IS_PICASSO) 421 + return "amdgpu/picasso_ip_discovery.bin"; 422 + else 423 + return "amdgpu/raven_ip_discovery.bin"; 424 + case 
CHIP_VEGA20: 425 + return "amdgpu/vega20_ip_discovery.bin"; 426 + case CHIP_ARCTURUS: 427 + return "amdgpu/arcturus_ip_discovery.bin"; 428 + case CHIP_ALDEBARAN: 429 + return "amdgpu/aldebaran_ip_discovery.bin"; 430 + default: 431 + return NULL; 432 + } 433 + } 434 + 404 435 static int amdgpu_discovery_init(struct amdgpu_device *adev) 405 436 { 406 437 struct table_info *info; 407 438 struct binary_header *bhdr; 439 + const char *fw_name; 408 440 uint16_t offset; 409 441 uint16_t size; 410 442 uint16_t checksum; ··· 445 419 return -ENOMEM; 446 420 447 421 /* Read from file if it is the preferred option */ 448 - if (amdgpu_discovery == 2) { 422 + fw_name = amdgpu_discovery_get_fw_name(adev); 423 + if (fw_name != NULL) { 449 424 dev_info(adev->dev, "use ip discovery information from file"); 450 - r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); 425 + r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); 451 426 452 427 if (r) { 453 428 dev_err(adev->dev, "failed to read ip discovery binary from file\n"); ··· 1317 1290 uint16_t die_offset; 1318 1291 uint16_t ip_offset; 1319 1292 uint16_t num_dies; 1293 + uint32_t wafl_ver; 1320 1294 uint16_t num_ips; 1321 1295 uint16_t hw_id; 1322 1296 uint8_t inst; ··· 1331 1303 return r; 1332 1304 } 1333 1305 1306 + wafl_ver = 0; 1334 1307 adev->gfx.xcc_mask = 0; 1335 1308 adev->sdma.sdma_mask = 0; 1336 1309 adev->vcn.inst_mask = 0; ··· 1432 1403 adev->gfx.xcc_mask |= 1433 1404 (1U << ip->instance_number); 1434 1405 1406 + if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID) 1407 + wafl_ver = IP_VERSION_FULL(ip->major, ip->minor, 1408 + ip->revision, 0, 0); 1409 + 1435 1410 for (k = 0; k < num_base_address; k++) { 1436 1411 /* 1437 1412 * convert the endianness of base addresses in place, ··· 1500 1467 ip_offset += struct_size(ip, base_address, ip->num_base_address); 1501 1468 } 1502 1469 } 1470 + 1471 + if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0]) 1472 + 
adev->ip_versions[XGMI_HWIP][0] = wafl_ver; 1503 1473 1504 1474 return 0; 1505 1475 } ··· 2547 2511 2548 2512 switch (adev->asic_type) { 2549 2513 case CHIP_VEGA10: 2514 + case CHIP_VEGA12: 2515 + case CHIP_RAVEN: 2516 + case CHIP_VEGA20: 2517 + case CHIP_ARCTURUS: 2518 + case CHIP_ALDEBARAN: 2519 + /* this is not fatal. We have a fallback below 2520 + * if the new firmwares are not present. some of 2521 + * this will be overridden below to keep things 2522 + * consistent with the current behavior. 2523 + */ 2524 + r = amdgpu_discovery_reg_base_init(adev); 2525 + if (!r) { 2526 + amdgpu_discovery_harvest_ip(adev); 2527 + amdgpu_discovery_get_gfx_info(adev); 2528 + amdgpu_discovery_get_mall_info(adev); 2529 + amdgpu_discovery_get_vcn_info(adev); 2530 + } 2531 + break; 2532 + default: 2533 + r = amdgpu_discovery_reg_base_init(adev); 2534 + if (r) 2535 + return -EINVAL; 2536 + 2537 + amdgpu_discovery_harvest_ip(adev); 2538 + amdgpu_discovery_get_gfx_info(adev); 2539 + amdgpu_discovery_get_mall_info(adev); 2540 + amdgpu_discovery_get_vcn_info(adev); 2541 + break; 2542 + } 2543 + 2544 + switch (adev->asic_type) { 2545 + case CHIP_VEGA10: 2550 2546 vega10_reg_base_init(adev); 2551 2547 adev->sdma.num_instances = 2; 2552 2548 adev->gmc.num_umc = 4; ··· 2741 2673 adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); 2742 2674 break; 2743 2675 default: 2744 - r = amdgpu_discovery_reg_base_init(adev); 2745 - if (r) 2746 - return -EINVAL; 2747 - 2748 - amdgpu_discovery_harvest_ip(adev); 2749 - amdgpu_discovery_get_gfx_info(adev); 2750 - amdgpu_discovery_get_mall_info(adev); 2751 - amdgpu_discovery_get_vcn_info(adev); 2752 2676 break; 2753 2677 } 2754 2678 ··· 2831 2771 default: 2832 2772 break; 2833 2773 } 2834 - 2835 - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || 2836 - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) 2837 - adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); 2838 2774 2839 2775 /* set NBIO version */ 2840 2776 switch 
(amdgpu_ip_version(adev, NBIO_HWIP, 0)) {

+24 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

··· 139 139 AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), 140 140 AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), 141 141 AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), 142 + AMDGPU_DEBUG_SMU_POOL = BIT(7), 142 143 }; 143 144 144 145 unsigned int amdgpu_vram_limit = UINT_MAX; ··· 177 176 char *amdgpu_disable_cu; 178 177 char *amdgpu_virtual_display; 179 178 bool enforce_isolation; 179 + int amdgpu_modeset = -1; 180 180 181 181 /* Specifies the default granularity for SVM, used in buffer 182 182 * migration and restoration of backing memory when handling ··· 1040 1038 MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); 1041 1039 1042 1040 /** 1041 + * DOC: modeset (int) 1042 + * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto). 1043 + */ 1044 + MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)"); 1045 + module_param_named(modeset, amdgpu_modeset, int, 0444); 1046 + 1047 + /** 1043 1048 * DOC: seamless (int) 1044 1049 * Seamless boot will keep the image on the screen during the boot process. 1045 1050 */ ··· 1062 1053 * limits the VRAM size reported to ROCm applications to the visible 1063 1054 * size, usually 256MB. 
1064 1055 * - 0x4: Disable GPU soft recovery, always do a full reset 1056 + * - 0x8: Use VRAM for firmware loading 1057 + * - 0x10: Enable ACA based RAS logging 1058 + * - 0x20: Enable experimental resets 1059 + * - 0x40: Disable ring resets 1060 + * - 0x80: Use VRAM for SMU pool 1065 1061 */ 1066 1062 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); 1067 1063 module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444); ··· 2244 2230 pr_info("debug: ring reset disabled\n"); 2245 2231 adev->debug_disable_gpu_ring_reset = true; 2246 2232 } 2233 + if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) { 2234 + pr_info("debug: use vram for smu pool\n"); 2235 + adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM; 2236 + } 2247 2237 } 2248 2238 2249 2239 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) ··· 2274 2256 unsigned long flags = ent->driver_data; 2275 2257 int ret, retry = 0, i; 2276 2258 bool supports_atomic = false; 2259 + 2260 + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || 2261 + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { 2262 + if (drm_firmware_drivers_only() && amdgpu_modeset == -1) 2263 + return -EINVAL; 2264 + } 2277 2265 2278 2266 /* skip devices which are owned by radeon */ 2279 2267 for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { ··· 3013 2989 static int __init amdgpu_init(void) 3014 2990 { 3015 2991 int r; 3016 - 3017 - if (drm_firmware_drivers_only()) 3018 - return -EINVAL; 3019 2992 3020 2993 r = amdgpu_sync_init(); 3021 2994 if (r)

+24 -14

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

··· 1665 1665 } 1666 1666 1667 1667 mutex_lock(&adev->enforce_isolation_mutex); 1668 - for (i = 0; i < num_partitions; i++) { 1669 - if (adev->enforce_isolation[i] && !partition_values[i]) 1670 - /* Going from enabled to disabled */ 1671 - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); 1672 - else if (!adev->enforce_isolation[i] && partition_values[i]) 1673 - /* Going from disabled to enabled */ 1674 - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); 1668 + for (i = 0; i < num_partitions; i++) 1675 1669 adev->enforce_isolation[i] = partition_values[i]; 1676 - } 1677 1670 mutex_unlock(&adev->enforce_isolation_mutex); 1678 1671 1679 1672 amdgpu_mes_update_enforce_isolation(adev); ··· 2153 2160 for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) 2154 2161 fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); 2155 2162 if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) { 2156 - r = amdgpu_dpm_switch_power_profile(adev, profile, false); 2157 - if (r) 2158 - dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, 2159 - profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? 2160 - "fullscreen 3D" : "compute"); 2163 + mutex_lock(&adev->gfx.workload_profile_mutex); 2164 + if (adev->gfx.workload_profile_active) { 2165 + r = amdgpu_dpm_switch_power_profile(adev, profile, false); 2166 + if (r) 2167 + dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, 2168 + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? 
2169 + "fullscreen 3D" : "compute"); 2170 + adev->gfx.workload_profile_active = false; 2171 + } 2172 + mutex_unlock(&adev->gfx.workload_profile_mutex); 2161 2173 } else { 2162 2174 schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); 2163 2175 } ··· 2181 2183 2182 2184 atomic_inc(&adev->gfx.total_submission_cnt); 2183 2185 2184 - if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) { 2186 + cancel_delayed_work_sync(&adev->gfx.idle_work); 2187 + 2188 + /* We can safely return early here because we've cancelled the 2189 + * delayed work so there is no one else to set it to false 2190 + * and we don't care if someone else sets it to true. 2191 + */ 2192 + if (adev->gfx.workload_profile_active) 2193 + return; 2194 + 2195 + mutex_lock(&adev->gfx.workload_profile_mutex); 2196 + if (!adev->gfx.workload_profile_active) { 2185 2197 r = amdgpu_dpm_switch_power_profile(adev, profile, true); 2186 2198 if (r) 2187 2199 dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, 2188 2200 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? 2189 2201 "fullscreen 3D" : "compute"); 2202 + adev->gfx.workload_profile_active = true; 2190 2203 } 2204 + mutex_unlock(&adev->gfx.workload_profile_mutex); 2191 2205 } 2192 2206 2193 2207 void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)

+2

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

··· 482 482 483 483 atomic_t total_submission_cnt; 484 484 struct delayed_work idle_work; 485 + bool workload_profile_active; 486 + struct mutex workload_profile_mutex; 485 487 }; 486 488 487 489 struct amdgpu_gfx_ras_reg_entry {

+20

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

··· 573 573 unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0}; 574 574 unsigned i; 575 575 unsigned vmhub, inv_eng; 576 + struct amdgpu_ring *shared_ring; 576 577 577 578 /* init the vm inv eng for all vmhubs */ 578 579 for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { ··· 596 595 ring == &adev->cper.ring_buf) 597 596 continue; 598 597 598 + /* Skip if the ring is a shared ring */ 599 + if (amdgpu_sdma_is_shared_inv_eng(adev, ring)) 600 + continue; 601 + 599 602 inv_eng = ffs(vm_inv_engs[vmhub]); 600 603 if (!inv_eng) { 601 604 dev_err(adev->dev, "no VM inv eng for ring %s\n", ··· 612 607 613 608 dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", 614 609 ring->name, ring->vm_inv_eng, ring->vm_hub); 610 + /* SDMA has a special packet which allows it to use the same 611 + * invalidation engine for all the rings in one instance. 612 + * Therefore, we do not allocate a separate VM invalidation engine 613 + * for SDMA page rings. Instead, they share the VM invalidation 614 + * engine with the SDMA gfx ring. This change ensures efficient 615 + * resource management and avoids the issue of insufficient VM 616 + * invalidation engines. 617 + */ 618 + shared_ring = amdgpu_sdma_get_shared_ring(adev, ring); 619 + if (shared_ring) { 620 + shared_ring->vm_inv_eng = ring->vm_inv_eng; 621 + dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n", 622 + ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub); 623 + continue; 624 + } 615 625 } 616 626 617 627 return 0;

+25 -40

drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

··· 209 209 return 0; 210 210 } 211 211 212 - fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); 212 + fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT); 213 213 if (!fences) 214 214 return -ENOMEM; 215 215 ··· 287 287 (*id)->flushed_updates < updates || 288 288 !(*id)->last_flush || 289 289 ((*id)->last_flush->context != fence_context && 290 - !dma_fence_is_signaled((*id)->last_flush))) { 290 + !dma_fence_is_signaled((*id)->last_flush))) 291 + needs_flush = true; 292 + 293 + if ((*id)->owner != vm->immediate.fence_context || 294 + (!adev->vm_manager.concurrent_flush && needs_flush)) { 291 295 struct dma_fence *tmp; 292 296 293 - /* Wait for the gang to be assembled before using a 294 - * reserved VMID or otherwise the gang could deadlock. 297 + /* Don't use per engine and per process VMID at the 298 + * same time 295 299 */ 296 - tmp = amdgpu_device_get_gang(adev); 297 - if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) { 300 + if (adev->vm_manager.concurrent_flush) 301 + ring = NULL; 302 + 303 + /* to prevent one context starved by another context */ 304 + (*id)->pd_gpu_addr = 0; 305 + tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); 306 + if (tmp) { 298 307 *id = NULL; 299 - *fence = tmp; 308 + *fence = dma_fence_get(tmp); 300 309 return 0; 301 310 } 302 - dma_fence_put(tmp); 303 - 304 - /* Make sure the id is owned by the gang before proceeding */ 305 - if (!job->gang_submit || 306 - (*id)->owner != vm->immediate.fence_context) { 307 - 308 - /* Don't use per engine and per process VMID at the 309 - * same time 310 - */ 311 - if (adev->vm_manager.concurrent_flush) 312 - ring = NULL; 313 - 314 - /* to prevent one context starved by another context */ 315 - (*id)->pd_gpu_addr = 0; 316 - tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); 317 - if (tmp) { 318 - *id = NULL; 319 - *fence = dma_fence_get(tmp); 320 - return 0; 321 - } 322 - } 323 - needs_flush = true; 324 311 } 325 312 326 313 /* Good we can use this 
VMID. Remember this submission as 327 314 * user of the VMID. 328 315 */ 329 - r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished); 316 + r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, 317 + GFP_NOWAIT); 330 318 if (r) 331 319 return r; 332 320 ··· 373 385 * user of the VMID. 374 386 */ 375 387 r = amdgpu_sync_fence(&(*id)->active, 376 - &job->base.s_fence->finished); 388 + &job->base.s_fence->finished, 389 + GFP_NOWAIT); 377 390 if (r) 378 391 return r; 379 392 ··· 411 422 if (r || !idle) 412 423 goto error; 413 424 414 - if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { 425 + if (amdgpu_vmid_uses_reserved(vm, vmhub)) { 415 426 r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); 416 427 if (r || !id) 417 428 goto error; ··· 426 437 427 438 /* Remember this submission as user of the VMID */ 428 439 r = amdgpu_sync_fence(&id->active, 429 - &job->base.s_fence->finished); 440 + &job->base.s_fence->finished, 441 + GFP_NOWAIT); 430 442 if (r) 431 443 goto error; 432 444 ··· 464 474 465 475 /* 466 476 * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID 467 - * @adev: amdgpu_device pointer 468 477 * @vm: the VM to check 469 478 * @vmhub: the VMHUB which will be used 470 479 * 471 480 * Returns: True if the VM will use a reserved VMID. 472 481 */ 473 - bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, 474 - struct amdgpu_vm *vm, unsigned int vmhub) 482 + bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) 475 483 { 476 - return vm->reserved_vmid[vmhub] || 477 - (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? 478 - vm->root.bo->xcp_id : 0] && 479 - AMDGPU_IS_GFXHUB(vmhub)); 484 + return vm->reserved_vmid[vmhub]; 480 485 } 481 486 482 487 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,

+1 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

··· 78 78 79 79 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, 80 80 struct amdgpu_vmid *id); 81 - bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, 82 - struct amdgpu_vm *vm, unsigned int vmhub); 81 + bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); 83 82 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, 84 83 unsigned vmhub); 85 84 void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,

+12 -4

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

··· 361 361 { 362 362 struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched); 363 363 struct amdgpu_job *job = to_amdgpu_job(sched_job); 364 - struct dma_fence *fence = NULL; 364 + struct dma_fence *fence; 365 365 int r; 366 366 367 367 r = drm_sched_entity_error(s_entity); 368 368 if (r) 369 369 goto error; 370 370 371 - if (job->gang_submit) 371 + if (job->gang_submit) { 372 372 fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); 373 + if (fence) 374 + return fence; 375 + } 373 376 374 - if (!fence && job->vm && !job->vmid) { 377 + fence = amdgpu_device_enforce_isolation(ring->adev, ring, job); 378 + if (fence) 379 + return fence; 380 + 381 + if (job->vm && !job->vmid) { 375 382 r = amdgpu_vmid_grab(job->vm, ring, job, &fence); 376 383 if (r) { 377 384 dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); ··· 391 384 */ 392 385 if (!fence) 393 386 job->vm = NULL; 387 + return fence; 394 388 } 395 389 396 - return fence; 390 + return NULL; 397 391 398 392 error: 399 393 dma_fence_set_error(&job->base.s_fence->finished, r);

+7 -13

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c

··· 145 145 adev->mes.vmid_mask_gfxhub = 0xffffff00; 146 146 147 147 for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { 148 - /* use only 1st MEC pipes */ 149 - if (i >= adev->gfx.mec.num_pipe_per_mec) 150 - continue; 148 + if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) 149 + break; 151 150 adev->mes.compute_hqd_mask[i] = 0xc; 152 151 } 153 152 ··· 154 155 adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; 155 156 156 157 for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { 157 - if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < 158 - IP_VERSION(6, 0, 0)) 159 - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; 160 - /* zero sdma_hqd_mask for non-existent engine */ 161 - else if (adev->sdma.num_instances == 1) 162 - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc; 163 - else 164 - adev->mes.sdma_hqd_mask[i] = 0xfc; 158 + if (i >= adev->sdma.num_instances) 159 + break; 160 + adev->mes.sdma_hqd_mask[i] = 0xfc; 165 161 } 166 162 167 163 for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { ··· 1330 1336 DRM_ERROR("failed to do vm_bo_update on meta data\n"); 1331 1337 goto error_del_bo_va; 1332 1338 } 1333 - amdgpu_sync_fence(&sync, bo_va->last_pt_update); 1339 + amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); 1334 1340 1335 1341 r = amdgpu_vm_update_pdes(adev, vm, false); 1336 1342 if (r) { 1337 1343 DRM_ERROR("failed to update pdes on meta data\n"); 1338 1344 goto error_del_bo_va; 1339 1345 } 1340 - amdgpu_sync_fence(&sync, vm->last_update); 1346 + amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); 1341 1347 1342 1348 amdgpu_sync_wait(&sync, false); 1343 1349 drm_exec_fini(&exec);

+4

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

··· 153 153 adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA; 154 154 ret = psp_init_cap_microcode(psp, ucode_prefix); 155 155 break; 156 + case IP_VERSION(13, 0, 12): 157 + ret = psp_init_ta_microcode(psp, ucode_prefix); 158 + break; 156 159 default: 157 160 return -EINVAL; 158 161 } ··· 1864 1861 if (adev->gmc.gmc_funcs->query_mem_partition_mode) 1865 1862 ras_cmd->ras_in_message.init_flags.nps_mode = 1866 1863 adev->gmc.gmc_funcs->query_mem_partition_mode(adev); 1864 + ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask; 1867 1865 1868 1866 ret = psp_ta_load(psp, &psp->ras_context.context); 1869 1867

+7

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

··· 3473 3473 adev, control->bad_channel_bitmap); 3474 3474 con->update_channel_flag = false; 3475 3475 } 3476 + 3477 + /* The format action is only applied to new ASICs */ 3478 + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 && 3479 + control->tbl_hdr.version < RAS_TABLE_VER_V3) 3480 + if (!amdgpu_ras_eeprom_reset_table(control)) 3481 + if (amdgpu_ras_save_bad_pages(adev, NULL)) 3482 + dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n"); 3476 3483 } 3477 3484 3478 3485 return ret;

+16 -12

drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

··· 161 161 case IP_VERSION(13, 0, 10): 162 162 return true; 163 163 case IP_VERSION(13, 0, 6): 164 + case IP_VERSION(13, 0, 12): 164 165 case IP_VERSION(13, 0, 14): 165 166 return (adev->gmc.is_app_apu) ? false : true; 166 167 default: ··· 224 223 return true; 225 224 case IP_VERSION(13, 0, 6): 226 225 case IP_VERSION(13, 0, 10): 226 + case IP_VERSION(13, 0, 12): 227 227 case IP_VERSION(13, 0, 14): 228 228 control->i2c_address = EEPROM_I2C_MADDR_4; 229 229 return true; ··· 415 413 416 414 switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { 417 415 case IP_VERSION(8, 10, 0): 418 - case IP_VERSION(12, 0, 0): 419 416 hdr->version = RAS_TABLE_VER_V2_1; 417 + return; 418 + case IP_VERSION(12, 0, 0): 419 + hdr->version = RAS_TABLE_VER_V3; 420 420 return; 421 421 default: 422 422 hdr->version = RAS_TABLE_VER_V1; ··· 447 443 hdr->header = RAS_TABLE_HDR_VAL; 448 444 amdgpu_ras_set_eeprom_table_version(control); 449 445 450 - if (hdr->version == RAS_TABLE_VER_V2_1) { 446 + if (hdr->version >= RAS_TABLE_VER_V2_1) { 451 447 hdr->first_rec_offset = RAS_RECORD_START_V2_1; 452 448 hdr->tbl_size = RAS_TABLE_HEADER_SIZE + 453 449 RAS_TABLE_V2_1_INFO_SIZE; ··· 465 461 } 466 462 467 463 csum = __calc_hdr_byte_sum(control); 468 - if (hdr->version == RAS_TABLE_VER_V2_1) 464 + if (hdr->version >= RAS_TABLE_VER_V2_1) 469 465 csum += __calc_ras_info_byte_sum(control); 470 466 csum = -csum; 471 467 hdr->checksum = csum; ··· 761 757 "Saved bad pages %d reaches threshold value %d\n", 762 758 control->ras_num_bad_pages, ras->bad_page_cnt_threshold); 763 759 control->tbl_hdr.header = RAS_TABLE_HDR_BAD; 764 - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) { 760 + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { 765 761 control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; 766 762 control->tbl_rai.health_percent = 0; 767 763 } ··· 774 770 amdgpu_dpm_send_rma_reason(adev); 775 771 } 776 772 777 - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) 773 + if 
(control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) 778 774 control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + 779 775 RAS_TABLE_V2_1_INFO_SIZE + 780 776 control->ras_num_recs * RAS_TABLE_RECORD_SIZE; ··· 814 810 * now calculate gpu health percent 815 811 */ 816 812 if (amdgpu_bad_page_threshold != 0 && 817 - control->tbl_hdr.version == RAS_TABLE_VER_V2_1 && 813 + control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 && 818 814 control->ras_num_bad_pages <= ras->bad_page_cnt_threshold) 819 815 control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold - 820 816 control->ras_num_bad_pages) * 100) / ··· 827 823 csum += *pp; 828 824 829 825 csum += __calc_hdr_byte_sum(control); 830 - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) 826 + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) 831 827 csum += __calc_ras_info_byte_sum(control); 832 828 /* avoid sign extension when assigning to "checksum" */ 833 829 csum = -csum; ··· 1044 1040 /* get available eeprom table version first before eeprom table init */ 1045 1041 amdgpu_ras_set_eeprom_table_version(control); 1046 1042 1047 - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) 1043 + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) 1048 1044 return RAS_MAX_RECORD_COUNT_V2_1; 1049 1045 else 1050 1046 return RAS_MAX_RECORD_COUNT; ··· 1289 1285 int buf_size, res; 1290 1286 u8 csum, *buf, *pp; 1291 1287 1292 - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) 1288 + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) 1293 1289 buf_size = RAS_TABLE_HEADER_SIZE + 1294 1290 RAS_TABLE_V2_1_INFO_SIZE + 1295 1291 control->ras_num_recs * RAS_TABLE_RECORD_SIZE; ··· 1392 1388 1393 1389 __decode_table_header_from_buf(hdr, buf); 1394 1390 1395 - if (hdr->version == RAS_TABLE_VER_V2_1) { 1391 + if (hdr->version >= RAS_TABLE_VER_V2_1) { 1396 1392 control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); 1397 1393 control->ras_record_offset = RAS_RECORD_START_V2_1; 1398 1394 control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; ··· 
1432 1428 DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", 1433 1429 control->ras_num_bad_pages); 1434 1430 1435 - if (hdr->version == RAS_TABLE_VER_V2_1) { 1431 + if (hdr->version >= RAS_TABLE_VER_V2_1) { 1436 1432 res = __read_table_ras_info(control); 1437 1433 if (res) 1438 1434 return res; ··· 1452 1448 ras->bad_page_cnt_threshold); 1453 1449 } else if (hdr->header == RAS_TABLE_HDR_BAD && 1454 1450 amdgpu_bad_page_threshold != 0) { 1455 - if (hdr->version == RAS_TABLE_VER_V2_1) { 1451 + if (hdr->version >= RAS_TABLE_VER_V2_1) { 1456 1452 res = __read_table_ras_info(control); 1457 1453 if (res) 1458 1454 return res;

+1

drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h

··· 28 28 29 29 #define RAS_TABLE_VER_V1 0x00010000 30 30 #define RAS_TABLE_VER_V2_1 0x00021000 31 + #define RAS_TABLE_VER_V3 0x00030000 31 32 32 33 struct amdgpu_device; 33 34

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

··· 37 37 struct amdgpu_vm; 38 38 39 39 /* max number of rings */ 40 - #define AMDGPU_MAX_RINGS 133 40 + #define AMDGPU_MAX_RINGS 149 41 41 #define AMDGPU_MAX_HWIP_RINGS 64 42 42 #define AMDGPU_MAX_GFX_RINGS 2 43 43 #define AMDGPU_MAX_SW_GFX_RINGS 2

+38 -14

drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c

··· 504 504 } 505 505 } 506 506 507 + struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring) 508 + { 509 + if (adev->sdma.has_page_queue && 510 + (ring->me < adev->sdma.num_instances) && 511 + (ring == &adev->sdma.instance[ring->me].ring)) 512 + return &adev->sdma.instance[ring->me].page; 513 + else 514 + return NULL; 515 + } 516 + 517 + /** 518 + * amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine 519 + * @adev: Pointer to the AMDGPU device structure 520 + * @ring: Pointer to the ring structure to check 521 + * 522 + * This function checks if the given ring is an SDMA ring that shares a VM invalidation engine. 523 + * It returns true if the ring is such an SDMA ring, false otherwise. 524 + */ 525 + bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring) 526 + { 527 + int i = ring->me; 528 + 529 + if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances) 530 + return false; 531 + 532 + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || 533 + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || 534 + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) 535 + return (ring == &adev->sdma.instance[i].page); 536 + else 537 + return false; 538 + } 539 + 507 540 /** 508 541 * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks 509 542 * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions ··· 565 532 * amdgpu_sdma_reset_engine - Reset a specific SDMA engine 566 533 * @adev: Pointer to the AMDGPU device 567 534 * @instance_id: ID of the SDMA engine instance to reset 568 - * @suspend_user_queues: check if suspend user queue. 569 535 * 570 536 * This function performs the following steps: 571 537 * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. 
··· 573 541 * 574 542 * Returns: 0 on success, or a negative error code on failure. 575 543 */ 576 - int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues) 544 + int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) 577 545 { 578 546 struct sdma_on_reset_funcs *funcs; 579 547 int ret = 0; 580 - struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];; 548 + struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; 581 549 struct amdgpu_ring *gfx_ring = &sdma_instance->ring; 582 550 struct amdgpu_ring *page_ring = &sdma_instance->page; 583 551 bool gfx_sched_stopped = false, page_sched_stopped = false; 584 552 585 - /* Suspend KFD if suspend_user_queues is true. 586 - * prevent the destruction of in-flight healthy user queue packets and 587 - * avoid race conditions between KFD and KGD during the reset process. 588 - */ 589 - if (suspend_user_queues) 590 - amdgpu_amdkfd_suspend(adev, false); 591 - 553 + mutex_lock(&sdma_instance->engine_reset_mutex); 592 554 /* Stop the scheduler's work queue for the GFX and page rings if they are running. 593 555 * This ensures that no new tasks are submitted to the queues while 594 556 * the reset is in progress. ··· 635 609 * if they were stopped by this function. This allows new tasks 636 610 * to be submitted to the queues after the reset is complete. 637 611 */ 638 - if (ret) { 612 + if (!ret) { 639 613 if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) { 640 614 drm_sched_wqueue_start(&gfx_ring->sched); 641 615 } ··· 643 617 drm_sched_wqueue_start(&page_ring->sched); 644 618 } 645 619 } 646 - 647 - if (suspend_user_queues) 648 - amdgpu_amdkfd_resume(adev, false); 620 + mutex_unlock(&sdma_instance->engine_reset_mutex); 649 621 650 622 return ret; 651 623 }

+9 -4

drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h

··· 64 64 struct amdgpu_bo *sdma_fw_obj; 65 65 uint64_t sdma_fw_gpu_addr; 66 66 uint32_t *sdma_fw_ptr; 67 + struct mutex engine_reset_mutex; 68 + /* track guilty state of GFX and PAGE queues */ 69 + bool gfx_guilty; 70 + bool page_guilty; 71 + 67 72 }; 68 73 69 74 enum amdgpu_sdma_ras_memory_id { ··· 131 126 uint32_t *ip_dump; 132 127 uint32_t supported_reset; 133 128 struct list_head reset_callback_list; 134 - /* track guilty state of GFX and PAGE queues */ 135 - bool gfx_guilty; 136 - bool page_guilty; 137 129 }; 138 130 139 131 /* ··· 171 169 }; 172 170 173 171 void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs); 174 - int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues); 172 + int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); 175 173 176 174 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) 177 175 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) ··· 196 194 void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev); 197 195 int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev); 198 196 void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev); 197 + bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring); 198 + struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, 199 + struct amdgpu_ring *ring); 199 200 #endif

+35 -9

drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

··· 135 135 struct amdgpu_sync_entry *e; 136 136 137 137 hash_for_each_possible(sync->fences, e, node, f->context) { 138 - if (unlikely(e->fence->context != f->context)) 139 - continue; 138 + if (dma_fence_is_signaled(e->fence)) { 139 + dma_fence_put(e->fence); 140 + e->fence = dma_fence_get(f); 141 + return true; 142 + } 140 143 141 - amdgpu_sync_keep_later(&e->fence, f); 142 - return true; 144 + if (likely(e->fence->context == f->context)) { 145 + amdgpu_sync_keep_later(&e->fence, f); 146 + return true; 147 + } 143 148 } 144 149 return false; 145 150 } ··· 154 149 * 155 150 * @sync: sync object to add fence to 156 151 * @f: fence to sync to 152 + * @flags: memory allocation flags to use when allocating sync entry 157 153 * 158 154 * Add the fence to the sync object. 159 155 */ 160 - int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) 156 + int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, 157 + gfp_t flags) 161 158 { 162 159 struct amdgpu_sync_entry *e; 163 160 ··· 169 162 if (amdgpu_sync_add_later(sync, f)) 170 163 return 0; 171 164 172 - e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); 165 + e = kmem_cache_alloc(amdgpu_sync_slab, flags); 173 166 if (!e) 174 167 return -ENOMEM; 175 168 ··· 256 249 struct dma_fence *tmp = dma_fence_chain_contained(f); 257 250 258 251 if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) { 259 - r = amdgpu_sync_fence(sync, f); 252 + r = amdgpu_sync_fence(sync, f, GFP_KERNEL); 260 253 dma_fence_put(f); 261 254 if (r) 262 255 return r; ··· 288 281 if (fence_owner != AMDGPU_FENCE_OWNER_KFD) 289 282 continue; 290 283 291 - r = amdgpu_sync_fence(sync, f); 284 + r = amdgpu_sync_fence(sync, f, GFP_KERNEL); 292 285 if (r) 293 286 break; 294 287 } ··· 395 388 hash_for_each_safe(source->fences, i, tmp, e, node) { 396 389 f = e->fence; 397 390 if (!dma_fence_is_signaled(f)) { 398 - r = amdgpu_sync_fence(clone, f); 391 + r = amdgpu_sync_fence(clone, f, GFP_KERNEL); 399 392 if (r) 400 393 return r; 401 394 
} else { ··· 404 397 } 405 398 406 399 return 0; 400 + } 401 + 402 + /** 403 + * amdgpu_sync_move - move all fences from src to dst 404 + * 405 + * @src: source of the fences, empty after function 406 + * @dst: destination for the fences 407 + * 408 + * Moves all fences from source to destination. All fences in destination are 409 + * freed and source is empty after the function call. 410 + */ 411 + void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst) 412 + { 413 + unsigned int i; 414 + 415 + amdgpu_sync_free(dst); 416 + 417 + for (i = 0; i < HASH_SIZE(src->fences); ++i) 418 + hlist_move_list(&src->fences[i], &dst->fences[i]); 407 419 } 408 420 409 421 /**

+3 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h

··· 47 47 }; 48 48 49 49 void amdgpu_sync_create(struct amdgpu_sync *sync); 50 - int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); 50 + int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, 51 + gfp_t flags); 51 52 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, 52 53 struct dma_resv *resv, enum amdgpu_sync_mode mode, 53 54 void *owner); ··· 57 56 struct amdgpu_ring *ring); 58 57 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); 59 58 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); 59 + void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst); 60 60 int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job); 61 61 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); 62 62 void amdgpu_sync_free(struct amdgpu_sync *sync);

+32

drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

··· 457 457 TP_ARGS(pasid) 458 458 ); 459 459 460 + TRACE_EVENT(amdgpu_isolation, 461 + TP_PROTO(void *prev, void *next), 462 + TP_ARGS(prev, next), 463 + TP_STRUCT__entry( 464 + __field(void *, prev) 465 + __field(void *, next) 466 + ), 467 + 468 + TP_fast_assign( 469 + __entry->prev = prev; 470 + __entry->next = next; 471 + ), 472 + TP_printk("prev=%p, next=%p", 473 + __entry->prev, 474 + __entry->next) 475 + ); 476 + 477 + TRACE_EVENT(amdgpu_cleaner_shader, 478 + TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence), 479 + TP_ARGS(ring, fence), 480 + TP_STRUCT__entry( 481 + __string(ring, ring->name) 482 + __field(u64, seqno) 483 + ), 484 + 485 + TP_fast_assign( 486 + __assign_str(ring); 487 + __entry->seqno = fence->seqno; 488 + ), 489 + TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno) 490 + ); 491 + 460 492 TRACE_EVENT(amdgpu_bo_list_set, 461 493 TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), 462 494 TP_ARGS(list, bo),

+24 -6

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

··· 438 438 439 439 if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { 440 440 vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); 441 - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, 442 - false); 443 - if (r) 444 - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); 441 + mutex_lock(&adev->vcn.workload_profile_mutex); 442 + if (adev->vcn.workload_profile_active) { 443 + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, 444 + false); 445 + if (r) 446 + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); 447 + adev->vcn.workload_profile_active = false; 448 + } 449 + mutex_unlock(&adev->vcn.workload_profile_mutex); 445 450 } else { 446 451 schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); 447 452 } ··· 455 460 461 456 atomic_inc(&vcn_inst->total_submission_cnt); 462 457 463 - if (!cancel_delayed_work_sync(&vcn_inst->idle_work)) { 458 + cancel_delayed_work_sync(&vcn_inst->idle_work); 459 + 460 + /* We can safely return early here because we've cancelled the 461 + * delayed work so there is no one else to set it to false 462 + * and we don't care if someone else sets it to true. 463 + */ 464 + if (adev->vcn.workload_profile_active) 465 + goto pg_lock; 466 + 467 + mutex_lock(&adev->vcn.workload_profile_mutex); 468 + if (!adev->vcn.workload_profile_active) { 464 469 r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, 465 - true); 470 + true); 466 471 if (r) 467 472 dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); 473 + adev->vcn.workload_profile_active = true; 468 474 } 475 + mutex_unlock(&adev->vcn.workload_profile_mutex); 469 476 477 + pg_lock: 470 478 mutex_lock(&vcn_inst->vcn_pg_lock); 471 479 vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); 472 480

+3

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

··· 358 358 359 359 bool per_inst_fw; 360 360 unsigned fw_version; 361 + 362 + bool workload_profile_active; 363 + struct mutex workload_profile_mutex; 361 364 }; 362 365 363 366 struct amdgpu_fw_shared_rb_ptrs_struct {

+22 -6

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

··· 754 754 bool need_pipe_sync) 755 755 { 756 756 struct amdgpu_device *adev = ring->adev; 757 + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; 757 758 unsigned vmhub = ring->vm_hub; 758 759 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 759 760 struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; ··· 762 761 bool gds_switch_needed = ring->funcs->emit_gds_switch && 763 762 job->gds_switch_needed; 764 763 bool vm_flush_needed = job->vm_needs_flush; 765 - struct dma_fence *fence = NULL; 764 + bool cleaner_shader_needed = false; 766 765 bool pasid_mapping_needed = false; 766 + struct dma_fence *fence = NULL; 767 767 unsigned int patch; 768 768 int r; 769 769 ··· 787 785 pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && 788 786 ring->funcs->emit_wreg; 789 787 788 + cleaner_shader_needed = adev->gfx.enable_cleaner_shader && 789 + ring->funcs->emit_cleaner_shader && job->base.s_fence && 790 + &job->base.s_fence->scheduled == isolation->spearhead; 791 + 790 792 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && 791 - !(job->enforce_isolation && !job->vmid)) 793 + !cleaner_shader_needed) 792 794 return 0; 793 795 794 796 amdgpu_ring_ib_begin(ring); ··· 803 797 if (need_pipe_sync) 804 798 amdgpu_ring_emit_pipeline_sync(ring); 805 799 806 - if (adev->gfx.enable_cleaner_shader && 807 - ring->funcs->emit_cleaner_shader && 808 - job->enforce_isolation) 800 + if (cleaner_shader_needed) 809 801 ring->funcs->emit_cleaner_shader(ring); 810 802 811 803 if (vm_flush_needed) { ··· 825 821 job->oa_size); 826 822 } 827 823 828 - if (vm_flush_needed || pasid_mapping_needed) { 824 + if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { 829 825 r = amdgpu_fence_emit(ring, &fence, NULL, 0); 830 826 if (r) 831 827 return r; ··· 846 842 dma_fence_put(id->pasid_mapping); 847 843 id->pasid_mapping = dma_fence_get(fence); 848 844 mutex_unlock(&id_mgr->lock); 845 + } 846 + 847 + /* 848 + * Make sure that all 
other submissions wait for the cleaner shader to 849 + * finish before we push them to the HW. 850 + */ 851 + if (cleaner_shader_needed) { 852 + trace_amdgpu_cleaner_shader(ring, fence); 853 + mutex_lock(&adev->enforce_isolation_mutex); 854 + dma_fence_put(isolation->spearhead); 855 + isolation->spearhead = dma_fence_get(fence); 856 + mutex_unlock(&adev->enforce_isolation_mutex); 849 857 } 850 858 dma_fence_put(fence); 851 859

+14

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

··· 1626 1626 } 1627 1627 } 1628 1628 break; 1629 + case IP_VERSION(11, 5, 0): 1630 + case IP_VERSION(11, 5, 1): 1631 + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1632 + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1633 + if (adev->gfx.mec_fw_version >= 26 && 1634 + adev->mes.fw_version[0] >= 114) { 1635 + adev->gfx.enable_cleaner_shader = true; 1636 + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1637 + if (r) { 1638 + adev->gfx.enable_cleaner_shader = false; 1639 + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1640 + } 1641 + } 1642 + break; 1629 1643 default: 1630 1644 adev->gfx.enable_cleaner_shader = false; 1631 1645 break;

-11

drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c

··· 2637 2637 u32 tmp; 2638 2638 u32 rb_bufsz; 2639 2639 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2640 - u32 i; 2641 2640 2642 2641 /* Set the write pointer delay */ 2643 2642 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); ··· 2691 2692 2692 2693 /* start the ring */ 2693 2694 gfx_v12_0_cp_gfx_start(adev); 2694 - 2695 - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2696 - ring = &adev->gfx.gfx_ring[i]; 2697 - ring->sched.ready = true; 2698 - } 2699 - 2700 2695 return 0; 2701 2696 } 2702 2697 ··· 3030 3037 if (r) 3031 3038 goto done; 3032 3039 3033 - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3034 - ring = &adev->gfx.gfx_ring[i]; 3035 - ring->sched.ready = true; 3036 - } 3037 3040 done: 3038 3041 return r; 3039 3042 }

+2 -3

drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

··· 867 867 868 868 switch (type) { 869 869 case ACA_SMU_TYPE_UE: 870 - bank->aca_err_type = ACA_ERROR_TYPE_UE; 871 - ret = aca_error_cache_log_bank_error(handle, &info, 872 - ACA_ERROR_TYPE_UE, 1ULL); 870 + bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank); 871 + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL); 873 872 break; 874 873 case ACA_SMU_TYPE_CE: 875 874 bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);

+91 -28

drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c

··· 31 31 #include "amdgpu_ucode.h" 32 32 #include "amdgpu_trace.h" 33 33 #include "amdgpu_reset.h" 34 + #include "gc/gc_9_0_sh_mask.h" 34 35 35 36 #include "sdma/sdma_4_4_2_offset.h" 36 37 #include "sdma/sdma_4_4_2_sh_mask.h" ··· 673 672 * @adev: amdgpu_device pointer 674 673 * @i: instance to resume 675 674 * @restore: used to restore wptr when restart 676 - * @guilty: boolean indicating whether this queue is the guilty one (caused the timeout/error) 677 675 * 678 676 * Set up the gfx DMA ring buffers and enable them. 679 677 * Returns 0 for success, error for failure. 680 678 */ 681 - static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty) 679 + static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore) 682 680 { 683 681 struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; 684 682 u32 rb_cntl, ib_cntl, wptr_poll_cntl; ··· 714 714 /* For the guilty queue, set RPTR to the current wptr to skip bad commands, 715 715 * It is not a guilty queue, restore cache_rptr and continue execution. 716 716 */ 717 - if (guilty) 717 + if (adev->sdma.instance[i].gfx_guilty) 718 718 rwptr = ring->wptr; 719 719 else 720 720 rwptr = ring->cached_rptr; ··· 779 779 * @adev: amdgpu_device pointer 780 780 * @i: instance to resume 781 781 * @restore: boolean to say restore needed or not 782 - * @guilty: boolean indicating whether this queue is the guilty one (caused the timeout/error) 783 782 * 784 783 * Set up the page DMA ring buffers and enable them. 785 784 * Returns 0 for success, error for failure. 
786 785 */ 787 - static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty) 786 + static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore) 788 787 { 789 788 struct amdgpu_ring *ring = &adev->sdma.instance[i].page; 790 789 u32 rb_cntl, ib_cntl, wptr_poll_cntl; ··· 802 803 /* For the guilty queue, set RPTR to the current wptr to skip bad commands, 803 804 * It is not a guilty queue, restore cache_rptr and continue execution. 804 805 */ 805 - if (guilty) 806 + if (adev->sdma.instance[i].page_guilty) 806 807 rwptr = ring->wptr; 807 808 else 808 809 rwptr = ring->cached_rptr; ··· 988 989 uint32_t temp; 989 990 990 991 WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); 991 - sdma_v4_4_2_gfx_resume(adev, i, restore, adev->sdma.gfx_guilty); 992 + sdma_v4_4_2_gfx_resume(adev, i, restore); 992 993 if (adev->sdma.has_page_queue) 993 - sdma_v4_4_2_page_resume(adev, i, restore, adev->sdma.page_guilty); 994 + sdma_v4_4_2_page_resume(adev, i, restore); 994 995 995 996 /* set utc l1 enable flag always to 1 */ 996 997 temp = RREG32_SDMA(i, regSDMA_CNTL); ··· 1291 1292 seq, 0xffffffff, 4); 1292 1293 } 1293 1294 1295 + /* 1296 + * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value 1297 + * @vmid: The VMID to invalidate 1298 + * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight) 1299 + * 1300 + * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID 1301 + * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and 1302 + * L2 PDEs) are invalidated. 
1303 + */ 1304 + static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid, 1305 + uint32_t flush_type) 1306 + { 1307 + u32 req = 0; 1294 1308 1295 - /** 1296 - * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA 1309 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, 1310 + PER_VMID_INVALIDATE_REQ, 1 << vmid); 1311 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); 1312 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); 1313 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); 1314 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); 1315 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); 1316 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); 1317 + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, 1318 + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); 1319 + 1320 + return req; 1321 + } 1322 + 1323 + /* 1324 + * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA 1325 + * @ring: The SDMA ring 1326 + * @vmid: The VMID to flush 1327 + * @pd_addr: The page directory address 1297 1328 * 1298 - * @ring: amdgpu_ring pointer 1299 - * @vmid: vmid number to use 1300 - * @pd_addr: address 1301 - * 1302 - * Update the page table base and flush the VM TLB 1303 - * using sDMA. 1329 + * This function emits the necessary register writes and waits to perform a VM flush for the 1330 + * specified VMID. It updates the PTB address registers and issues a VM invalidation request 1331 + * using the specified VM invalidation engine. 
1304 1332 */ 1305 1333 static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring, 1306 - unsigned vmid, uint64_t pd_addr) 1334 + unsigned int vmid, uint64_t pd_addr) 1307 1335 { 1308 - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 1336 + struct amdgpu_device *adev = ring->adev; 1337 + uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0); 1338 + unsigned int eng = ring->vm_inv_eng; 1339 + struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; 1340 + 1341 + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + 1342 + (hub->ctx_addr_distance * vmid), 1343 + lower_32_bits(pd_addr)); 1344 + 1345 + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + 1346 + (hub->ctx_addr_distance * vmid), 1347 + upper_32_bits(pd_addr)); 1348 + /* 1349 + * Construct and emit the VM invalidation packet 1350 + */ 1351 + amdgpu_ring_write(ring, 1352 + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) | 1353 + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) | 1354 + SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) | 1355 + SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) | 1356 + SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng)); 1357 + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req)); 1358 + amdgpu_ring_write(ring, 0); 1359 + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid))); 1309 1360 } 1310 1361 1311 1362 static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring, ··· 1494 1445 } 1495 1446 1496 1447 for (i = 0; i < adev->sdma.num_instances; i++) { 1448 + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); 1449 + /* Initialize guilty flags for GFX and PAGE queues */ 1450 + adev->sdma.instance[i].gfx_guilty = false; 1451 + adev->sdma.instance[i].page_guilty = false; 1452 + 1497 1453 ring = &adev->sdma.instance[i].ring; 1498 1454 ring->ring_obj = NULL; 1499 1455 ring->use_doorbell = true; ··· 1560 1506 r = amdgpu_sdma_sysfs_reset_mask_init(adev); 1561 1507 if (r) 
1562 1508 return r; 1563 - /* Initialize guilty flags for GFX and PAGE queues */ 1564 - adev->sdma.gfx_guilty = false; 1565 - adev->sdma.page_guilty = false; 1566 1509 1567 1510 return r; 1568 1511 } ··· 1717 1666 { 1718 1667 struct amdgpu_device *adev = ring->adev; 1719 1668 u32 id = GET_INST(SDMA0, ring->me); 1720 - return amdgpu_sdma_reset_engine(adev, id, true); 1669 + int r; 1670 + 1671 + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) 1672 + return -EOPNOTSUPP; 1673 + 1674 + amdgpu_amdkfd_suspend(adev, false); 1675 + r = amdgpu_sdma_reset_engine(adev, id); 1676 + amdgpu_amdkfd_resume(adev, false); 1677 + 1678 + return r; 1721 1679 } 1722 1680 1723 1681 static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) ··· 1739 1679 return -EINVAL; 1740 1680 1741 1681 /* Check if this queue is the guilty one */ 1742 - adev->sdma.gfx_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, false); 1682 + adev->sdma.instance[instance_id].gfx_guilty = 1683 + sdma_v4_4_2_is_queue_selected(adev, instance_id, false); 1743 1684 if (adev->sdma.has_page_queue) 1744 - adev->sdma.page_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, true); 1685 + adev->sdma.instance[instance_id].page_guilty = 1686 + sdma_v4_4_2_is_queue_selected(adev, instance_id, true); 1745 1687 1746 1688 /* Cache the rptr before reset, after the reset, 1747 1689 * all of the registers will be reset to 0 ··· 2177 2115 3 + /* hdp invalidate */ 2178 2116 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ 2179 2117 /* sdma_v4_4_2_ring_emit_vm_flush */ 2180 - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 2181 - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 2118 + 4 + 2 * 3 + 2182 2119 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ 2183 2120 .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ 2184 2121 .emit_ib = sdma_v4_4_2_ring_emit_ib, ··· 2209 2148 3 + /* hdp invalidate */ 2210 2149 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ 2211 2150 /* 
sdma_v4_4_2_ring_emit_vm_flush */ 2212 - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 2213 - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 2151 + 4 + 2 * 3 + 2214 2152 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ 2215 2153 .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ 2216 2154 .emit_ib = sdma_v4_4_2_ring_emit_ib, ··· 2407 2347 */ 2408 2348 static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) 2409 2349 { 2350 + /* per queue reset not supported for SRIOV */ 2351 + if (amdgpu_sriov_vf(adev)) 2352 + return; 2410 2353 2411 2354 /* 2412 2355 * the user queue relies on MEC fw and pmfw when the sdma queue do reset.

+1

drivers/gpu/drm/amd/amdgpu/ta_ras_if.h

··· 151 151 uint16_t xcc_mask; 152 152 uint8_t channel_dis_num; 153 153 uint8_t nps_mode; 154 + uint32_t active_umc_mask; 154 155 }; 155 156 156 157 struct ta_ras_mca_addr {

+23 -5

drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c

··· 147 147 if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { 148 148 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, 149 149 AMD_PG_STATE_GATE); 150 - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, 151 - false); 152 - if (r) 153 - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); 150 + mutex_lock(&adev->vcn.workload_profile_mutex); 151 + if (adev->vcn.workload_profile_active) { 152 + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, 153 + false); 154 + if (r) 155 + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); 156 + adev->vcn.workload_profile_active = false; 157 + } 158 + mutex_unlock(&adev->vcn.workload_profile_mutex); 154 159 } else { 155 160 schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); 156 161 } ··· 169 164 170 165 atomic_inc(&adev->vcn.inst[0].total_submission_cnt); 171 166 172 - if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) { 167 + cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); 168 + 169 + /* We can safely return early here because we've cancelled the 170 + * the delayed work so there is no one else to set it to false 171 + * and we don't care if someone else sets it to true. 172 + */ 173 + if (adev->vcn.workload_profile_active) 174 + goto pg_lock; 175 + 176 + mutex_lock(&adev->vcn.workload_profile_mutex); 177 + if (!adev->vcn.workload_profile_active) { 173 178 r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, 174 179 true); 175 180 if (r) 176 181 dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); 182 + adev->vcn.workload_profile_active = true; 177 183 } 184 + mutex_unlock(&adev->vcn.workload_profile_mutex); 178 185 186 + pg_lock: 179 187 mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); 180 188 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, 181 189 AMD_PG_STATE_UNGATE);

+70

drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h

··· 64 64 #define HEADER_BARRIER 5 65 65 #define SDMA_OP_AQL_COPY 0 66 66 #define SDMA_OP_AQL_BARRIER_OR 0 67 + /* vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0 */ 68 + #define SDMA_OP_VM_INVALIDATE 8 69 + #define SDMA_SUBOP_VM_INVALIDATE 4 67 70 68 71 /*define for op field*/ 69 72 #define SDMA_PKT_HEADER_op_offset 0 ··· 3334 3331 #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 3335 3332 #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) 3336 3333 3334 + /* 3335 + ** Definitions for SDMA_PKT_VM_INVALIDATION packet 3336 + */ 3337 + 3338 + /*define for HEADER word*/ 3339 + /*define for op field*/ 3340 + #define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 3341 + #define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF 3342 + #define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 3343 + #define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) 3344 + 3345 + /*define for sub_op field*/ 3346 + #define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 3347 + #define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF 3348 + #define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 3349 + #define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) 3350 + 3351 + /*define for xcc0_eng_id field*/ 3352 + #define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0 3353 + #define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F 3354 + #define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16 3355 + #define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift) 
3356 + 3357 + /*define for xcc1_eng_id field*/ 3358 + #define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0 3359 + #define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F 3360 + #define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21 3361 + #define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift) 3362 + 3363 + /*define for mmhub_eng_id field*/ 3364 + #define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0 3365 + #define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F 3366 + #define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26 3367 + #define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift) 3368 + 3369 + /*define for INVALIDATEREQ word*/ 3370 + /*define for invalidatereq field*/ 3371 + #define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 3372 + #define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF 3373 + #define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 3374 + #define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) ((x & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) 3375 + 3376 + /*define for ADDRESSRANGELO word*/ 3377 + /*define for addressrangelo field*/ 3378 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 3379 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF 3380 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 3381 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) 3382 + 3383 + /*define for ADDRESSRANGEHI word*/ 3384 + /*define 
for invalidateack field*/ 3385 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 3386 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF 3387 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 3388 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) 3389 + 3390 + /*define for addressrangehi field*/ 3391 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 3392 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F 3393 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 3394 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) 3395 + 3396 + /*define for reserved field*/ 3397 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 3398 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF 3399 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 3400 + #define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) 3337 3401 3338 3402 #endif /* __SDMA_PKT_OPEN_H_ */

+1 -1

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

··· 2310 2310 continue; 2311 2311 2312 2312 /* Reset engine and check. */ 2313 - if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || 2313 + if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) || 2314 2314 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || 2315 2315 !set_sdma_queue_as_reset(dqm, doorbell_off)) { 2316 2316 r = -ENOTRECOVERABLE;

+11 -7

drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c

··· 418 418 !pm->pmf->config_dequeue_wait_counts_size) 419 419 return 0; 420 420 421 + if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || 422 + KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))) 423 + return 0; 424 + 421 425 size = pm->pmf->config_dequeue_wait_counts_size; 422 426 423 427 mutex_lock(&pm->lock); ··· 440 436 441 437 retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, 442 438 cmd, value); 443 - if (!retval) 439 + if (!retval) { 444 440 retval = kq_submit_packet(pm->priv_queue); 445 - else 441 + 442 + /* If default value is modified, cache that in dqm->wait_times */ 443 + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) 444 + update_dqm_wait_times(pm->dqm); 445 + } else { 446 446 kq_rollback_packet(pm->priv_queue); 447 + } 447 448 } 448 - 449 - /* If default value is modified, cache that value in dqm->wait_times */ 450 - if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) 451 - update_dqm_wait_times(pm->dqm); 452 - 453 449 out: 454 450 mutex_unlock(&pm->lock); 455 451 return retval;

+20 -12

drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c

··· 310 310 reg_data); 311 311 } 312 312 313 + /* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with 314 + * register/value for configuring dequeue wait counts 315 + * 316 + * @return: -ve for failure and 0 for success and buffer is 317 + * filled in with packet 318 + * 319 + **/ 313 320 static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, 314 321 uint32_t *buffer, 315 322 enum kfd_config_dequeue_wait_counts_cmd cmd, ··· 328 321 329 322 switch (cmd) { 330 323 case KFD_DEQUEUE_WAIT_INIT: { 331 - uint32_t sch_wave = 0, que_sleep = 0; 332 - /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. 324 + uint32_t sch_wave = 0, que_sleep = 1; 325 + 326 + /* For all gfx9 ASICs > gfx941, 327 + * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. 333 328 * On a 1GHz machine this is roughly 1 microsecond, which is 334 329 * about how long it takes to load data out of memory during 335 330 * queue connect 336 331 * QUE_SLEEP: Wait Count for Dequeue Retry. 332 + * 333 + * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU 337 334 */ 338 - if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) && 339 - KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) { 340 - que_sleep = 1; 335 + if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || 336 + KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)) 337 + return -EPERM; 341 338 342 - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ 343 - if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && 344 - (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) 345 - sch_wave = 1; 346 - } else { 347 - return 0; 348 - } 339 + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && 340 + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) 341 + sch_wave = 1; 342 + 349 343 pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep, 350 344 &reg_offset, &reg_data); 351 345

-4

drivers/gpu/drm/amd/amdkfd/kfd_topology.c

··· 2006 2006 dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | 2007 2007 HSA_DBG_WATCH_ADDR_MASK_HI_BIT; 2008 2008 2009 - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) 2010 - dev->node_props.capability |= 2011 - HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; 2012 - 2013 2009 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) 2014 2010 dev->node_props.capability |= 2015 2011 HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;

+1 -1

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

··· 1752 1752 } 1753 1753 if (quirk_entries.support_edp0_on_dp1) { 1754 1754 init_data->flags.support_edp0_on_dp1 = true; 1755 - drm_info(dev, "aux_hpd_discon_quirk attached\n"); 1755 + drm_info(dev, "support_edp0_on_dp1 attached\n"); 1756 1756 } 1757 1757 } 1758 1758

+12 -8

drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c

··· 130 130 struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); 131 131 struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; 132 132 struct dc *dc = clk_mgr_base->ctx->dc; 133 - int display_count; 133 + int display_count = 0; 134 134 bool update_dppclk = false; 135 135 bool update_dispclk = false; 136 136 bool dpp_clock_lowered = false; ··· 202 202 update_dppclk = true; 203 203 } 204 204 205 - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { 206 - /* No need to apply the w/a if we haven't taken over from bios yet */ 207 - if (clk_mgr_base->clks.dispclk_khz) 208 - dcn315_disable_otg_wa(clk_mgr_base, context, true); 205 + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && 206 + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { 207 + int requested_dispclk_khz = new_clocks->dispclk_khz; 209 208 209 + dcn315_disable_otg_wa(clk_mgr_base, context, true); 210 + 211 + /* Clamp the requested clock to PMFW based on their limit. */ 212 + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) 213 + requested_dispclk_khz = dc->debug.min_disp_clk_khz; 214 + 215 + dcn315_smu_set_dispclk(clk_mgr, requested_dispclk_khz); 210 216 clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; 211 - dcn315_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); 212 - if (clk_mgr_base->clks.dispclk_khz) 213 - dcn315_disable_otg_wa(clk_mgr_base, context, false); 217 + dcn315_disable_otg_wa(clk_mgr_base, context, false); 214 218 215 219 update_dispclk = true; 216 220 }

+10 -3

drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c

··· 140 140 struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); 141 141 struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; 142 142 struct dc *dc = clk_mgr_base->ctx->dc; 143 - int display_count; 143 + int display_count = 0; 144 144 bool update_dppclk = false; 145 145 bool update_dispclk = false; 146 146 bool dpp_clock_lowered = false; ··· 209 209 update_dppclk = true; 210 210 } 211 211 212 - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { 212 + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && 213 + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { 214 + int requested_dispclk_khz = new_clocks->dispclk_khz; 215 + 213 216 dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); 214 217 218 + /* Clamp the requested clock to PMFW based on their limit. */ 219 + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) 220 + requested_dispclk_khz = dc->debug.min_disp_clk_khz; 221 + 222 + dcn316_smu_set_dispclk(clk_mgr, requested_dispclk_khz); 215 223 clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; 216 - dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); 217 224 dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); 218 225 219 226 update_dispclk = true;

+6 -9

drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c

··· 204 204 struct link_encoder *new_pipe_link_enc = new_pipe->link_res.dio_link_enc; 205 205 struct link_encoder *pipe_link_enc = pipe->link_res.dio_link_enc; 206 206 bool stream_changed_otg_dig_on = false; 207 + bool has_active_hpo = false; 208 + 207 209 if (pipe->top_pipe || pipe->prev_odm_pipe) 208 210 continue; 209 211 ··· 227 225 new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && 228 226 new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); 229 227 230 - bool has_active_hpo = false; 231 - 232 228 if (old_pipe->stream && new_pipe->stream && old_pipe->stream == new_pipe->stream) { 233 229 has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) && 234 230 dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe); 235 231 236 - } 232 + } 237 233 238 - 239 - if (!has_active_hpo && !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe) && 240 - (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || 241 - !pipe_link_enc) && !stream_changed_otg_dig_on)) { 242 - 243 - 234 + if (!has_active_hpo && !stream_changed_otg_dig_on && pipe->stream && 235 + (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || !pipe_link_enc) && 236 + !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe)) { 244 237 /* This w/a should not trigger when we have a dig active */ 245 238 if (disable) { 246 239 if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)

+2 -7

drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c

··· 370 370 return link->dc->link_srv->dp_should_enable_fec(link); 371 371 } 372 372 373 - int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( 373 + void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( 374 374 struct dc_link *link, int peak_bw) 375 375 { 376 - return link->dc->link_srv->dpia_handle_usb4_bandwidth_allocation_for_link(link, peak_bw); 377 - } 378 - 379 - void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result) 380 - { 381 - link->dc->link_srv->dpia_handle_bw_alloc_response(link, bw, result); 376 + link->dc->link_srv->dpia_handle_usb4_bandwidth_allocation_for_link(link, peak_bw); 382 377 } 383 378 384 379 bool dc_link_check_link_loss_status(

+2 -16

drivers/gpu/drm/amd/display/dc/dc.h

··· 53 53 struct set_config_cmd_payload; 54 54 struct dmub_notification; 55 55 56 - #define DC_VER "3.2.324" 56 + #define DC_VER "3.2.325" 57 57 58 58 /** 59 59 * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC ··· 2354 2354 void dc_link_set_usb4_req_bw_req(struct dc_link *link, int req_bw); 2355 2355 2356 2356 /* 2357 - * Handle function for when the status of the Request above is complete. 2358 - * We will find out the result of allocating on CM and update structs. 2359 - * 2360 - * @link: pointer to the dc_link struct instance 2361 - * @bw: Allocated or Estimated BW depending on the result 2362 - * @result: Response type 2363 - * 2364 - * return: none 2365 - */ 2366 - void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link, 2367 - uint8_t bw, uint8_t result); 2368 - 2369 - /* 2370 2357 * Handle the USB4 BW Allocation related functionality here: 2371 2358 * Plug => Try to allocate max bw from timing parameters supported by the sink 2372 2359 * Unplug => de-allocate bw ··· 2361 2374 * @link: pointer to the dc_link struct instance 2362 2375 * @peak_bw: Peak bw used by the link/sink 2363 2376 * 2364 - * return: allocated bw else return 0 2365 2377 */ 2366 - int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( 2378 + void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( 2367 2379 struct dc_link *link, int peak_bw); 2368 2380 2369 2381 /*

+85 -100

drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c

··· 70 70 } 71 71 } 72 72 73 - bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv) 73 + void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) 74 74 { 75 - struct dmub_srv *dmub; 76 - struct dc_context *dc_ctx; 75 + struct dmub_srv *dmub = dc_dmub_srv->dmub; 76 + struct dc_context *dc_ctx = dc_dmub_srv->ctx; 77 77 enum dmub_status status; 78 78 79 - if (!dc_dmub_srv || !dc_dmub_srv->dmub) 80 - return false; 81 - 82 - dc_ctx = dc_dmub_srv->ctx; 83 - dmub = dc_dmub_srv->dmub; 84 - 85 79 do { 86 - status = dmub_srv_wait_for_pending(dmub, 100000); 80 + status = dmub_srv_wait_for_idle(dmub, 100000); 87 81 } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); 88 82 89 83 if (status != DMUB_STATUS_OK) { 90 84 DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); 91 85 dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 92 86 } 93 - 94 - return status == DMUB_STATUS_OK; 95 87 } 96 88 97 89 void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv) ··· 126 134 } 127 135 } 128 136 129 - static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, 130 - unsigned int count, 131 - union dmub_rb_cmd *cmd_list) 132 - { 133 - struct dc_context *dc_ctx; 134 - struct dmub_srv *dmub; 135 - enum dmub_status status = DMUB_STATUS_OK; 136 - int i; 137 - 138 - if (!dc_dmub_srv || !dc_dmub_srv->dmub) 139 - return false; 140 - 141 - dc_ctx = dc_dmub_srv->ctx; 142 - dmub = dc_dmub_srv->dmub; 143 - 144 - for (i = 0 ; i < count; i++) { 145 - /* confirm no messages pending */ 146 - do { 147 - status = dmub_srv_wait_for_idle(dmub, 100000); 148 - } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); 149 - 150 - /* queue command */ 151 - if (status == DMUB_STATUS_OK) 152 - status = dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]); 153 - 154 - /* check for errors */ 155 - if (status != DMUB_STATUS_OK) { 156 - break; 157 - } 158 - } 159 - 160 - if (status != DMUB_STATUS_OK) { 161 - if (status 
!= DMUB_STATUS_POWER_STATE_D3) { 162 - DC_ERROR("Error starting DMUB execution: status=%d\n", status); 163 - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 164 - } 165 - return false; 166 - } 167 - 168 - return true; 169 - } 170 - 171 - static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, 137 + bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, 172 138 unsigned int count, 173 139 union dmub_rb_cmd *cmd_list) 174 140 { ··· 143 193 144 194 for (i = 0 ; i < count; i++) { 145 195 // Queue command 146 - if (!cmd_list[i].cmd_common.header.multi_cmd_pending || 147 - dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) { 148 - status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); 149 - } else { 150 - status = DMUB_STATUS_QUEUE_FULL; 151 - } 196 + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); 152 197 153 198 if (status == DMUB_STATUS_QUEUE_FULL) { 154 199 /* Execute and wait for queue to become empty again. */ 155 - status = dmub_srv_fb_cmd_execute(dmub); 200 + status = dmub_srv_cmd_execute(dmub); 156 201 if (status == DMUB_STATUS_POWER_STATE_D3) 157 202 return false; 158 203 ··· 156 211 } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); 157 212 158 213 /* Requeue the command. 
*/ 159 - status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); 214 + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); 160 215 } 161 216 162 217 if (status != DMUB_STATUS_OK) { ··· 168 223 } 169 224 } 170 225 171 - status = dmub_srv_fb_cmd_execute(dmub); 226 + status = dmub_srv_cmd_execute(dmub); 172 227 if (status != DMUB_STATUS_OK) { 173 228 if (status != DMUB_STATUS_POWER_STATE_D3) { 174 229 DC_ERROR("Error starting DMUB execution: status=%d\n", status); ··· 178 233 } 179 234 180 235 return true; 181 - } 182 - 183 - bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, 184 - unsigned int count, 185 - union dmub_rb_cmd *cmd_list) 186 - { 187 - bool res = false; 188 - 189 - if (dc_dmub_srv && dc_dmub_srv->dmub) { 190 - if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) { 191 - res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); 192 - } else { 193 - res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); 194 - } 195 - } 196 - 197 - return res; 198 236 } 199 237 200 238 bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, ··· 202 274 DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); 203 275 if (!dmub->debug.timeout_info.timeout_occured) { 204 276 dmub->debug.timeout_info.timeout_occured = true; 205 - if (cmd_list) 206 - dmub->debug.timeout_info.timeout_cmd = *cmd_list; 277 + dmub->debug.timeout_info.timeout_cmd = *cmd_list; 207 278 dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); 208 279 } 209 280 dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); ··· 210 283 } 211 284 212 285 // Copy data back from ring buffer into command 213 - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) { 214 - dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list); 215 - } 286 + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) 287 + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); 216 288 } 217 289 218 290 return true; ··· 224 298 225 299 bool 
dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type) 226 300 { 227 - if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list)) 301 + struct dc_context *dc_ctx; 302 + struct dmub_srv *dmub; 303 + enum dmub_status status; 304 + int i; 305 + 306 + if (!dc_dmub_srv || !dc_dmub_srv->dmub) 228 307 return false; 229 308 230 - return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list); 309 + dc_ctx = dc_dmub_srv->ctx; 310 + dmub = dc_dmub_srv->dmub; 311 + 312 + for (i = 0 ; i < count; i++) { 313 + // Queue command 314 + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); 315 + 316 + if (status == DMUB_STATUS_QUEUE_FULL) { 317 + /* Execute and wait for queue to become empty again. */ 318 + status = dmub_srv_cmd_execute(dmub); 319 + if (status == DMUB_STATUS_POWER_STATE_D3) 320 + return false; 321 + 322 + status = dmub_srv_wait_for_idle(dmub, 100000); 323 + if (status != DMUB_STATUS_OK) 324 + return false; 325 + 326 + /* Requeue the command. 
*/ 327 + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); 328 + } 329 + 330 + if (status != DMUB_STATUS_OK) { 331 + if (status != DMUB_STATUS_POWER_STATE_D3) { 332 + DC_ERROR("Error queueing DMUB command: status=%d\n", status); 333 + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 334 + } 335 + return false; 336 + } 337 + } 338 + 339 + status = dmub_srv_cmd_execute(dmub); 340 + if (status != DMUB_STATUS_OK) { 341 + if (status != DMUB_STATUS_POWER_STATE_D3) { 342 + DC_ERROR("Error starting DMUB execution: status=%d\n", status); 343 + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 344 + } 345 + return false; 346 + } 347 + 348 + // Wait for DMUB to process command 349 + if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { 350 + if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { 351 + do { 352 + status = dmub_srv_wait_for_idle(dmub, 100000); 353 + } while (status != DMUB_STATUS_OK); 354 + } else 355 + status = dmub_srv_wait_for_idle(dmub, 100000); 356 + 357 + if (status != DMUB_STATUS_OK) { 358 + DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); 359 + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 360 + return false; 361 + } 362 + 363 + // Copy data back from ring buffer into command 364 + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) 365 + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); 366 + } 367 + 368 + return true; 231 369 } 232 370 233 371 bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) ··· 1243 1253 ips_fw->signals.bits.ips1_commit, 1244 1254 ips_fw->signals.bits.ips2_commit); 1245 1255 1246 - dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); 1256 + dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); 1247 1257 1248 1258 memset(&new_signals, 0, sizeof(new_signals)); 1249 1259 ··· 1400 1410 ips_fw->signals.bits.ips1_commit, 1401 1411 ips_fw->signals.bits.ips2_commit); 1402 1412 1403 - dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub); 1413 + dmub_srv_sync_inbox1(dc->ctx->dmub_srv->dmub); 1404 1414 } 1405 1415 } 1406 
1416 ··· 1654 1664 /* fill in generic command header */ 1655 1665 global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; 1656 1666 global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; 1657 - global_cmd->header.payload_bytes = 1658 - sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); 1667 + global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); 1659 1668 1660 1669 if (enable) { 1661 1670 /* send global configuration parameters */ ··· 1673 1684 /* configure command header */ 1674 1685 stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; 1675 1686 stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; 1676 - stream_base_cmd->header.payload_bytes = 1677 - sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); 1687 + stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); 1678 1688 stream_base_cmd->header.multi_cmd_pending = 1; 1679 1689 stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; 1680 1690 stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; 1681 - stream_sub_state_cmd->header.payload_bytes = 1682 - sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); 1691 + stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); 1683 1692 stream_sub_state_cmd->header.multi_cmd_pending = 1; 1684 1693 /* copy stream static base state */ 1685 1694 memcpy(&stream_base_cmd->config, ··· 1723 1736 cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num; 1724 1737 cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger; 1725 1738 1726 - cmd.fams2_drr_update.header.payload_bytes = 1727 - sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); 1739 + cmd.fams2_drr_update.header.payload_bytes = sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); 1728 
1740 1729 1741 dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); 1730 1742 } ··· 1759 1773 /* build command header */ 1760 1774 cmds[num_cmds].fams2_flip.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; 1761 1775 cmds[num_cmds].fams2_flip.header.sub_type = DMUB_CMD__FAMS2_FLIP; 1762 - cmds[num_cmds].fams2_flip.header.payload_bytes = 1763 - sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header); 1776 + cmds[num_cmds].fams2_flip.header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2_flip); 1764 1777 1765 1778 /* for chaining multiple commands, all but last command should set to 1 */ 1766 1779 cmds[num_cmds].fams2_flip.header.multi_cmd_pending = 1;

+1 -1

drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h

··· 58 58 bool needs_idle_wake; 59 59 }; 60 60 61 - bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv); 61 + void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); 62 62 63 63 bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); 64 64

+1 -1

drivers/gpu/drm/amd/display/dc/dc_helper.c

··· 682 682 if (offload && 683 683 ctx->dc->debug.dmub_offload_enabled && 684 684 !ctx->dc->debug.dmcub_emulation) { 685 - dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); 685 + dc_dmub_srv_wait_idle(ctx->dmub_srv); 686 686 } 687 687 } 688 688

-1

drivers/gpu/drm/amd/display/dc/dc_types.h

··· 1224 1224 int bw_granularity; // BW Granularity 1225 1225 int dp_overhead; // DP overhead in dp tunneling 1226 1226 bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM 1227 - bool response_ready; // Response ready from the CM side 1228 1227 uint8_t nrd_max_lane_count; // Non-reduced max lane count 1229 1228 uint8_t nrd_max_link_rate; // Non-reduced max link rate 1230 1229 };

+1 -2

drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c

··· 240 240 cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; 241 241 cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask; 242 242 243 - cmd.abm_save_restore.header.payload_bytes = 244 - sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header); 243 + cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore); 245 244 246 245 dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); 247 246

+11

drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c

··· 73 73 if (link->replay_settings.replay_feature_enabled) 74 74 return true; 75 75 76 + /* only use HW lock for PSR1 on single eDP */ 77 + if (link->psr_settings.psr_version == DC_PSR_VERSION_1) { 78 + struct dc_link *edp_links[MAX_NUM_EDP]; 79 + int edp_num; 80 + 81 + dc_get_edp_links(link->dc, edp_links, &edp_num); 82 + 83 + if (edp_num == 1) 84 + return true; 85 + } 86 + 76 87 return false; 77 88 }

+6 -13

drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c

··· 280 280 memset(&cmd, 0, sizeof(cmd)); 281 281 pCmd->header.type = DMUB_CMD__REPLAY; 282 282 pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; 283 - pCmd->header.payload_bytes = 284 - sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) - 285 - sizeof(struct dmub_cmd_header); 283 + pCmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); 286 284 pCmd->replay_set_power_opt_data.power_opt = power_opt; 287 285 pCmd->replay_set_power_opt_data.panel_inst = panel_inst; 288 286 pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); ··· 319 321 cmd.replay_set_timing_sync.header.sub_type = 320 322 DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; 321 323 cmd.replay_set_timing_sync.header.payload_bytes = 322 - sizeof(struct dmub_rb_cmd_replay_set_timing_sync) - 323 - sizeof(struct dmub_cmd_header); 324 + sizeof(struct dmub_rb_cmd_replay_set_timing_sync); 324 325 //Cmd Body 325 326 cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = 326 327 cmd_element->sync_data.panel_inst; ··· 331 334 cmd.replay_set_frameupdate_timer.header.sub_type = 332 335 DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; 333 336 cmd.replay_set_frameupdate_timer.header.payload_bytes = 334 - sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) - 335 - sizeof(struct dmub_cmd_header); 337 + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); 336 338 //Cmd Body 337 339 cmd.replay_set_frameupdate_timer.data.panel_inst = 338 340 cmd_element->panel_inst; ··· 345 349 cmd.replay_set_pseudo_vtotal.header.sub_type = 346 350 DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL; 347 351 cmd.replay_set_pseudo_vtotal.header.payload_bytes = 348 - sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) - 349 - sizeof(struct dmub_cmd_header); 352 + sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal); 350 353 //Cmd Body 351 354 cmd.replay_set_pseudo_vtotal.data.panel_inst = 352 355 
cmd_element->pseudo_vtotal_data.panel_inst; ··· 357 362 cmd.replay_disabled_adaptive_sync_sdp.header.sub_type = 358 363 DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP; 359 364 cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes = 360 - sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) - 361 - sizeof(struct dmub_cmd_header); 365 + sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp); 362 366 //Cmd Body 363 367 cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst = 364 368 cmd_element->disabled_adaptive_sync_sdp_data.panel_inst; ··· 369 375 cmd.replay_set_general_cmd.header.sub_type = 370 376 DMUB_CMD__REPLAY_SET_GENERAL_CMD; 371 377 cmd.replay_set_general_cmd.header.payload_bytes = 372 - sizeof(struct dmub_rb_cmd_replay_set_general_cmd) - 373 - sizeof(struct dmub_cmd_header); 378 + sizeof(struct dmub_rb_cmd_replay_set_general_cmd); 374 379 //Cmd Body 375 380 cmd.replay_set_general_cmd.data.panel_inst = 376 381 cmd_element->set_general_cmd_data.panel_inst;

+1 -1

drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c

··· 159 159 .dppclk_mhz = 1200.0, 160 160 .phyclk_mhz = 810.0, 161 161 .phyclk_d18_mhz = 667.0, 162 - .dscclk_mhz = 417.0, 162 + .dscclk_mhz = 400.0, 163 163 .dtbclk_mhz = 600.0, 164 164 }, 165 165 },

+2

drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c

··· 32 32 33 33 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 34 34 #define TB_BORROWED_MAX 400 35 + #define DML_MAX_VSTARTUP_START 1023 35 36 36 37 // --------------------------- 37 38 // Declaration Begins ··· 6211 6210 dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); 6212 6211 dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); 6213 6212 #endif 6213 + max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START); 6214 6214 return max_vstartup_lines; 6215 6215 } 6216 6216

+1 -1

drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c

··· 590 590 p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; 591 591 p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; 592 592 593 - p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; 594 593 p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; 595 594 p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; 596 595 597 596 /* Dependent states. */ 597 + p->out_states->state_array[i].dscclk_mhz = p->in_states->state_array[i].dscclk_mhz; 598 598 p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; 599 599 p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; 600 600 p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz;

+1 -3

drivers/gpu/drm/amd/display/dc/inc/link.h

··· 218 218 219 219 220 220 /*************************** DP DPIA/PHY ******************************/ 221 - int (*dpia_handle_usb4_bandwidth_allocation_for_link)( 221 + void (*dpia_handle_usb4_bandwidth_allocation_for_link)( 222 222 struct dc_link *link, int peak_bw); 223 - void (*dpia_handle_bw_alloc_response)( 224 - struct dc_link *link, uint8_t bw, uint8_t result); 225 223 void (*dp_set_drive_settings)( 226 224 struct dc_link *link, 227 225 const struct link_resource *link_res,

+1 -16

drivers/gpu/drm/amd/display/dc/link/link_dpms.c

··· 2291 2291 link->dpia_bw_alloc_config.dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(link); 2292 2292 req_bw += link->dpia_bw_alloc_config.dp_overhead; 2293 2293 2294 - if (link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, req_bw)) { 2295 - if (req_bw <= link->dpia_bw_alloc_config.allocated_bw) { 2296 - DC_LOG_DEBUG("%s, Success in allocate bw for link(%d), allocated_bw(%d), dp_overhead(%d)\n", 2297 - __func__, link->link_index, link->dpia_bw_alloc_config.allocated_bw, 2298 - link->dpia_bw_alloc_config.dp_overhead); 2299 - } else { 2300 - // Cannot get the required bandwidth. 2301 - DC_LOG_ERROR("%s, Failed to allocate bw for link(%d), allocated_bw(%d), dp_overhead(%d)\n", 2302 - __func__, link->link_index, link->dpia_bw_alloc_config.allocated_bw, 2303 - link->dpia_bw_alloc_config.dp_overhead); 2304 - return false; 2305 - } 2306 - } else { 2307 - DC_LOG_DEBUG("%s, usb4 request bw timeout\n", __func__); 2308 - return false; 2309 - } 2294 + link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, req_bw); 2310 2295 2311 2296 if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { 2312 2297 int i = 0;

-1

drivers/gpu/drm/amd/display/dc/link/link_factory.c

··· 175 175 { 176 176 link_srv->dpia_handle_usb4_bandwidth_allocation_for_link = 177 177 dpia_handle_usb4_bandwidth_allocation_for_link; 178 - link_srv->dpia_handle_bw_alloc_response = dpia_handle_bw_alloc_response; 179 178 link_srv->dp_set_drive_settings = dp_set_drive_settings; 180 179 link_srv->dpcd_write_rx_power_ctrl = dpcd_write_rx_power_ctrl; 181 180 }

-1

drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c

··· 92 92 93 93 /* prepare QUERY_HPD command */ 94 94 cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; 95 - cmd.query_hpd.header.payload_bytes = sizeof(cmd.query_hpd.data); 96 95 cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; 97 96 cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; 98 97

+40 -155

drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c

··· 24 24 * 25 25 */ 26 26 /*********************************************************************/ 27 - // USB4 DPIA BANDWIDTH ALLOCATION LOGIC 27 + // USB4 DPIA BANDWIDTH ALLOCATION LOGIC 28 28 /*********************************************************************/ 29 29 #include "link_dp_dpia_bw.h" 30 30 #include "link_dpcd.h" ··· 36 36 #define Kbps_TO_Gbps (1000 * 1000) 37 37 38 38 // ------------------------------------------------------------------ 39 - // PRIVATE FUNCTIONS 39 + // PRIVATE FUNCTIONS 40 40 // ------------------------------------------------------------------ 41 41 /* 42 42 * Always Check the following: ··· 44 44 * - Is HPD HIGH? 45 45 * - Is BW Allocation Support Mode enabled on DP-Tx? 46 46 */ 47 - static bool get_bw_alloc_proceed_flag(struct dc_link *tmp) 47 + static bool link_dp_is_bw_alloc_available(struct dc_link *link) 48 48 { 49 - return (tmp && DISPLAY_ENDPOINT_USB4_DPIA == tmp->ep_type 50 - && tmp->hpd_status 51 - && tmp->dpia_bw_alloc_config.bw_alloc_enabled); 49 + return (link && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA 50 + && link->hpd_status 51 + && link->dpia_bw_alloc_config.bw_alloc_enabled); 52 52 } 53 53 54 54 static void reset_bw_alloc_struct(struct dc_link *link) ··· 60 60 link->dpia_bw_alloc_config.estimated_bw = 0; 61 61 link->dpia_bw_alloc_config.bw_granularity = 0; 62 62 link->dpia_bw_alloc_config.dp_overhead = 0; 63 - link->dpia_bw_alloc_config.response_ready = false; 64 63 link->dpia_bw_alloc_config.nrd_max_lane_count = 0; 65 64 link->dpia_bw_alloc_config.nrd_max_link_rate = 0; 66 65 for (int i = 0; i < MAX_SINKS_PER_LINK; i++) ··· 242 243 static void dpia_bw_alloc_unplug(struct dc_link *link) 243 244 { 244 245 if (link) { 245 - DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n", 246 + DC_LOG_DEBUG("%s: resetting BW alloc config for link(%d)\n", 246 247 __func__, link->link_index); 247 248 reset_bw_alloc_struct(link); 248 249 } 249 250 } 250 251 251 - static void set_usb4_req_bw_req(struct dc_link *link, 
int req_bw) 252 + static void link_dpia_send_bw_alloc_request(struct dc_link *link, int req_bw) 252 253 { 253 254 uint8_t requested_bw; 254 255 uint32_t temp; 255 256 256 257 /* Error check whether request bw greater than allocated */ 257 258 if (req_bw > link->dpia_bw_alloc_config.estimated_bw) { 258 - DC_LOG_ERROR("%s: Request bw greater than estimated bw for link(%d)\n", 259 + DC_LOG_ERROR("%s: Request BW greater than estimated BW for link(%d)\n", 259 260 __func__, link->link_index); 260 261 req_bw = link->dpia_bw_alloc_config.estimated_bw; 261 262 } ··· 270 271 /* Error check whether requested and allocated are equal */ 271 272 req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); 272 273 if (req_bw && (req_bw == link->dpia_bw_alloc_config.allocated_bw)) { 273 - DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", 274 + DC_LOG_ERROR("%s: Request BW equals to allocated BW for link(%d)\n", 274 275 __func__, link->link_index); 275 276 } 276 277 277 - link->dpia_bw_alloc_config.response_ready = false; // Reset flag 278 - core_link_write_dpcd( 279 - link, 280 - REQUESTED_BW, 278 + core_link_write_dpcd(link, REQUESTED_BW, 281 279 &requested_bw, 282 280 sizeof(uint8_t)); 283 281 } 284 282 285 - /* 286 - * Return the response_ready flag from dc_link struct 287 - * 288 - * @link: pointer to the dc_link struct instance 289 - * 290 - * return: response_ready flag from dc_link struct 291 - */ 292 - static bool get_cm_response_ready_flag(struct dc_link *link) 293 - { 294 - return link->dpia_bw_alloc_config.response_ready; 295 - } 296 - 297 283 // ------------------------------------------------------------------ 298 - // PUBLIC FUNCTIONS 284 + // PUBLIC FUNCTIONS 299 285 // ------------------------------------------------------------------ 300 286 bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link) 301 287 { ··· 354 370 DC_LOG_DEBUG("%s: BW Allocation request succeeded on link(%d)", 355 371 __func__, 
link->link_index); 356 372 } else if (status & DP_TUNNELING_BW_REQUEST_FAILED) { 373 + link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); 374 + 357 375 DC_LOG_DEBUG("%s: BW Allocation request failed on link(%d) allocated/estimated BW=%d", 358 376 __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); 377 + 378 + link_dpia_send_bw_alloc_request(link, link->dpia_bw_alloc_config.estimated_bw); 359 379 } else if (status & DP_TUNNELING_ESTIMATED_BW_CHANGED) { 380 + link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); 381 + 360 382 DC_LOG_DEBUG("%s: Estimated BW changed on link(%d) new estimated BW=%d", 361 383 __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); 362 384 } ··· 372 382 &status, sizeof(status)); 373 383 } 374 384 375 - void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result) 385 + /* 386 + * Handle the DP Bandwidth allocation for DPIA 387 + * 388 + */ 389 + void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw) 376 390 { 377 - int bw_needed = 0; 378 - int estimated = 0; 391 + if (link && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->dpia_bw_alloc_config.bw_alloc_enabled) { 392 + //1. 
Hot Plug 393 + if (link->hpd_status && peak_bw > 0) { 394 + // If DP over USB4 then we need to check BW allocation 395 + link->dpia_bw_alloc_config.link_max_bw = peak_bw; 379 396 380 - if (!get_bw_alloc_proceed_flag((link))) 381 - return; 382 - 383 - switch (result) { 384 - 385 - case DPIA_BW_REQ_FAILED: 386 - 387 - /* 388 - * Ideally, we shouldn't run into this case as we always validate available 389 - * bandwidth and request within that limit 390 - */ 391 - estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); 392 - 393 - DC_LOG_ERROR("%s: BW REQ FAILURE for DP-TX Request for link(%d)\n", 394 - __func__, link->link_index); 395 - DC_LOG_ERROR("%s: current estimated_bw(%d), new estimated_bw(%d)\n", 396 - __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); 397 - 398 - /* Update the new Estimated BW value updated by CM */ 399 - link->dpia_bw_alloc_config.estimated_bw = estimated; 400 - 401 - /* Allocate the previously requested bandwidth */ 402 - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw); 403 - 404 - /* 405 - * If FAIL then it is either: 406 - * 1. Due to DP-Tx trying to allocate more than available i.e. it failed locally 407 - * => get estimated and allocate that 408 - * 2. Due to the fact that DP-Tx tried to allocated ESTIMATED BW and failed then 409 - * CM will have to update 0xE0023 with new ESTIMATED BW value. 
410 - */ 411 - break; 412 - 413 - case DPIA_BW_REQ_SUCCESS: 414 - 415 - bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); 416 - 417 - DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", 418 - __func__, link->link_index); 419 - DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", 420 - __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed); 421 - 422 - link->dpia_bw_alloc_config.allocated_bw = bw_needed; 423 - 424 - link->dpia_bw_alloc_config.response_ready = true; 425 - break; 426 - 427 - case DPIA_EST_BW_CHANGED: 428 - 429 - estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); 430 - 431 - DC_LOG_DEBUG("%s: ESTIMATED BW CHANGED for link(%d)\n", 432 - __func__, link->link_index); 433 - DC_LOG_DEBUG("%s: current estimated_bw(%d), new estimated_bw(%d)\n", 434 - __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); 435 - 436 - link->dpia_bw_alloc_config.estimated_bw = estimated; 437 - break; 438 - 439 - case DPIA_BW_ALLOC_CAPS_CHANGED: 440 - 441 - DC_LOG_ERROR("%s: BW ALLOC CAPABILITY CHANGED to Disabled for link(%d)\n", 442 - __func__, link->link_index); 443 - link->dpia_bw_alloc_config.bw_alloc_enabled = false; 444 - break; 397 + link_dpia_send_bw_alloc_request(link, peak_bw); 398 + } 399 + //2. Cold Unplug 400 + else if (!link->hpd_status) 401 + dpia_bw_alloc_unplug(link); 445 402 } 446 403 } 447 - int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw) 404 + 405 + void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) 448 406 { 449 - int ret = 0; 450 - uint8_t timeout = 10; 451 - 452 - if (!(link && DISPLAY_ENDPOINT_USB4_DPIA == link->ep_type 453 - && link->dpia_bw_alloc_config.bw_alloc_enabled)) 454 - goto out; 455 - 456 - //1. 
Hot Plug 457 - if (link->hpd_status && peak_bw > 0) { 458 - 459 - // If DP over USB4 then we need to check BW allocation 460 - link->dpia_bw_alloc_config.link_max_bw = peak_bw; 461 - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw); 462 - 463 - do { 464 - if (timeout > 0) 465 - timeout--; 466 - else 467 - break; 468 - msleep(10); 469 - } while (!get_cm_response_ready_flag(link)); 470 - 471 - if (!timeout) 472 - ret = 0;// ERROR TIMEOUT waiting for response for allocating bw 473 - else if (link->dpia_bw_alloc_config.allocated_bw > 0) 474 - ret = link->dpia_bw_alloc_config.allocated_bw; 475 - } 476 - //2. Cold Unplug 477 - else if (!link->hpd_status) 478 - dpia_bw_alloc_unplug(link); 479 - 480 - out: 481 - return ret; 482 - } 483 - bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) 484 - { 485 - bool ret = false; 486 - uint8_t timeout = 10; 487 - 488 407 DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", 489 408 __func__, link->link_index, link->hpd_status, 490 409 link->dpia_bw_alloc_config.allocated_bw, req_bw); 491 410 492 - if (!get_bw_alloc_proceed_flag(link)) 493 - goto out; 494 - 495 - set_usb4_req_bw_req(link, req_bw); 496 - do { 497 - if (timeout > 0) 498 - timeout--; 499 - else 500 - break; 501 - msleep(10); 502 - } while (!get_cm_response_ready_flag(link)); 503 - 504 - if (timeout) 505 - ret = true; 506 - 507 - out: 508 - DC_LOG_DEBUG("%s: EXIT: timeout(%d), ret(%d)\n", __func__, timeout, ret); 509 - return ret; 411 + if (link_dp_is_bw_alloc_available(link)) 412 + link_dpia_send_bw_alloc_request(link, req_bw); 413 + else 414 + DC_LOG_DEBUG("%s: Not able to send the BW Allocation request", __func__); 510 415 } 511 416 512 417 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) ··· 452 567 { 453 568 int dp_overhead = 0, link_mst_overhead = 0; 454 569 455 - if (!get_bw_alloc_proceed_flag((link))) 570 + if 
(!link_dp_is_bw_alloc_available(link)) 456 571 return dp_overhead; 457 572 458 573 /* if its mst link, add MTPH overhead */

+2 -16

drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h

··· 59 59 * @link: pointer to the dc_link struct instance 60 60 * @req_bw: Bw requested by the stream 61 61 * 62 - * return: true if allocated successfully 63 62 */ 64 - bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); 63 + void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); 65 64 66 65 /* 67 66 * Handle the USB4 BW Allocation related functionality here: ··· 70 71 * @link: pointer to the dc_link struct instance 71 72 * @peak_bw: Peak bw used by the link/sink 72 73 * 73 - * return: allocated bw else return 0 74 74 */ 75 - int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw); 76 - 77 - /* 78 - * Handle function for when the status of the Request above is complete. 79 - * We will find out the result of allocating on CM and update structs. 80 - * 81 - * @link: pointer to the dc_link struct instance 82 - * @bw: Allocated or Estimated BW depending on the result 83 - * @result: Response type 84 - * 85 - * return: none 86 - */ 87 - void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result); 75 + void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw); 88 76 89 77 /* 90 78 * Handle the validation of total BW here and confirm that the bw used by each

+43 -89

drivers/gpu/drm/amd/display/dmub/dmub_srv.h

··· 51 51 * for the cache windows. 52 52 * 53 53 * The call to dmub_srv_hw_init() programs the DMCUB registers to prepare 54 - * for command submission. Commands can be queued via dmub_srv_fb_cmd_queue() 55 - * and executed via dmub_srv_fb_cmd_execute(). 54 + * for command submission. Commands can be queued via dmub_srv_cmd_queue() 55 + * and executed via dmub_srv_cmd_execute(). 56 56 * 57 57 * If the queue is full the dmub_srv_wait_for_idle() call can be used to 58 58 * wait until the queue has been cleared. ··· 168 168 DMUB_POWER_STATE_UNDEFINED = 0, 169 169 DMUB_POWER_STATE_D0 = 1, 170 170 DMUB_POWER_STATE_D3 = 8 171 - }; 172 - 173 - /* enum dmub_inbox_cmd_interface type - defines default interface for host->dmub commands */ 174 - enum dmub_inbox_cmd_interface_type { 175 - DMUB_CMD_INTERFACE_DEFAULT = 0, 176 - DMUB_CMD_INTERFACE_FB = 1, 177 - DMUB_CMD_INTERFACE_REG = 2, 178 171 }; 179 172 180 173 /** ··· 349 356 uint8_t is_cw6_enabled : 1; 350 357 }; 351 358 352 - struct dmub_srv_inbox { 353 - /* generic status */ 354 - uint64_t num_submitted; 355 - uint64_t num_reported; 356 - union { 357 - /* frame buffer mailbox status */ 358 - struct dmub_rb rb; 359 - /* register mailbox status */ 360 - struct { 361 - bool is_pending; 362 - bool is_multi_pending; 363 - }; 364 - }; 365 - }; 366 - 367 359 /** 368 360 * struct dmub_srv_base_funcs - Driver specific base callbacks 369 361 */ ··· 462 484 void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); 463 485 464 486 void (*subvp_save_surf_addr)(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); 465 - 466 487 void (*send_reg_inbox0_cmd_msg)(struct dmub_srv *dmub, 467 488 union dmub_rb_cmd *cmd); 468 489 uint32_t (*read_reg_inbox0_rsp_int_status)(struct dmub_srv *dmub); 469 490 void (*read_reg_inbox0_cmd_rsp)(struct dmub_srv *dmub, 470 491 union dmub_rb_cmd *cmd); 471 492 void (*write_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); 472 - void 
(*clear_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); 473 - void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); 474 - 475 493 uint32_t (*read_reg_outbox0_rdy_int_status)(struct dmub_srv *dmub); 476 494 void (*write_reg_outbox0_rdy_int_ack)(struct dmub_srv *dmub); 477 495 void (*read_reg_outbox0_msg)(struct dmub_srv *dmub, uint32_t *msg); 478 496 void (*write_reg_outbox0_rsp)(struct dmub_srv *dmub, uint32_t *rsp); 479 497 uint32_t (*read_reg_outbox0_rsp_int_status)(struct dmub_srv *dmub); 498 + void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); 480 499 void (*enable_reg_outbox0_rdy_int)(struct dmub_srv *dmub, bool enable); 481 500 }; 482 501 ··· 493 518 enum dmub_asic asic; 494 519 uint32_t fw_version; 495 520 bool is_virtual; 496 - enum dmub_inbox_cmd_interface_type inbox_type; 497 521 }; 498 522 499 523 /** ··· 521 547 const struct dmub_srv_dcn401_regs *regs_dcn401; 522 548 struct dmub_srv_base_funcs funcs; 523 549 struct dmub_srv_hw_funcs hw_funcs; 524 - struct dmub_srv_inbox inbox1; 550 + struct dmub_rb inbox1_rb; 525 551 uint32_t inbox1_last_wptr; 526 - struct dmub_srv_inbox reg_inbox0; 527 552 /** 528 553 * outbox1_rb is accessed without locks (dal & dc) 529 554 * and to be used only in dmub_srv_stat_get_notification() ··· 542 569 struct dmub_fw_meta_info meta_info; 543 570 struct dmub_feature_caps feature_caps; 544 571 struct dmub_visual_confirm_color visual_confirm_color; 545 - enum dmub_inbox_cmd_interface_type inbox_type; 546 572 547 573 enum dmub_srv_power_state_type power_state; 548 574 struct dmub_diagnostic_data debug; ··· 699 727 enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); 700 728 701 729 /** 702 - * dmub_srv_fb_cmd_queue() - queues a command to the DMUB 730 + * dmub_srv_sync_inbox1() - sync sw state with hw state 731 + * @dmub: the dmub service 732 + * 733 + * Sync sw state with hw state when resume from S0i3 734 + * 735 + * Return: 736 + * DMUB_STATUS_OK - success 737 + * DMUB_STATUS_INVALID - 
unspecified error 738 + */ 739 + enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); 740 + 741 + /** 742 + * dmub_srv_cmd_queue() - queues a command to the DMUB 703 743 * @dmub: the dmub service 704 744 * @cmd: the command to queue 705 745 * ··· 723 739 * DMUB_STATUS_QUEUE_FULL - no remaining room in queue 724 740 * DMUB_STATUS_INVALID - unspecified error 725 741 */ 726 - enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, 742 + enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, 727 743 const union dmub_rb_cmd *cmd); 728 744 729 745 /** 730 - * dmub_srv_fb_cmd_execute() - Executes a queued sequence to the dmub 746 + * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub 731 747 * @dmub: the dmub service 732 748 * 733 749 * Begins execution of queued commands on the dmub. ··· 736 752 * DMUB_STATUS_OK - success 737 753 * DMUB_STATUS_INVALID - unspecified error 738 754 */ 739 - enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub); 755 + enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); 740 756 741 757 /** 742 758 * dmub_srv_wait_for_hw_pwr_up() - Waits for firmware hardware power up is completed ··· 793 809 */ 794 810 enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, 795 811 uint32_t timeout_us); 796 - 797 - /** 798 - * dmub_srv_wait_for_pending() - Re-entrant wait for messages currently pending 799 - * @dmub: the dmub service 800 - * @timeout_us: the maximum number of microseconds to wait 801 - * 802 - * Waits until the commands queued prior to this call are complete. 803 - * If interfaces remain busy due to additional work being submitted 804 - * concurrently, this function will not continue to wait. 
805 - * 806 - * Return: 807 - * DMUB_STATUS_OK - success 808 - * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out 809 - * DMUB_STATUS_INVALID - unspecified error 810 - */ 811 - enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, 812 - uint32_t timeout_us); 813 812 814 813 /** 815 814 * dmub_srv_wait_for_idle() - Waits for the DMUB to be idle ··· 892 925 enum dmub_status dmub_srv_get_fw_boot_option(struct dmub_srv *dmub, 893 926 union dmub_fw_boot_options *option); 894 927 928 + enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, 929 + union dmub_rb_cmd *cmd); 930 + 895 931 enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, 896 932 bool skip); 897 933 ··· 959 989 void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); 960 990 961 991 /** 992 + * dmub_srv_send_reg_inbox0_cmd() - send a dmub command and wait for the command 993 + * being processed by DMUB. 994 + * @dmub: The dmub service 995 + * @cmd: The dmub command being sent. If with_reply is true, the function will 996 + * update cmd with replied data. 997 + * @with_reply: true if DMUB reply needs to be copied back to cmd. false if the 998 + * cmd doesn't need to be replied. 999 + * @timeout_us: timeout in microseconds. 1000 + * 1001 + * Return: 1002 + * DMUB_STATUS_OK - success 1003 + * DMUB_STATUS_TIMEOUT - DMUB fails to process the command within the timeout 1004 + * interval. 
1005 + */ 1006 + enum dmub_status dmub_srv_send_reg_inbox0_cmd( 1007 + struct dmub_srv *dmub, 1008 + union dmub_rb_cmd *cmd, 1009 + bool with_reply, uint32_t timeout_us); 1010 + 1011 + /** 962 1012 * dmub_srv_set_power_state() - Track DC power state in dmub_srv 963 1013 * @dmub: The dmub service 964 1014 * @power_state: DC power state setting ··· 989 999 * void 990 1000 */ 991 1001 void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); 992 - 993 - /** 994 - * dmub_srv_reg_cmd_execute() - Executes provided command to the dmub 995 - * @dmub: the dmub service 996 - * @cmd: the command packet to be executed 997 - * 998 - * Executes a single command for the dmub. 999 - * 1000 - * Return: 1001 - * DMUB_STATUS_OK - success 1002 - * DMUB_STATUS_INVALID - unspecified error 1003 - */ 1004 - enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); 1005 - 1006 - 1007 - /** 1008 - * dmub_srv_cmd_get_response() - Copies return data for command into buffer 1009 - * @dmub: the dmub service 1010 - * @cmd_rsp: response buffer 1011 - * 1012 - * Copies return data for command into buffer 1013 - */ 1014 - void dmub_srv_cmd_get_response(struct dmub_srv *dmub, 1015 - union dmub_rb_cmd *cmd_rsp); 1016 - 1017 - /** 1018 - * dmub_srv_sync_inboxes() - Sync inbox state 1019 - * @dmub: the dmub service 1020 - * 1021 - * Sync inbox state 1022 - * 1023 - * Return: 1024 - * DMUB_STATUS_OK - success 1025 - * DMUB_STATUS_INVALID - unspecified error 1026 - */ 1027 - enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub); 1028 1002 1029 1003 #endif /* _DMUB_SRV_H_ */

+38 -83

drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c

··· 517 517 union dmub_rb_cmd *cmd) 518 518 { 519 519 uint32_t *dwords = (uint32_t *)cmd; 520 - int32_t payload_size_bytes = cmd->cmd_common.header.payload_bytes; 521 - uint32_t msg_index; 520 + 522 521 static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); 523 522 524 - /* read remaining data based on payload size */ 525 - for (msg_index = 0; msg_index < 15; msg_index++) { 526 - if (payload_size_bytes <= msg_index * 4) { 527 - break; 528 - } 529 - 530 - switch (msg_index) { 531 - case 0: 532 - REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[msg_index + 1]); 533 - break; 534 - case 1: 535 - REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[msg_index + 1]); 536 - break; 537 - case 2: 538 - REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[msg_index + 1]); 539 - break; 540 - case 3: 541 - REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[msg_index + 1]); 542 - break; 543 - case 4: 544 - REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[msg_index + 1]); 545 - break; 546 - case 5: 547 - REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[msg_index + 1]); 548 - break; 549 - case 6: 550 - REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[msg_index + 1]); 551 - break; 552 - case 7: 553 - REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[msg_index + 1]); 554 - break; 555 - case 8: 556 - REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[msg_index + 1]); 557 - break; 558 - case 9: 559 - REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[msg_index + 1]); 560 - break; 561 - case 10: 562 - REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[msg_index + 1]); 563 - break; 564 - case 11: 565 - REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[msg_index + 1]); 566 - break; 567 - case 12: 568 - REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[msg_index + 1]); 569 - break; 570 - case 13: 571 - REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[msg_index + 1]); 572 - break; 573 - case 14: 574 - REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[msg_index + 1]); 575 - break; 576 - } 577 - } 578 - 523 + REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[0]); 524 + REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[1]); 525 + 
REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[2]); 526 + REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[3]); 527 + REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[4]); 528 + REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[5]); 529 + REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[6]); 530 + REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[7]); 531 + REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[8]); 532 + REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[9]); 533 + REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[10]); 534 + REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[11]); 535 + REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[12]); 536 + REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[13]); 537 + REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[14]); 579 538 /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY 580 539 * interrupt. 581 540 */ 582 - REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[0]); 541 + REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[15]); 583 542 } 584 543 585 544 uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub) ··· 556 597 557 598 static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); 558 599 559 - dwords[0] = REG_READ(DMCUB_REG_INBOX0_RSP); 560 - dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG0); 561 - dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG1); 562 - dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG2); 563 - dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG3); 564 - dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG4); 565 - dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG5); 566 - dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG6); 567 - dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG7); 568 - dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG8); 569 - dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG9); 570 - dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG10); 571 - dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG11); 572 - dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG12); 573 - dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG13); 574 - dwords[15] = REG_READ(DMCUB_REG_INBOX0_MSG14); 600 + dwords[0] = REG_READ(DMCUB_REG_INBOX0_MSG0); 601 + dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG1); 602 
+ dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG2); 603 + dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG3); 604 + dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG4); 605 + dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG5); 606 + dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG6); 607 + dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG7); 608 + dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG8); 609 + dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG9); 610 + dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG10); 611 + dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG11); 612 + dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG12); 613 + dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG13); 614 + dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG14); 615 + dwords[15] = REG_READ(DMCUB_REG_INBOX0_RSP); 575 616 } 576 617 577 618 void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) 578 619 { 579 620 REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1); 580 - } 581 - 582 - void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) 583 - { 584 621 REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0); 585 - } 586 - 587 - void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) 588 - { 589 - REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); 590 622 } 591 623 592 624 void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub) ··· 602 652 603 653 REG_GET(DMCUB_INTERRUPT_STATUS, DMCUB_REG_OUTBOX0_RSP_INT_STAT, &status); 604 654 return status; 655 + } 656 + 657 + void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) 658 + { 659 + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); 605 660 } 606 661 607 662 void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable)

+1 -3

drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h

··· 277 277 void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, 278 278 union dmub_rb_cmd *cmd); 279 279 void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); 280 - void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); 281 - void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); 282 - 283 280 void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub); 284 281 void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg); 285 282 void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *msg); 286 283 uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub); 284 + void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); 287 285 void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable); 288 286 uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub); 289 287

+105 -214

drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c

··· 157 157 { 158 158 struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; 159 159 160 - /* default to specifying now inbox type */ 161 - enum dmub_inbox_cmd_interface_type default_inbox_type = DMUB_CMD_INTERFACE_DEFAULT; 162 - 163 160 switch (asic) { 164 161 case DMUB_ASIC_DCN20: 165 162 case DMUB_ASIC_DCN21: ··· 395 398 396 399 funcs->get_current_time = dmub_dcn401_get_current_time; 397 400 funcs->get_diagnostic_data = dmub_dcn401_get_diagnostic_data; 398 - 399 401 funcs->send_reg_inbox0_cmd_msg = dmub_dcn401_send_reg_inbox0_cmd_msg; 400 402 funcs->read_reg_inbox0_rsp_int_status = dmub_dcn401_read_reg_inbox0_rsp_int_status; 401 403 funcs->read_reg_inbox0_cmd_rsp = dmub_dcn401_read_reg_inbox0_cmd_rsp; 402 404 funcs->write_reg_inbox0_rsp_int_ack = dmub_dcn401_write_reg_inbox0_rsp_int_ack; 403 - funcs->clear_reg_inbox0_rsp_int_ack = dmub_dcn401_clear_reg_inbox0_rsp_int_ack; 404 - funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int; 405 - default_inbox_type = DMUB_CMD_INTERFACE_FB; // still default to FB for now 406 - 407 405 funcs->write_reg_outbox0_rdy_int_ack = dmub_dcn401_write_reg_outbox0_rdy_int_ack; 408 406 funcs->read_reg_outbox0_msg = dmub_dcn401_read_reg_outbox0_msg; 409 407 funcs->write_reg_outbox0_rsp = dmub_dcn401_write_reg_outbox0_rsp; ··· 409 417 break; 410 418 default: 411 419 return false; 412 - } 413 - 414 - /* set default inbox type if not overriden */ 415 - if (dmub->inbox_type == DMUB_CMD_INTERFACE_DEFAULT) { 416 - if (default_inbox_type != DMUB_CMD_INTERFACE_DEFAULT) { 417 - /* use default inbox type as specified by DCN rev */ 418 - dmub->inbox_type = default_inbox_type; 419 - } else if (funcs->send_reg_inbox0_cmd_msg) { 420 - /* prefer reg as default inbox type if present */ 421 - dmub->inbox_type = DMUB_CMD_INTERFACE_REG; 422 - } else { 423 - /* use fb as fallback */ 424 - dmub->inbox_type = DMUB_CMD_INTERFACE_FB; 425 - } 426 420 } 427 421 428 422 return true; ··· 426 448 dmub->asic = params->asic; 427 449 dmub->fw_version = 
params->fw_version; 428 450 dmub->is_virtual = params->is_virtual; 429 - dmub->inbox_type = params->inbox_type; 430 451 431 452 /* Setup asic dependent hardware funcs. */ 432 453 if (!dmub_srv_hw_setup(dmub, params->asic)) { ··· 695 718 inbox1.base = cw4.region.base; 696 719 inbox1.top = cw4.region.base + DMUB_RB_SIZE; 697 720 outbox1.base = inbox1.top; 698 - outbox1.top = inbox1.top + DMUB_RB_SIZE; 721 + outbox1.top = cw4.region.top; 699 722 700 723 cw5.offset.quad_part = tracebuff_fb->gpu_addr; 701 724 cw5.region.base = DMUB_CW5_BASE; ··· 708 731 cw6.region.base = DMUB_CW6_BASE; 709 732 cw6.region.top = cw6.region.base + fw_state_fb->size; 710 733 711 - dmub->fw_state = fw_state_fb->cpu_addr; 734 + dmub->fw_state = (void *)((uintptr_t)(fw_state_fb->cpu_addr) + DMUB_DEBUG_FW_STATE_OFFSET); 712 735 713 736 region6.offset.quad_part = shared_state_fb->gpu_addr; 714 737 region6.region.base = DMUB_CW6_BASE; ··· 737 760 rb_params.ctx = dmub; 738 761 rb_params.base_address = mail_fb->cpu_addr; 739 762 rb_params.capacity = DMUB_RB_SIZE; 740 - dmub_rb_init(&dmub->inbox1.rb, &rb_params); 763 + dmub_rb_init(&dmub->inbox1_rb, &rb_params); 741 764 742 765 // Initialize outbox1 ring buffer 743 766 rb_params.ctx = dmub; ··· 768 791 return DMUB_STATUS_OK; 769 792 } 770 793 794 + enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) 795 + { 796 + if (!dmub->sw_init) 797 + return DMUB_STATUS_INVALID; 798 + 799 + if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { 800 + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); 801 + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); 802 + 803 + if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { 804 + return DMUB_STATUS_HW_FAILURE; 805 + } else { 806 + dmub->inbox1_rb.rptr = rptr; 807 + dmub->inbox1_rb.wrpt = wptr; 808 + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; 809 + } 810 + } 811 + 812 + return DMUB_STATUS_OK; 813 + } 814 + 771 815 enum dmub_status dmub_srv_hw_reset(struct 
dmub_srv *dmub) 772 816 { 773 817 if (!dmub->sw_init) ··· 799 801 800 802 /* mailboxes have been reset in hw, so reset the sw state as well */ 801 803 dmub->inbox1_last_wptr = 0; 802 - dmub->inbox1.rb.wrpt = 0; 803 - dmub->inbox1.rb.rptr = 0; 804 - dmub->inbox1.num_reported = 0; 805 - dmub->inbox1.num_submitted = 0; 806 - dmub->reg_inbox0.num_reported = 0; 807 - dmub->reg_inbox0.num_submitted = 0; 808 - dmub->reg_inbox0.is_pending = 0; 804 + dmub->inbox1_rb.wrpt = 0; 805 + dmub->inbox1_rb.rptr = 0; 809 806 dmub->outbox0_rb.wrpt = 0; 810 807 dmub->outbox0_rb.rptr = 0; 811 808 dmub->outbox1_rb.wrpt = 0; ··· 811 818 return DMUB_STATUS_OK; 812 819 } 813 820 814 - enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, 821 + enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, 815 822 const union dmub_rb_cmd *cmd) 816 823 { 817 824 if (!dmub->hw_init) ··· 820 827 if (dmub->power_state != DMUB_POWER_STATE_D0) 821 828 return DMUB_STATUS_POWER_STATE_D3; 822 829 823 - if (dmub->inbox1.rb.rptr > dmub->inbox1.rb.capacity || 824 - dmub->inbox1.rb.wrpt > dmub->inbox1.rb.capacity) { 830 + if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || 831 + dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { 825 832 return DMUB_STATUS_HW_FAILURE; 826 833 } 827 834 828 - if (dmub_rb_push_front(&dmub->inbox1.rb, cmd)) { 829 - dmub->inbox1.num_submitted++; 835 + if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) 830 836 return DMUB_STATUS_OK; 831 - } 832 837 833 838 return DMUB_STATUS_QUEUE_FULL; 834 839 } 835 840 836 - enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub) 841 + enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) 837 842 { 838 843 struct dmub_rb flush_rb; 839 844 ··· 846 855 * been flushed to framebuffer memory. Otherwise DMCUB might 847 856 * read back stale, fully invalid or partially invalid data. 
848 857 */ 849 - flush_rb = dmub->inbox1.rb; 858 + flush_rb = dmub->inbox1_rb; 850 859 flush_rb.rptr = dmub->inbox1_last_wptr; 851 860 dmub_rb_flush_pending(&flush_rb); 852 861 853 - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1.rb.wrpt); 862 + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); 854 863 855 - dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; 864 + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; 856 865 857 866 return DMUB_STATUS_OK; 858 867 } ··· 910 919 return DMUB_STATUS_TIMEOUT; 911 920 } 912 921 913 - static void dmub_srv_update_reg_inbox0_status(struct dmub_srv *dmub) 914 - { 915 - if (dmub->reg_inbox0.is_pending) { 916 - dmub->reg_inbox0.is_pending = dmub->hw_funcs.read_reg_inbox0_rsp_int_status && 917 - !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); 918 - 919 - if (!dmub->reg_inbox0.is_pending) { 920 - /* ack the rsp interrupt */ 921 - if (dmub->hw_funcs.write_reg_inbox0_rsp_int_ack) 922 - dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); 923 - 924 - /* only update the reported count if commands aren't being batched */ 925 - if (!dmub->reg_inbox0.is_pending && !dmub->reg_inbox0.is_multi_pending) { 926 - dmub->reg_inbox0.num_reported = dmub->reg_inbox0.num_submitted; 927 - } 928 - } 929 - } 930 - } 931 - 932 - enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, 933 - uint32_t timeout_us) 934 - { 935 - uint32_t i; 936 - const uint32_t polling_interval_us = 1; 937 - struct dmub_srv_inbox scratch_reg_inbox0 = dmub->reg_inbox0; 938 - struct dmub_srv_inbox scratch_inbox1 = dmub->inbox1; 939 - const volatile struct dmub_srv_inbox *reg_inbox0 = &dmub->reg_inbox0; 940 - const volatile struct dmub_srv_inbox *inbox1 = &dmub->inbox1; 941 - 942 - if (!dmub->hw_init || 943 - !dmub->hw_funcs.get_inbox1_wptr) 944 - return DMUB_STATUS_INVALID; 945 - 946 - /* take a snapshot of the required mailbox state */ 947 - scratch_inbox1.rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); 948 - 949 - for (i = 0; i <= timeout_us; i += 
polling_interval_us) { 950 - scratch_inbox1.rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); 951 - 952 - scratch_reg_inbox0.is_pending = scratch_reg_inbox0.is_pending && 953 - dmub->hw_funcs.read_reg_inbox0_rsp_int_status && 954 - !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); 955 - 956 - if (scratch_inbox1.rb.rptr > dmub->inbox1.rb.capacity) 957 - return DMUB_STATUS_HW_FAILURE; 958 - 959 - /* check current HW state first, but use command submission vs reported as a fallback */ 960 - if ((dmub_rb_empty(&scratch_inbox1.rb) || 961 - inbox1->num_reported >= scratch_inbox1.num_submitted) && 962 - (!scratch_reg_inbox0.is_pending || 963 - reg_inbox0->num_reported >= scratch_reg_inbox0.num_submitted)) 964 - return DMUB_STATUS_OK; 965 - 966 - udelay(polling_interval_us); 967 - } 968 - 969 - return DMUB_STATUS_TIMEOUT; 970 - } 971 - 972 922 enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, 973 923 uint32_t timeout_us) 974 924 { 975 925 uint32_t i, rptr; 976 - const uint32_t polling_interval_us = 1; 977 926 978 927 if (!dmub->hw_init) 979 928 return DMUB_STATUS_INVALID; 980 929 981 - for (i = 0; i < timeout_us; i += polling_interval_us) { 982 - /* update inbox1 state */ 930 + for (i = 0; i <= timeout_us; ++i) { 983 931 rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); 984 932 985 - if (rptr > dmub->inbox1.rb.capacity) 933 + if (rptr > dmub->inbox1_rb.capacity) 986 934 return DMUB_STATUS_HW_FAILURE; 987 935 988 - if (dmub->inbox1.rb.rptr > rptr) { 989 - /* rb wrapped */ 990 - dmub->inbox1.num_reported += (rptr + dmub->inbox1.rb.capacity - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; 991 - } else { 992 - dmub->inbox1.num_reported += (rptr - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; 993 - } 994 - dmub->inbox1.rb.rptr = rptr; 936 + dmub->inbox1_rb.rptr = rptr; 995 937 996 - /* update reg_inbox0 */ 997 - dmub_srv_update_reg_inbox0_status(dmub); 998 - 999 - /* check for idle */ 1000 - if (dmub_rb_empty(&dmub->inbox1.rb) && !dmub->reg_inbox0.is_pending) 938 + if 
(dmub_rb_empty(&dmub->inbox1_rb)) 1001 939 return DMUB_STATUS_OK; 1002 940 1003 - udelay(polling_interval_us); 941 + udelay(1); 1004 942 } 1005 943 1006 944 return DMUB_STATUS_TIMEOUT; ··· 1040 1120 return DMUB_STATUS_OK; 1041 1121 } 1042 1122 1123 + enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, 1124 + union dmub_rb_cmd *cmd) 1125 + { 1126 + enum dmub_status status = DMUB_STATUS_OK; 1127 + 1128 + // Queue command 1129 + status = dmub_srv_cmd_queue(dmub, cmd); 1130 + 1131 + if (status != DMUB_STATUS_OK) 1132 + return status; 1133 + 1134 + // Execute command 1135 + status = dmub_srv_cmd_execute(dmub); 1136 + 1137 + if (status != DMUB_STATUS_OK) 1138 + return status; 1139 + 1140 + // Wait for DMUB to process command 1141 + status = dmub_srv_wait_for_idle(dmub, 100000); 1142 + 1143 + if (status != DMUB_STATUS_OK) 1144 + return status; 1145 + 1146 + // Copy data back from ring buffer into command 1147 + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd); 1148 + 1149 + return status; 1150 + } 1151 + 1043 1152 static inline bool dmub_rb_out_trace_buffer_front(struct dmub_rb *rb, 1044 1153 void *entry) 1045 1154 { ··· 1160 1211 } 1161 1212 } 1162 1213 1214 + 1215 + enum dmub_status dmub_srv_send_reg_inbox0_cmd( 1216 + struct dmub_srv *dmub, 1217 + union dmub_rb_cmd *cmd, 1218 + bool with_reply, uint32_t timeout_us) 1219 + { 1220 + uint32_t rsp_ready = 0; 1221 + uint32_t i; 1222 + 1223 + dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); 1224 + 1225 + for (i = 0; i < timeout_us; i++) { 1226 + rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); 1227 + if (rsp_ready) 1228 + break; 1229 + udelay(1); 1230 + } 1231 + if (rsp_ready == 0) 1232 + return DMUB_STATUS_TIMEOUT; 1233 + 1234 + if (with_reply) 1235 + dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd); 1236 + 1237 + dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); 1238 + 1239 + /* wait for rsp int status is cleared to initial state before exit */ 1240 + for (; i <= timeout_us; i++) { 1241 + 
rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); 1242 + if (rsp_ready == 0) 1243 + break; 1244 + udelay(1); 1245 + } 1246 + ASSERT(rsp_ready == 0); 1247 + 1248 + return DMUB_STATUS_OK; 1249 + } 1250 + 1163 1251 void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) 1164 1252 { 1165 1253 if (!dmub || !dmub->hw_init) 1166 1254 return; 1167 1255 1168 1256 dmub->power_state = dmub_srv_power_state; 1169 - } 1170 - 1171 - enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) 1172 - { 1173 - uint32_t num_pending = 0; 1174 - 1175 - if (!dmub->hw_init) 1176 - return DMUB_STATUS_INVALID; 1177 - 1178 - if (dmub->power_state != DMUB_POWER_STATE_D0) 1179 - return DMUB_STATUS_POWER_STATE_D3; 1180 - 1181 - if (!dmub->hw_funcs.send_reg_inbox0_cmd_msg || 1182 - !dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack) 1183 - return DMUB_STATUS_INVALID; 1184 - 1185 - if (dmub->reg_inbox0.num_submitted >= dmub->reg_inbox0.num_reported) 1186 - num_pending = dmub->reg_inbox0.num_submitted - dmub->reg_inbox0.num_reported; 1187 - else 1188 - /* num_submitted wrapped */ 1189 - num_pending = DMUB_REG_INBOX0_RB_MAX_ENTRY - 1190 - (dmub->reg_inbox0.num_reported - dmub->reg_inbox0.num_submitted); 1191 - 1192 - if (num_pending >= DMUB_REG_INBOX0_RB_MAX_ENTRY) 1193 - return DMUB_STATUS_QUEUE_FULL; 1194 - 1195 - /* clear last rsp ack and send message */ 1196 - dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack(dmub); 1197 - dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); 1198 - 1199 - dmub->reg_inbox0.num_submitted++; 1200 - dmub->reg_inbox0.is_pending = true; 1201 - dmub->reg_inbox0.is_multi_pending = cmd->cmd_common.header.multi_cmd_pending; 1202 - 1203 - return DMUB_STATUS_OK; 1204 - } 1205 - 1206 - void dmub_srv_cmd_get_response(struct dmub_srv *dmub, 1207 - union dmub_rb_cmd *cmd_rsp) 1208 - { 1209 - if (dmub) { 1210 - if (dmub->inbox_type == DMUB_CMD_INTERFACE_REG && 1211 - 
dmub->hw_funcs.read_reg_inbox0_cmd_rsp) { 1212 - dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd_rsp); 1213 - } else { 1214 - dmub_rb_get_return_data(&dmub->inbox1.rb, cmd_rsp); 1215 - } 1216 - } 1217 - } 1218 - 1219 - static enum dmub_status dmub_srv_sync_reg_inbox0(struct dmub_srv *dmub) 1220 - { 1221 - if (!dmub || !dmub->sw_init) 1222 - return DMUB_STATUS_INVALID; 1223 - 1224 - dmub->reg_inbox0.is_pending = 0; 1225 - dmub->reg_inbox0.is_multi_pending = 0; 1226 - 1227 - return DMUB_STATUS_OK; 1228 - } 1229 - 1230 - static enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) 1231 - { 1232 - if (!dmub->sw_init) 1233 - return DMUB_STATUS_INVALID; 1234 - 1235 - if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { 1236 - uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); 1237 - uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); 1238 - 1239 - if (rptr > dmub->inbox1.rb.capacity || wptr > dmub->inbox1.rb.capacity) { 1240 - return DMUB_STATUS_HW_FAILURE; 1241 - } else { 1242 - dmub->inbox1.rb.rptr = rptr; 1243 - dmub->inbox1.rb.wrpt = wptr; 1244 - dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; 1245 - } 1246 - } 1247 - 1248 - return DMUB_STATUS_OK; 1249 - } 1250 - 1251 - enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub) 1252 - { 1253 - enum dmub_status status; 1254 - 1255 - status = dmub_srv_sync_reg_inbox0(dmub); 1256 - if (status != DMUB_STATUS_OK) 1257 - return status; 1258 - 1259 - status = dmub_srv_sync_inbox1(dmub); 1260 - if (status != DMUB_STATUS_OK) 1261 - return status; 1262 - 1263 - return DMUB_STATUS_OK; 1264 1257 }

+12

drivers/gpu/drm/amd/include/amd_shared.h

··· 358 358 * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: If set, disable support for custom brightness curves 359 359 */ 360 360 DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE = 0x40000, 361 + 362 + /** 363 + * @DC_HDCP_LC_FORCE_FW_ENABLE: If set, use HDCP Locality Check FW 364 + * path regardless of reported HW capabilities. 365 + */ 366 + DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000, 367 + 368 + /** 369 + * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW 370 + * path failure, retry using legacy SW path. 371 + */ 372 + DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000, 361 373 }; 362 374 363 375 enum amd_dpm_forced_level;

+2 -1

drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h

··· 295 295 }; 296 296 297 297 /* Used to mask smu debug modes */ 298 - #define SMU_DEBUG_HALT_ON_ERROR 0x1 298 + #define SMU_DEBUG_HALT_ON_ERROR BIT(0) 299 + #define SMU_DEBUG_POOL_USE_VRAM BIT(1) 299 300 300 301 #define MAX_SMU_I2C_BUSES 2 301 302

+4 -1

drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

··· 1027 1027 1028 1028 memory_pool->size = pool_size; 1029 1029 memory_pool->align = PAGE_SIZE; 1030 - memory_pool->domain = AMDGPU_GEM_DOMAIN_GTT; 1030 + memory_pool->domain = 1031 + (adev->pm.smu_debug_mask & SMU_DEBUG_POOL_USE_VRAM) ? 1032 + AMDGPU_GEM_DOMAIN_VRAM : 1033 + AMDGPU_GEM_DOMAIN_GTT; 1031 1034 1032 1035 switch (pool_size) { 1033 1036 case SMU_MEMORY_POOL_SIZE_256_MB:

+2 -2

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c

··· 478 478 } 479 479 } 480 480 481 - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth); 482 - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate); 481 + gpu_metrics->xgmi_link_width = metrics->XgmiWidth; 482 + gpu_metrics->xgmi_link_speed = metrics->XgmiBitrate; 483 483 484 484 gpu_metrics->firmware_timestamp = metrics->Timestamp; 485 485

+7 -3

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

··· 231 231 SMU_13_0_6_FEA_MAP(SMU_FEATURE_FW_CTF_BIT, FEATURE_FW_CTF), 232 232 SMU_13_0_6_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL), 233 233 SMU_13_0_6_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN), 234 - SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE), 234 + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE), 235 + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_VCN_BIT, FEATURE_DS_VCN), 236 + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP1CLK_BIT, FEATURE_DS_MP1CLK), 237 + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MPIOCLK_BIT, FEATURE_DS_MPIOCLK), 238 + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP0CLK_BIT, FEATURE_DS_MP0CLK), 235 239 }; 236 240 237 241 #define TABLE_PMSTATUSLOG 0 ··· 2686 2682 } 2687 2683 } 2688 2684 2689 - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth, version)); 2690 - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate, version)); 2685 + gpu_metrics->xgmi_link_width = GET_METRIC_FIELD(XgmiWidth, version); 2686 + gpu_metrics->xgmi_link_speed = GET_METRIC_FIELD(XgmiBitrate, version); 2691 2687 2692 2688 gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); 2693 2689

+19 -42

drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c

··· 1203 1203 PP_OD_FEATURE_GFXCLK_BIT)) 1204 1204 break; 1205 1205 1206 - PPTable_t *pptable = smu->smu_table.driver_pptable; 1207 - const OverDriveLimits_t * const overdrive_upperlimits = 1208 - &pptable->SkuTable.OverDriveLimitsBasicMax; 1209 - const OverDriveLimits_t * const overdrive_lowerlimits = 1210 - &pptable->SkuTable.OverDriveLimitsBasicMin; 1211 - 1212 1206 size += sysfs_emit_at(buf, size, "OD_SCLK_OFFSET:\n"); 1213 - size += sysfs_emit_at(buf, size, "0: %dMhz\n1: %uMhz\n", 1214 - overdrive_lowerlimits->GfxclkFoffset, 1215 - overdrive_upperlimits->GfxclkFoffset); 1207 + size += sysfs_emit_at(buf, size, "%dMhz\n", 1208 + od_table->OverDriveTable.GfxclkFoffset); 1216 1209 break; 1217 1210 1218 1211 case SMU_OD_MCLK: ··· 1340 1347 1341 1348 if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { 1342 1349 smu_v14_0_2_get_od_setting_limits(smu, 1343 - PP_OD_FEATURE_GFXCLK_FMIN, 1344 - &min_value, 1345 - NULL); 1346 - smu_v14_0_2_get_od_setting_limits(smu, 1347 1350 PP_OD_FEATURE_GFXCLK_FMAX, 1348 - NULL, 1351 + &min_value, 1349 1352 &max_value); 1350 1353 size += sysfs_emit_at(buf, size, "SCLK_OFFSET: %7dMhz %10uMhz\n", 1351 1354 min_value, max_value); ··· 2449 2460 return -ENOTSUPP; 2450 2461 } 2451 2462 2452 - for (i = 0; i < size; i += 2) { 2453 - if (i + 2 > size) { 2454 - dev_info(adev->dev, "invalid number of input parameters %d\n", size); 2455 - return -EINVAL; 2456 - } 2457 - 2458 - switch (input[i]) { 2459 - case 1: 2460 - smu_v14_0_2_get_od_setting_limits(smu, 2461 - PP_OD_FEATURE_GFXCLK_FMAX, 2462 - &minimum, 2463 - &maximum); 2464 - if (input[i + 1] < minimum || 2465 - input[i + 1] > maximum) { 2466 - dev_info(adev->dev, "GfxclkFmax (%ld) must be within [%u, %u]!\n", 2467 - input[i + 1], minimum, maximum); 2468 - return -EINVAL; 2469 - } 2470 - 2471 - od_table->OverDriveTable.GfxclkFoffset = input[i + 1]; 2472 - od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; 2473 - break; 2474 - 2475 - default: 2476 
- dev_info(adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); 2477 - dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); 2478 - return -EINVAL; 2479 - } 2463 + if (size != 1) { 2464 + dev_info(adev->dev, "invalid number of input parameters %d\n", size); 2465 + return -EINVAL; 2480 2466 } 2481 2467 2468 + smu_v14_0_2_get_od_setting_limits(smu, 2469 + PP_OD_FEATURE_GFXCLK_FMAX, 2470 + &minimum, 2471 + &maximum); 2472 + if (input[0] < minimum || 2473 + input[0] > maximum) { 2474 + dev_info(adev->dev, "GfxclkFoffset must be within [%d, %u]!\n", 2475 + minimum, maximum); 2476 + return -EINVAL; 2477 + } 2478 + 2479 + od_table->OverDriveTable.GfxclkFoffset = input[0]; 2480 + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; 2482 2481 break; 2483 2482 2484 2483 case PP_OD_EDIT_MCLK_VDDC_TABLE:

+1 -1

drivers/gpu/drm/radeon/radeon_uvd.c

··· 961 961 unsigned optimal_score = ~0; 962 962 963 963 /* loop through vco from low to high */ 964 - vco_min = max(max(vco_min, vclk), dclk); 964 + vco_min = max3(vco_min, vclk, dclk); 965 965 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 966 966 967 967 uint64_t fb_div = (uint64_t)vco_freq * fb_factor;

Configure Feed

Configure Feed