Merge tag 'drm-next-2024-11-29' of https://gitlab.freedesktop.org/drm/kernel

+4

drivers/gpu/drm/amd/amdgpu/aldebaran.c

··· 330 330 } 331 331 332 332 list_for_each_entry(tmp_adev, reset_device_list, reset_list) { 333 + amdgpu_set_init_level(tmp_adev, 334 + AMDGPU_INIT_LEVEL_RESET_RECOVERY); 333 335 dev_info(tmp_adev->dev, 334 336 "GPU reset succeeded, trying to resume\n"); 335 337 r = aldebaran_mode2_restore_ip(tmp_adev); ··· 377 375 tmp_adev); 378 376 379 377 if (!r) { 378 + amdgpu_set_init_level(tmp_adev, 379 + AMDGPU_INIT_LEVEL_DEFAULT); 380 380 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 381 381 382 382 r = amdgpu_ib_ring_tests(tmp_adev);

+1

drivers/gpu/drm/amd/amdgpu/amdgpu.h

··· 839 839 enum amdgpu_init_lvl_id { 840 840 AMDGPU_INIT_LEVEL_DEFAULT, 841 841 AMDGPU_INIT_LEVEL_MINIMAL_XGMI, 842 + AMDGPU_INIT_LEVEL_RESET_RECOVERY, 842 843 }; 843 844 844 845 struct amdgpu_init_level {

+24 -5

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

··· 156 156 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, 157 157 }; 158 158 159 + struct amdgpu_init_level amdgpu_init_recovery = { 160 + .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY, 161 + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, 162 + }; 163 + 159 164 /* 160 165 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This 161 166 * is used for cases like reset on initialization where the entire hive needs to ··· 186 181 switch (lvl) { 187 182 case AMDGPU_INIT_LEVEL_MINIMAL_XGMI: 188 183 adev->init_lvl = &amdgpu_init_minimal_xgmi; 184 + break; 185 + case AMDGPU_INIT_LEVEL_RESET_RECOVERY: 186 + adev->init_lvl = &amdgpu_init_recovery; 189 187 break; 190 188 case AMDGPU_INIT_LEVEL_DEFAULT: 191 189 fallthrough; ··· 3258 3250 return r; 3259 3251 } 3260 3252 3261 - if (!amdgpu_in_reset(adev)) 3253 + if (!amdgpu_reset_in_recovery(adev)) 3262 3254 amdgpu_ras_set_error_query_ready(adev, true); 3263 3255 3264 3256 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); ··· 4677 4669 int idx; 4678 4670 bool px; 4679 4671 4680 - amdgpu_fence_driver_sw_fini(adev); 4681 4672 amdgpu_device_ip_fini(adev); 4673 + amdgpu_fence_driver_sw_fini(adev); 4682 4674 amdgpu_ucode_release(&adev->firmware.gpu_info_fw); 4683 4675 adev->accel_working = false; 4684 4676 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); ··· 5427 5419 struct list_head *device_list_handle; 5428 5420 bool full_reset, vram_lost = false; 5429 5421 struct amdgpu_device *tmp_adev; 5430 - int r; 5422 + int r, init_level; 5431 5423 5432 5424 device_list_handle = reset_context->reset_device_list; 5433 5425 ··· 5436 5428 5437 5429 full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5438 5430 5431 + /** 5432 + * If it's reset on init, it's default init level, otherwise keep level 5433 + * as recovery level. 5434 + */ 5435 + if (reset_context->method == AMD_RESET_METHOD_ON_INIT) 5436 + init_level = AMDGPU_INIT_LEVEL_DEFAULT; 5437 + else 5438 + init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY; 5439 + 5439 5440 r = 0; 5440 5441 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5441 - /* After reset, it's default init level */ 5442 - amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); 5442 + amdgpu_set_init_level(tmp_adev, init_level); 5443 5443 if (full_reset) { 5444 5444 /* post card */ 5445 5445 amdgpu_ras_set_fed(tmp_adev, false); ··· 5534 5518 5535 5519 out: 5536 5520 if (!r) { 5521 + /* IP init is complete now, set level as default */ 5522 + amdgpu_set_init_level(tmp_adev, 5523 + AMDGPU_INIT_LEVEL_DEFAULT); 5537 5524 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 5538 5525 r = amdgpu_ib_ring_tests(tmp_adev); 5539 5526 if (r) {

+5 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

··· 1778 1778 1779 1779 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) 1780 1780 { 1781 - amdgpu_gfx_sysfs_xcp_fini(adev); 1782 - amdgpu_gfx_sysfs_isolation_shader_fini(adev); 1783 - amdgpu_gfx_sysfs_reset_mask_fini(adev); 1781 + if (adev->dev->kobj.sd) { 1782 + amdgpu_gfx_sysfs_xcp_fini(adev); 1783 + amdgpu_gfx_sysfs_isolation_shader_fini(adev); 1784 + amdgpu_gfx_sysfs_reset_mask_fini(adev); 1785 + } 1784 1786 } 1785 1787 1786 1788 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,

+4 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c

··· 447 447 448 448 void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev) 449 449 { 450 - if (adev->jpeg.num_jpeg_inst) 451 - device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask); 450 + if (adev->dev->kobj.sd) { 451 + if (adev->jpeg.num_jpeg_inst) 452 + device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask); 453 + } 452 454 }

+2 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c

··· 137 137 if (ret) 138 138 return; 139 139 140 - device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); 140 + if (adev->dev->kobj.sd) 141 + device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); 141 142 142 143 ttm_resource_manager_cleanup(man); 143 144 ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);

+5 -5

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

··· 1298 1298 struct ras_manager *obj; 1299 1299 1300 1300 /* in resume phase, no need to create aca fs node */ 1301 - if (adev->in_suspend || amdgpu_in_reset(adev)) 1301 + if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) 1302 1302 return 0; 1303 1303 1304 1304 obj = get_ras_manager(adev, blk); ··· 3610 3610 ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr; 3611 3611 3612 3612 /* init event manager with node 0 on xgmi system */ 3613 - if (!amdgpu_in_reset(adev)) { 3613 + if (!amdgpu_reset_in_recovery(adev)) { 3614 3614 if (!hive || adev->gmc.xgmi.node_id == 0) 3615 3615 ras_event_mgr_init(ras->event_mgr); 3616 3616 } ··· 3825 3825 3826 3826 r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1); 3827 3827 if (r) { 3828 - if (adev->in_suspend || amdgpu_in_reset(adev)) { 3828 + if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) { 3829 3829 /* in resume phase, if fail to enable ras, 3830 3830 * clean up all ras fs nodes, and disable ras */ 3831 3831 goto cleanup; ··· 3837 3837 amdgpu_persistent_edc_harvesting(adev, ras_block); 3838 3838 3839 3839 /* in resume phase, no need to create ras fs node */ 3840 - if (adev->in_suspend || amdgpu_in_reset(adev)) 3840 + if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) 3841 3841 return 0; 3842 3842 3843 3843 ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm); ··· 3967 3967 amdgpu_ras_event_mgr_init(adev); 3968 3968 3969 3969 if (amdgpu_ras_aca_is_supported(adev)) { 3970 - if (amdgpu_in_reset(adev)) { 3970 + if (amdgpu_reset_in_recovery(adev)) { 3971 3971 if (amdgpu_aca_is_enabled(adev)) 3972 3972 r = amdgpu_aca_reset(adev); 3973 3973 else

+5

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

··· 342 342 strscpy(buf, "unknown", len); 343 343 } 344 344 } 345 + 346 + bool amdgpu_reset_in_recovery(struct amdgpu_device *adev) 347 + { 348 + return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY); 349 + }

+2

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

··· 158 158 int amdgpu_reset_do_xgmi_reset_on_init( 159 159 struct amdgpu_reset_context *reset_context); 160 160 161 + bool amdgpu_reset_in_recovery(struct amdgpu_device *adev); 162 + 161 163 #endif

+4 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c

··· 451 451 if (!amdgpu_gpu_recovery) 452 452 return; 453 453 454 - if (adev->sdma.num_instances) 455 - device_remove_file(adev->dev, &dev_attr_sdma_reset_mask); 454 + if (adev->dev->kobj.sd) { 455 + if (adev->sdma.num_instances) 456 + device_remove_file(adev->dev, &dev_attr_sdma_reset_mask); 457 + } 456 458 }

+3 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c

··· 214 214 215 215 drm_sched_entity_destroy(&adev->vce.entity); 216 216 217 - amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, 218 - (void **)&adev->vce.cpu_addr); 219 - 220 217 for (i = 0; i < adev->vce.num_rings; i++) 221 218 amdgpu_ring_fini(&adev->vce.ring[i]); 222 219 223 220 amdgpu_ucode_release(&adev->vce.fw); 224 221 mutex_destroy(&adev->vce.idle_mutex); 222 + 223 + amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, 224 + (void **)&adev->vce.cpu_addr); 225 225 226 226 return 0; 227 227 }

+37

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

··· 1283 1283 1284 1284 return psp_execute_ip_fw_load(&adev->psp, &ucode); 1285 1285 } 1286 + 1287 + static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev, 1288 + struct device_attribute *attr, 1289 + char *buf) 1290 + { 1291 + struct drm_device *ddev = dev_get_drvdata(dev); 1292 + struct amdgpu_device *adev = drm_to_adev(ddev); 1293 + 1294 + if (!adev) 1295 + return -ENODEV; 1296 + 1297 + return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset); 1298 + } 1299 + 1300 + static DEVICE_ATTR(vcn_reset_mask, 0444, 1301 + amdgpu_get_vcn_reset_mask, NULL); 1302 + 1303 + int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev) 1304 + { 1305 + int r = 0; 1306 + 1307 + if (adev->vcn.num_vcn_inst) { 1308 + r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask); 1309 + if (r) 1310 + return r; 1311 + } 1312 + 1313 + return r; 1314 + } 1315 + 1316 + void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev) 1317 + { 1318 + if (adev->dev->kobj.sd) { 1319 + if (adev->vcn.num_vcn_inst) 1320 + device_remove_file(adev->dev, &dev_attr_vcn_reset_mask); 1321 + } 1322 + }

+4

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

··· 333 333 334 334 /* IP reg dump */ 335 335 uint32_t *ip_dump; 336 + 337 + uint32_t supported_reset; 336 338 }; 337 339 338 340 struct amdgpu_fw_shared_rb_ptrs_struct { ··· 521 519 int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, 522 520 enum AMDGPU_UCODE_ID ucode_id); 523 521 int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev); 522 + int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev); 523 + void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev); 524 524 525 525 #endif

+4 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c

··· 904 904 905 905 void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev) 906 906 { 907 - if (adev->vpe.num_instances) 908 - device_remove_file(adev->dev, &dev_attr_vpe_reset_mask); 907 + if (adev->dev->kobj.sd) { 908 + if (adev->vpe.num_instances) 909 + device_remove_file(adev->dev, &dev_attr_vpe_reset_mask); 910 + } 909 911 } 910 912 911 913 static const struct amdgpu_ring_funcs vpe_ring_funcs = {

+41

drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

··· 40 40 #define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210 41 41 #define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218 42 42 43 + #define XGMI_STATE_DISABLE 0xD1 44 + #define XGMI_STATE_LS0 0x81 45 + #define XGMI_LINK_ACTIVE 1 46 + #define XGMI_LINK_INACTIVE 0 47 + 43 48 static DEFINE_MUTEX(xgmi_mutex); 44 49 45 50 #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4 ··· 293 288 {"XGMI3X16 PCS RxCMDPktErr", 294 289 SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)}, 295 290 }; 291 + 292 + static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num) 293 + { 294 + const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070; 295 + const int xgmi_inst = 2; 296 + u32 link_inst; 297 + u64 addr; 298 + 299 + link_inst = global_link_num % xgmi_inst; 300 + 301 + addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) + 302 + adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst); 303 + 304 + return RREG32_PCIE_EXT(addr); 305 + } 306 + 307 + int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num) 308 + { 309 + u32 xgmi_state_reg_val; 310 + 311 + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { 312 + case IP_VERSION(6, 4, 0): 313 + xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num); 314 + break; 315 + default: 316 + return -EOPNOTSUPP; 317 + } 318 + 319 + if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_DISABLE) 320 + return -ENOLINK; 321 + 322 + if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_LS0) 323 + return XGMI_LINK_ACTIVE; 324 + 325 + return XGMI_LINK_INACTIVE; 326 + } 296 327 297 328 /** 298 329 * DOC: AMDGPU XGMI Support

+2

drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

··· 84 84 int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, 85 85 struct amdgpu_hive_info *hive, 86 86 int req_nps_mode); 87 + int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, 88 + int global_link_num); 87 89 88 90 #endif

+2 -2

drivers/gpu/drm/amd/amdgpu/df_v3_6.c

··· 254 254 255 255 static void df_v3_6_sw_fini(struct amdgpu_device *adev) 256 256 { 257 - 258 - device_remove_file(adev->dev, &dev_attr_df_cntr_avail); 257 + if (adev->dev->kobj.sd) 258 + device_remove_file(adev->dev, &dev_attr_df_cntr_avail); 259 259 260 260 } 261 261

+14 -4

drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

··· 87 87 88 88 static void gmc_v7_0_mc_stop(struct amdgpu_device *adev) 89 89 { 90 + struct amdgpu_ip_block *ip_block; 90 91 u32 blackout; 91 92 92 - gmc_v7_0_wait_for_idle((void *)adev); 93 + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); 94 + if (!ip_block) 95 + return; 96 + 97 + gmc_v7_0_wait_for_idle(ip_block); 93 98 94 99 blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); 95 100 if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) { ··· 256 251 */ 257 252 static void gmc_v7_0_mc_program(struct amdgpu_device *adev) 258 253 { 254 + struct amdgpu_ip_block *ip_block; 259 255 u32 tmp; 260 256 int i, j; 257 + 258 + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); 259 + if (!ip_block) 260 + return; 261 261 262 262 /* Initialize HDP */ 263 263 for (i = 0, j = 0; i < 32; i++, j += 0x6) { ··· 274 264 } 275 265 WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); 276 266 277 - if (gmc_v7_0_wait_for_idle((void *)adev)) 267 + if (gmc_v7_0_wait_for_idle(ip_block)) 278 268 dev_warn(adev->dev, "Wait for MC idle timedout !\n"); 279 269 280 270 if (adev->mode_info.num_crtc) { ··· 298 288 WREG32(mmMC_VM_AGP_BASE, 0); 299 289 WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22); 300 290 WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22); 301 - if (gmc_v7_0_wait_for_idle((void *)adev)) 291 + if (gmc_v7_0_wait_for_idle(ip_block)) 302 292 dev_warn(adev->dev, "Wait for MC idle timedout !\n"); 303 293 304 294 WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); ··· 1193 1183 1194 1184 if (srbm_soft_reset) { 1195 1185 gmc_v7_0_mc_stop(adev); 1196 - if (gmc_v7_0_wait_for_idle((void *)adev)) 1186 + if (gmc_v7_0_wait_for_idle(ip_block)) 1197 1187 dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); 1198 1188 1199 1189 tmp = RREG32(mmSRBM_SOFT_RESET);

+1 -1

drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c

··· 604 604 static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) 605 605 { 606 606 struct amdgpu_device *adev = ring->adev; 607 - bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); 607 + bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work); 608 608 int cnt = 0; 609 609 610 610 mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);

+1 -1

drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c

··· 150 150 { 151 151 struct amdgpu_device *adev = ip_block->adev; 152 152 153 - cancel_delayed_work_sync(&adev->vcn.idle_work); 153 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 154 154 155 155 if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && 156 156 RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))

+1 -1

drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c

··· 211 211 struct amdgpu_device *adev = ip_block->adev; 212 212 int i; 213 213 214 - cancel_delayed_work_sync(&adev->vcn.idle_work); 214 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 215 215 216 216 for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 217 217 if (adev->jpeg.harvest_config & (1 << i))

+1 -1

drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c

··· 164 164 { 165 165 struct amdgpu_device *adev = ip_block->adev; 166 166 167 - cancel_delayed_work_sync(&adev->vcn.idle_work); 167 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 168 168 169 169 if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && 170 170 RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))

+1 -1

drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c

··· 202 202 { 203 203 struct amdgpu_device *adev = ip_block->adev; 204 204 205 - cancel_delayed_work_sync(&adev->vcn.idle_work); 205 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 206 206 if (!amdgpu_sriov_vf(adev)) { 207 207 if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && 208 208 RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))

+1 -1

drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c

··· 227 227 struct amdgpu_device *adev = ip_block->adev; 228 228 int i; 229 229 230 - cancel_delayed_work_sync(&adev->vcn.idle_work); 230 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 231 231 232 232 for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 233 233 if (adev->jpeg.harvest_config & (1 << i))

+1 -1

drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c

··· 168 168 { 169 169 struct amdgpu_device *adev = ip_block->adev; 170 170 171 - cancel_delayed_work_sync(&adev->vcn.idle_work); 171 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 172 172 173 173 if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && 174 174 RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))

+9

drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c

··· 275 275 if (def != data) 276 276 WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); 277 277 278 + switch (adev->ip_versions[NBIO_HWIP][0]) { 279 + case IP_VERSION(7, 11, 0): 280 + case IP_VERSION(7, 11, 1): 281 + case IP_VERSION(7, 11, 2): 282 + case IP_VERSION(7, 11, 3): 283 + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23); 284 + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data); 285 + break; 286 + } 278 287 } 279 288 280 289 static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,

+2

drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c

··· 220 220 int r; 221 221 struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; 222 222 223 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY); 223 224 dev_info(tmp_adev->dev, 224 225 "GPU reset succeeded, trying to resume\n"); 225 226 r = sienna_cichlid_mode2_restore_ip(tmp_adev); ··· 238 237 239 238 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 240 239 240 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); 241 241 r = amdgpu_ib_ring_tests(tmp_adev); 242 242 if (r) { 243 243 dev_err(tmp_adev->dev,

+2

drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c

··· 221 221 int r; 222 222 struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; 223 223 224 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY); 224 225 dev_info(tmp_adev->dev, 225 226 "GPU reset succeeded, trying to resume\n"); 226 227 r = smu_v13_0_10_mode2_restore_ip(tmp_adev); ··· 235 234 236 235 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 237 236 237 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); 238 238 r = amdgpu_ib_ring_tests(tmp_adev); 239 239 if (r) { 240 240 dev_err(tmp_adev->dev,

+9

drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c

··· 225 225 vcn_v4_0_fw_shared_init(adev, i); 226 226 } 227 227 228 + /* TODO: Add queue reset mask when FW fully supports it */ 229 + adev->vcn.supported_reset = 230 + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); 231 + 228 232 if (amdgpu_sriov_vf(adev)) { 229 233 r = amdgpu_virt_alloc_mm_table(adev); 230 234 if (r) ··· 250 246 } else { 251 247 adev->vcn.ip_dump = ptr; 252 248 } 249 + 250 + r = amdgpu_vcn_sysfs_reset_mask_init(adev); 251 + if (r) 252 + return r; 253 253 254 254 return 0; 255 255 } ··· 292 284 if (r) 293 285 return r; 294 286 287 + amdgpu_vcn_sysfs_reset_mask_fini(adev); 295 288 r = amdgpu_vcn_sw_fini(adev); 296 289 297 290 kfree(adev->vcn.ip_dump);

+31 -8

drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c

··· 123 123 return amdgpu_vcn_early_init(adev); 124 124 } 125 125 126 + static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx) 127 + { 128 + struct amdgpu_vcn4_fw_shared *fw_shared; 129 + 130 + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; 131 + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); 132 + fw_shared->sq.is_enabled = 1; 133 + 134 + if (amdgpu_vcnfw_log) 135 + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); 136 + 137 + return 0; 138 + } 139 + 126 140 /** 127 141 * vcn_v4_0_3_sw_init - sw init for VCN block 128 142 * ··· 169 155 return r; 170 156 171 157 for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 172 - volatile struct amdgpu_vcn4_fw_shared *fw_shared; 173 - 174 158 vcn_inst = GET_INST(VCN, i); 175 159 176 160 ring = &adev->vcn.inst[i].ring_enc[0]; ··· 191 179 if (r) 192 180 return r; 193 181 194 - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 195 - fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); 196 - fw_shared->sq.is_enabled = true; 197 - 198 - if (amdgpu_vcnfw_log) 199 - amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); 182 + vcn_v4_0_3_fw_shared_init(adev, i); 200 183 } 184 + 185 + /* TODO: Add queue reset mask when FW fully supports it */ 186 + adev->vcn.supported_reset = 187 + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); 201 188 202 189 if (amdgpu_sriov_vf(adev)) { 203 190 r = amdgpu_virt_alloc_mm_table(adev); ··· 223 212 } else { 224 213 adev->vcn.ip_dump = ptr; 225 214 } 215 + 216 + r = amdgpu_vcn_sysfs_reset_mask_init(adev); 217 + if (r) 218 + return r; 226 219 227 220 return 0; 228 221 } ··· 261 246 if (r) 262 247 return r; 263 248 249 + amdgpu_vcn_sysfs_reset_mask_fini(adev); 264 250 r = amdgpu_vcn_sw_fini(adev); 265 251 266 252 kfree(adev->vcn.ip_dump); ··· 296 280 } 297 281 } else { 298 282 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 283 + struct amdgpu_vcn4_fw_shared *fw_shared; 284 + 299 285 vcn_inst = GET_INST(VCN, i); 300 286 ring = &adev->vcn.inst[i].ring_enc[0]; 301 287 ··· 320 302 VCN, GET_INST(VCN, ring->me), 321 303 regVCN_RB1_DB_CTRL); 322 304 } 305 + 306 + /* Re-init fw_shared when RAS fatal error occurred */ 307 + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 308 + if (!fw_shared->sq.is_enabled) 309 + vcn_v4_0_3_fw_shared_init(adev, i); 323 310 324 311 r = amdgpu_ring_test_helper(ring); 325 312 if (r)

+10

drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c

··· 170 170 amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); 171 171 } 172 172 173 + /* TODO: Add queue reset mask when FW fully supports it */ 174 + adev->vcn.supported_reset = 175 + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); 176 + 173 177 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) 174 178 adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; 175 179 ··· 185 181 } else { 186 182 adev->vcn.ip_dump = ptr; 187 183 } 184 + 185 + r = amdgpu_vcn_sysfs_reset_mask_init(adev); 186 + if (r) 187 + return r; 188 + 188 189 return 0; 189 190 } 190 191 ··· 224 215 if (r) 225 216 return r; 226 217 218 + amdgpu_vcn_sysfs_reset_mask_fini(adev); 227 219 r = amdgpu_vcn_sw_fini(adev); 228 220 229 221 kfree(adev->vcn.ip_dump);

+6 -1

drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c

··· 125 125 126 126 memset(kq->pq_kernel_addr, 0, queue_size); 127 127 memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel)); 128 - memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel)); 128 + memset(kq->wptr_kernel, 0, dev->kfd->device_info.doorbell_size); 129 129 130 130 prop.queue_size = queue_size; 131 131 prop.is_interop = false; ··· 306 306 if (amdgpu_amdkfd_is_fed(kq->dev->adev)) 307 307 return -EIO; 308 308 309 + /* Make sure ring buffer is updated before wptr updated */ 310 + mb(); 311 + 309 312 if (kq->dev->kfd->device_info.doorbell_size == 8) { 310 313 *kq->wptr64_kernel = kq->pending_wptr64; 314 + mb(); /* Make sure wptr updated before ring doorbell */ 311 315 write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, 312 316 kq->pending_wptr64); 313 317 } else { 314 318 *kq->wptr_kernel = kq->pending_wptr; 319 + mb(); /* Make sure wptr updated before ring doorbell */ 315 320 write_kernel_doorbell(kq->queue->properties.doorbell_ptr, 316 321 kq->pending_wptr); 317 322 }

+8 -7

drivers/gpu/drm/amd/display/Kconfig

··· 7 7 config DRM_AMD_DC 8 8 bool "AMD DC - Enable new display engine" 9 9 default y 10 - depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64 10 + depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64 11 11 select SND_HDA_COMPONENT if SND_HDA_CORE 12 12 # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 13 - select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV)) 13 + select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV)) 14 14 help 15 15 Choose this option if you want to use the new display engine 16 16 support for AMDGPU. This adds required support for Vega and 17 17 Raven ASICs. 18 18 19 - calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) 20 - architectures built with Clang (all released versions), whereby the stack 21 - frame gets blown up to well over 5k. This would cause an immediate kernel 22 - panic on most architectures. We'll revert this when the following bug report 23 - has been resolved: https://github.com/llvm/llvm-project/issues/41896. 19 + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || 20 + ARM64 || LOONGARCH || RISCV) architectures built with Clang (all released 21 + versions), whereby the stack frame gets blown up to well over 5k. This 22 + would cause an immediate kernel panic on most architectures. We'll revert 23 + this when the following bug report has been resolved: 24 + https://github.com/llvm/llvm-project/issues/41896. 24 25 25 26 config DRM_AMD_DC_FP 26 27 def_bool n

+5 -2

drivers/gpu/drm/amd/display/dc/core/dc.c

··· 3141 3141 return; 3142 3142 3143 3143 for (i = 0; i < status->plane_count; i++) { 3144 + /* refcount will always be valid, restore everything else */ 3145 + struct kref refcount = status->plane_states[i]->refcount; 3144 3146 *status->plane_states[i] = scratch->plane_states[i]; 3147 + status->plane_states[i]->refcount = refcount; 3145 3148 } 3146 3149 *stream = scratch->stream_state; 3147 3150 } ··· 6100 6097 { 6101 6098 struct dc_power_profile profile = { 0 }; 6102 6099 6103 - if (!context || !context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc) 6100 + profile.power_level = !context->bw_ctx.bw.dcn.clk.p_state_change_support; 6101 + if (!context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc) 6104 6102 return profile; 6105 6103 struct dc *dc = context->clk_mgr->ctx->dc; 6106 - 6107 6104 6108 6105 if (dc->res_pool->funcs->get_power_profile) 6109 6106 profile.power_level = dc->res_pool->funcs->get_power_profile(context);

+3

drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c

··· 898 898 struct dpp *dpp = pipe_ctx->plane_res.dpp; 899 899 struct dc_plane_state *plane_state = pipe_ctx->plane_state; 900 900 901 + if (!plane_state) 902 + return; 903 + 901 904 if (dpp && dpp->funcs->dpp_setup) { 902 905 // program the input csc 903 906 dpp->funcs->dpp_setup(dpp,

+8

drivers/gpu/drm/amd/display/dc/core/dc_resource.c

··· 1501 1501 res = spl_calculate_scaler_params(spl_in, spl_out); 1502 1502 // Convert respective out params from SPL to scaler data 1503 1503 translate_SPL_out_params_to_pipe_ctx(pipe_ctx, spl_out); 1504 + 1505 + /* Ignore scaler failure if pipe context plane is phantom plane */ 1506 + if (!res && plane_state->is_phantom) 1507 + res = true; 1504 1508 } else { 1505 1509 #endif 1506 1510 /* depends on h_active */ ··· 1574 1570 &pipe_ctx->plane_res.scl_data, 1575 1571 &plane_state->scaling_quality); 1576 1572 } 1573 + 1574 + /* Ignore scaler failure if pipe context plane is phantom plane */ 1575 + if (!res && plane_state->is_phantom) 1576 + res = true; 1577 1577 1578 1578 if (res && (pipe_ctx->plane_res.scl_data.taps.v_taps != temp.v_taps || 1579 1579 pipe_ctx->plane_res.scl_data.taps.h_taps != temp.h_taps ||

+1 -1

drivers/gpu/drm/amd/display/dc/dc.h

··· 55 55 struct set_config_cmd_payload; 56 56 struct dmub_notification; 57 57 58 - #define DC_VER "3.2.309" 58 + #define DC_VER "3.2.310" 59 59 60 60 #define MAX_SURFACES 3 61 61 #define MAX_PLANES 6

+9 -6

drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c

··· 1392 1392 1393 1393 /* The recommended programming sequence to enable DTBCLK DTO to generate 1394 1394 * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should 1395 - * be set only after DTO is enabled 1395 + * be set only after DTO is enabled. 1396 + * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the 1397 + * programming is handled in program_pix_clk() regardless, so it can be removed from here. 1396 1398 */ 1397 - REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], 1398 - PIPE_DTO_SRC_SEL[params->otg_inst], 2); 1399 1399 } else { 1400 1400 switch (params->otg_inst) { 1401 1401 case 0: ··· 1412 1412 break; 1413 1413 } 1414 1414 1415 - REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], 1416 - DTBCLK_DTO_ENABLE[params->otg_inst], 0, 1417 - PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); 1415 + /** 1416 + * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the 1417 + * programming is handled in program_pix_clk() regardless, so it can be removed from here. 1418 + */ 1419 + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], 1420 + DTBCLK_DTO_ENABLE[params->otg_inst], 0); 1418 1421 1419 1422 REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); 1420 1423 REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);

+6

drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c

··· 11 11 12 12 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 13 13 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 14 + #define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE 14 15 15 16 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) 16 17 { ··· 3887 3886 #endif 3888 3887 3889 3888 *p->hw_debug5 = false; 3889 + #ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE 3890 + if (p->NumberOfActiveSurfaces > 1) 3891 + *p->hw_debug5 = true; 3892 + #else 3890 3893 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3891 3894 if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) 3892 3895 && p->display_cfg->plane_descriptors[k].surface.dcc.enable ··· 3906 3901 dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); 3907 3902 #endif 3908 3903 } 3904 + #endif 3909 3905 } 3910 3906 3911 3907 static enum dml2_odm_mode DecideODMMode(unsigned int HActive,

+22 -1

drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c

··· 258 258 * However this condition comes with a caveat. We need to ignore pipes that will 259 259 * require a change in OPP but still have the same stream id. For example during 260 260 * an MPC to ODM transiton. 261 + * 262 + * Adding check to avoid pipe select on the head pipe by utilizing dc resource 263 + * helper function resource_get_primary_dpp_pipe and comparing the pipe index. 261 264 */ 262 265 if (existing_state) { 263 266 for (i = 0; i < pipe_count; i++) { 264 267 if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) { 268 + struct pipe_ctx *head_pipe = 269 + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ? 270 + resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : 271 + NULL; 272 + 273 + // we should always respect the head pipe from selection 274 + if (head_pipe && head_pipe->pipe_idx == i) 275 + continue; 265 276 if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && 266 - existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) 277 + existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i && 278 + (existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe || 279 + existing_state->res_ctx.pipe_ctx[i].next_odm_pipe)) 267 280 continue; 268 281 269 282 preferred_pipe_candidates[num_preferred_candidates++] = i; ··· 305 292 */ 306 293 if (existing_state) { 307 294 for (i = 0; i < pipe_count; i++) { 295 + struct pipe_ctx *head_pipe = 296 + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ? 297 + resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : 298 + NULL; 299 + 300 + // we should always respect the head pipe from selection 301 + if (head_pipe && head_pipe->pipe_idx == i) 302 + continue; 308 303 if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && 309 304 existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) || 310 305 existing_state->res_ctx.pipe_ctx[i].stream_res.tg)

+5 -8

drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c

··· 1093 1093 if (!is_dsc_possible) 1094 1094 goto done; 1095 1095 1096 - // Final decission: can we do DSC or not? 1097 - if (is_dsc_possible) { 1098 - // Fill out the rest of DSC settings 1099 - dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported; 1100 - dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth; 1101 - dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4; 1102 - dsc_cfg->is_dp = dsc_sink_caps->is_dp; 1103 - } 1096 + /* Fill out the rest of DSC settings */ 1097 + dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported; 1098 + dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth; 1099 + dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4; 1100 + dsc_cfg->is_dp = dsc_sink_caps->is_dp; 1104 1101 1105 1102 done: 1106 1103 if (!is_dsc_possible)

+7 -1

drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h

··· 200 200 uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_B; 201 201 uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL1; 202 202 uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL2; 203 + uint32_t DCHUBBUB_CTRL_STATUS; 203 204 }; 204 205 205 206 #define HUBBUB_REG_FIELD_LIST_DCN32(type) \ ··· 321 320 type DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD;\ 322 321 type DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD;\ 323 322 type DCHUBBUB_TIMEOUT_DETECTION_EN;\ 324 - type DCHUBBUB_TIMEOUT_TIMER_RESET 323 + type DCHUBBUB_TIMEOUT_TIMER_RESET;\ 324 + type ROB_UNDERFLOW_STATUS;\ 325 + type ROB_OVERFLOW_STATUS;\ 326 + type ROB_OVERFLOW_CLEAR;\ 327 + type DCHUBBUB_HW_DEBUG;\ 328 + type CSTATE_SWATH_CHK_GOOD_MODE 325 329 326 330 #define HUBBUB_STUTTER_REG_FIELD_LIST(type) \ 327 331 type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\

+1

drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h

··· 96 96 unsigned int det1_size; 97 97 unsigned int det2_size; 98 98 unsigned int det3_size; 99 + bool allow_sdpif_rate_limit_when_cstate_req; 99 100 }; 100 101 101 102 void hubbub2_construct(struct dcn20_hubbub *hubbub,

+22 -2

drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c

··· 1192 1192 } 1193 1193 } 1194 1194 1195 - static void dcn401_program_timeout_thresholds(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs) 1195 + static bool dcn401_program_arbiter(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower) 1196 1196 { 1197 1197 struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); 1198 + 1199 + bool wm_pending = false; 1200 + uint32_t temp; 1198 1201 1199 1202 /* request backpressure and outstanding return threshold (unused)*/ 1200 1203 //REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, arb_regs->req_stall_threshold); 1201 1204 1202 1205 /* P-State stall threshold */ 1203 1206 REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, arb_regs->pstate_stall_threshold); 1207 + 1208 + if (safe_to_lower || arb_regs->allow_sdpif_rate_limit_when_cstate_req > hubbub2->allow_sdpif_rate_limit_when_cstate_req) { 1209 + hubbub2->allow_sdpif_rate_limit_when_cstate_req = arb_regs->allow_sdpif_rate_limit_when_cstate_req; 1210 + 1211 + /* only update the required bits */ 1212 + REG_GET(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, &temp); 1213 + if (hubbub2->allow_sdpif_rate_limit_when_cstate_req) { 1214 + temp |= (1 << 5); 1215 + } else { 1216 + temp &= ~(1 << 5); 1217 + } 1218 + REG_UPDATE(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, temp); 1219 + } else { 1220 + wm_pending = true; 1221 + } 1222 + 1223 + return wm_pending; 1204 1224 } 1205 1225 1206 1226 static const struct hubbub_funcs hubbub4_01_funcs = { ··· 1246 1226 .program_det_segments = dcn401_program_det_segments, 1247 1227 .program_compbuf_segments = dcn401_program_compbuf_segments, 1248 1228 .wait_for_det_update = dcn401_wait_for_det_update, 1249 - .program_timeout_thresholds = dcn401_program_timeout_thresholds, 1229 + .program_arbiter = dcn401_program_arbiter, 1250 1230 }; 1251 1231 1252 1232 void hubbub401_construct(struct dcn20_hubbub *hubbub2,

+6 -1

drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h

··· 128 128 HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, mask_sh),\ 129 129 HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, mask_sh),\ 130 130 HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_DETECTION_EN, mask_sh),\ 131 - HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh) 131 + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh),\ 132 + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_UNDERFLOW_STATUS, mask_sh),\ 133 + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_STATUS, mask_sh),\ 134 + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_CLEAR, mask_sh),\ 135 + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, mask_sh),\ 136 + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, CSTATE_SWATH_CHK_GOOD_MODE, mask_sh) 132 137 133 138 bool hubbub401_program_urgent_watermarks( 134 139 struct hubbub *hubbub,

+3 -3

drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c

··· 1925 1925 dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); 1926 1926 } 1927 1927 1928 - if (pipe_ctx->update_flags.raw || 1929 - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || 1930 - pipe_ctx->stream->update_flags.raw) 1928 + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || 1929 + pipe_ctx->plane_state->update_flags.raw || 1930 + pipe_ctx->stream->update_flags.raw)) 1931 1931 dcn20_update_dchubp_dpp(dc, pipe_ctx, context); 1932 1932 1933 1933 if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||

+8 -5

drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c

··· 1488 1488 &context->bw_ctx.bw.dcn.watermarks, 1489 1489 dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, 1490 1490 false); 1491 + /* update timeout thresholds */ 1492 + if (hubbub->funcs->program_arbiter) { 1493 + dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); 1494 + } 1491 1495 1492 1496 /* decrease compbuf size */ 1493 1497 if (hubbub->funcs->program_compbuf_segments) { ··· 1533 1529 &context->bw_ctx.bw.dcn.watermarks, 1534 1530 dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, 1535 1531 true); 1532 + /* update timeout thresholds */ 1533 + if (hubbub->funcs->program_arbiter) { 1534 + hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, true); 1535 + } 1536 1536 1537 1537 if (dc->clk_mgr->dc_mode_softmax_enabled) 1538 1538 if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && ··· 1561 1553 pipe_ctx->plane_res.hubp->funcs->program_extended_blank(pipe_ctx->plane_res.hubp, 1562 1554 pipe_ctx->dlg_regs.min_dst_y_next_start); 1563 1555 } 1564 - } 1565 - 1566 - /* update timeout thresholds */ 1567 - if (hubbub->funcs->program_timeout_thresholds) { 1568 - hubbub->funcs->program_timeout_thresholds(hubbub, &context->bw_ctx.bw.dcn.arb_regs); 1569 1556 } 1570 1557 } 1571 1558

+1 -1

drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h

··· 228 228 void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); 229 229 void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); 230 230 void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); 231 - void (*program_timeout_thresholds)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs); 231 + bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); 232 232 }; 233 233 234 234 struct hubbub {

+2 -1

drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h

··· 612 612 SR(DCHUBBUB_SDPIF_CFG1), \ 613 613 SR(DCHUBBUB_MEM_PWR_MODE_CTRL), \ 614 614 SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL1), \ 615 - SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2) 615 + SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2), \ 616 + SR(DCHUBBUB_CTRL_STATUS) 616 617 617 618 /* DCCG */ 618 619

+66 -31

drivers/gpu/drm/amd/display/dc/spl/dc_spl.c

··· 99 99 * 100 100 * recout_x = 128 + round(plane_x * 2304 / 1920) 101 101 * recout_w = 128 + round((plane_x + plane_w) * 2304 / 1920) - recout_x 102 - * recout_y = 0 + round(plane_y * 1440 / 1280) 102 + * recout_y = 0 + round(plane_y * 1440 / 1200) 103 103 * recout_h = 0 + round((plane_y + plane_h) * 1440 / 1200) - recout_y 104 104 * 105 105 * NOTE: fixed point division is not error free. To reduce errors ··· 739 739 return SCL_MODE_SCALING_444_RGB_ENABLE; 740 740 } 741 741 742 - /* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV 743 - * downscale without EASF 742 + /* 743 + * Bypass YUV if Y is 1:1 with no ISHARP 744 + * Do not bypass UV at 1:1 for cositing to be applied 744 745 */ 745 - if ((!enable_isharp) && (!enable_easf)) { 746 + if (!enable_isharp) { 746 747 if (data->ratios.horz.value == one && data->ratios.vert.value == one) 747 748 return SCL_MODE_SCALING_420_LUMA_BYPASS; 748 - if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) 749 - return SCL_MODE_SCALING_420_CHROMA_BYPASS; 750 749 } 751 750 752 751 return SCL_MODE_SCALING_420_YCBCR_ENABLE; ··· 932 933 int min_taps_y, min_taps_c; 933 934 enum lb_memory_config lb_config; 934 935 bool skip_easf = false; 936 + bool is_ycbcr = spl_dscl_is_video_format(spl_in->basic_in.format); 935 937 936 938 if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && 937 939 max_downscale_src_width != 0 && ··· 1074 1074 1075 1075 /* Sharpener requires scaler to be enabled, including for 1:1 1076 1076 * Check if ISHARP can be enabled 1077 - * If ISHARP is not enabled, for 1:1, set taps to 1 and disable 1078 - * EASF 1079 - * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if 1080 - * EASF is not enabled 1077 + * If ISHARP is not enabled, set taps to 1 if ratio is 1:1 1078 + * except for chroma taps. Keep previous taps so it can 1079 + * handle cositing 1081 1080 */ 1082 1081 1083 1082 *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch); ··· 1086 1087 spl_scratch->scl_data.taps.h_taps = 1; 1087 1088 spl_scratch->scl_data.taps.v_taps = 1; 1088 1089 1089 - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) 1090 + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_ycbcr) 1090 1091 spl_scratch->scl_data.taps.h_taps_c = 1; 1091 1092 1092 - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) 1093 + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_ycbcr) 1093 1094 spl_scratch->scl_data.taps.v_taps_c = 1; 1094 1095 1095 1096 *enable_easf_v = false; 1096 1097 *enable_easf_h = false; 1097 1098 } else { 1098 1099 if ((!*enable_easf_h) && 1100 + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz))) 1101 + spl_scratch->scl_data.taps.h_taps = 1; 1102 + 1103 + if ((!*enable_easf_v) && 1104 + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) 1105 + spl_scratch->scl_data.taps.v_taps = 1; 1106 + 1107 + if ((!*enable_easf_h) && !is_ycbcr && 1099 1108 (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) 1100 1109 spl_scratch->scl_data.taps.h_taps_c = 1; 1101 1110 1102 - if ((!*enable_easf_v) && 1111 + if ((!*enable_easf_v) && !is_ycbcr && 1103 1112 (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) 1104 1113 spl_scratch->scl_data.taps.v_taps_c = 1; 1105 1114 } ··· 1118 1111 static void spl_set_black_color_data(enum spl_pixel_format format, 1119 1112 struct scl_black_color *scl_black_color) 1120 1113 { 1121 - bool ycbcr = format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN 1122 - && format <= SPL_PIXEL_FORMAT_VIDEO_END; 1114 + bool ycbcr = spl_dscl_is_video_format(format); 1123 1115 if (ycbcr) { 1124 1116 scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y; 1125 1117 scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR; ··· 1752 1746 spl_set_blur_scale_data(dscl_prog_data, data); 1753 1747 } 1754 1748 1749 + /* Calculate recout, scaling ratio, and viewport, then get optimal number of taps */ 1750 + static bool spl_calculate_number_of_taps(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out, 1751 + bool *enable_easf_v, bool *enable_easf_h, bool *enable_isharp) 1752 + { 1753 + bool res = false; 1754 + 1755 + memset(spl_scratch, 0, sizeof(struct spl_scratch)); 1756 + spl_scratch->scl_data.h_active = spl_in->h_active; 1757 + spl_scratch->scl_data.v_active = spl_in->v_active; 1758 + 1759 + // All SPL calls 1760 + /* recout calculation */ 1761 + /* depends on h_active */ 1762 + spl_calculate_recout(spl_in, spl_scratch, spl_out); 1763 + /* depends on pixel format */ 1764 + spl_calculate_scaling_ratios(spl_in, spl_scratch, spl_out); 1765 + /* depends on scaling ratios and recout, does not calculate offset yet */ 1766 + spl_calculate_viewport_size(spl_in, spl_scratch); 1767 + 1768 + res = spl_get_optimal_number_of_taps( 1769 + spl_in->basic_out.max_downscale_src_width, spl_in, 1770 + spl_scratch, &spl_in->scaling_quality, enable_easf_v, 1771 + enable_easf_h, enable_isharp); 1772 + return res; 1773 + } 1774 + 1755 1775 /* Calculate scaler parameters */ 1756 1776 bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) 1757 1777 { ··· 1792 1760 bool enable_isharp = false; 1793 1761 const struct spl_scaler_data *data = &spl_scratch.scl_data; 1794 1762 1795 - memset(&spl_scratch, 0, sizeof(struct spl_scratch)); 1796 - spl_scratch.scl_data.h_active = spl_in->h_active; 1797 - spl_scratch.scl_data.v_active = spl_in->v_active; 1763 + res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out, 1764 + &enable_easf_v, &enable_easf_h, &enable_isharp); 1798 1765 1799 - // All SPL calls 1800 - /* recout calculation */ 1801 - /* depends on h_active */ 1802 - spl_calculate_recout(spl_in, &spl_scratch, spl_out); 1803 - /* depends on pixel format */ 1804 - spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); 1805 - /* depends on scaling ratios and recout, does not calculate offset yet */ 1806 - spl_calculate_viewport_size(spl_in, &spl_scratch); 1807 - 1808 - res = spl_get_optimal_number_of_taps( 1809 - spl_in->basic_out.max_downscale_src_width, spl_in, 1810 - &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, 1811 - &enable_easf_h, &enable_isharp); 1812 1766 /* 1813 1767 * Depends on recout, scaling ratios, h_active and taps 1814 1768 * May need to re-check lb size after this in some obscure scenario ··· 1840 1822 spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup, 1841 1823 spl_in->debug.scale_to_sharpness_policy); 1842 1824 1825 + return res; 1826 + } 1827 + 1828 + /* External interface to get number of taps only */ 1829 + bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out) 1830 + { 1831 + bool res = false; 1832 + bool enable_easf_v = false; 1833 + bool enable_easf_h = false; 1834 + bool enable_isharp = false; 1835 + struct spl_scratch spl_scratch; 1836 + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; 1837 + const struct spl_scaler_data *data = &spl_scratch.scl_data; 1838 + 1839 + res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out, 1840 + &enable_easf_v, &enable_easf_h, &enable_isharp); 1841 + spl_set_taps_data(dscl_prog_data, data); 1843 1842 return res; 1844 1843 }

+2

drivers/gpu/drm/amd/display/dc/spl/dc_spl.h

··· 13 13 14 14 bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); 15 15 16 + bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out); 17 + 16 18 #endif /* __DC_SPL_H__ */

+2

drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h

··· 7571 7571 // base address: 0x10100000 7572 7572 #define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0 0xd000 7573 7573 #define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_BASE_IDX 5 7574 + #define regRCC_DEV0_EPF5_STRAP4 0xd284 7575 + #define regRCC_DEV0_EPF5_STRAP4_BASE_IDX 5 7574 7576 7575 7577 7576 7578 // addressBlock: nbio_nbif0_bif_rst_bif_rst_regblk

+13

drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h

··· 50665 50665 #define RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_D1_SUPPORT_DEV0_F0_MASK 0x40000000L 50666 50666 #define RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_D2_SUPPORT_DEV0_F0_MASK 0x80000000L 50667 50667 50668 + //RCC_DEV0_EPF5_STRAP4 50669 + #define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_64BIT_EN_DEV0_F5__SHIFT 0x14 50670 + #define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_EN_DEV0_F5__SHIFT 0x15 50671 + #define RCC_DEV0_EPF5_STRAP4__STRAP_FLR_EN_DEV0_F5__SHIFT 0x16 50672 + #define RCC_DEV0_EPF5_STRAP4__STRAP_PME_SUPPORT_DEV0_F5__SHIFT 0x17 50673 + #define RCC_DEV0_EPF5_STRAP4__STRAP_INTERRUPT_PIN_DEV0_F5__SHIFT 0x1c 50674 + #define RCC_DEV0_EPF5_STRAP4__STRAP_AUXPWR_SUPPORT_DEV0_F5__SHIFT 0x1f 50675 + #define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_64BIT_EN_DEV0_F5_MASK 0x00100000L 50676 + #define RCC_DEV0_EPF5_STRAP4__STRAP_ATOMIC_EN_DEV0_F5_MASK 0x00200000L 50677 + #define RCC_DEV0_EPF5_STRAP4__STRAP_FLR_EN_DEV0_F5_MASK 0x00400000L 50678 + #define RCC_DEV0_EPF5_STRAP4__STRAP_PME_SUPPORT_DEV0_F5_MASK 0x0F800000L 50679 + #define RCC_DEV0_EPF5_STRAP4__STRAP_INTERRUPT_PIN_DEV0_F5_MASK 0x70000000L 50680 + #define RCC_DEV0_EPF5_STRAP4__STRAP_AUXPWR_SUPPORT_DEV0_F5_MASK 0x80000000L 50668 50681 50669 50682 // addressBlock: nbio_nbif0_bif_rst_bif_rst_regblk 50670 50683 //HARD_RST_CTRL

+114 -4

drivers/gpu/drm/amd/include/kgd_pp_interface.h

··· 357 357 358 358 struct amdgpu_xcp_metrics { 359 359 /* Utilization Instantaneous (%) */ 360 - u32 gfx_busy_inst[MAX_XCC]; 361 - u16 jpeg_busy[NUM_JPEG_ENG]; 362 - u16 vcn_busy[NUM_VCN]; 360 + uint32_t gfx_busy_inst[MAX_XCC]; 361 + uint16_t jpeg_busy[NUM_JPEG_ENG]; 362 + uint16_t vcn_busy[NUM_VCN]; 363 363 /* Utilization Accumulated (%) */ 364 - u64 gfx_busy_acc[MAX_XCC]; 364 + uint64_t gfx_busy_acc[MAX_XCC]; 365 + }; 366 + 367 + struct amdgpu_xcp_metrics_v1_1 { 368 + /* Utilization Instantaneous (%) */ 369 + uint32_t gfx_busy_inst[MAX_XCC]; 370 + uint16_t jpeg_busy[NUM_JPEG_ENG]; 371 + uint16_t vcn_busy[NUM_VCN]; 372 + /* Utilization Accumulated (%) */ 373 + uint64_t gfx_busy_acc[MAX_XCC]; 374 + /* Total App Clock Counter Accumulated */ 375 + uint64_t gfx_below_host_limit_acc[MAX_XCC]; 365 376 }; 366 377 367 378 struct amd_pm_funcs { ··· 983 972 984 973 /* XCP metrics stats */ 985 974 struct amdgpu_xcp_metrics xcp_stats[NUM_XCP]; 975 + 976 + /* PCIE other end recovery counter */ 977 + uint32_t pcie_lc_perf_other_end_recovery; 978 + }; 979 + 980 + struct gpu_metrics_v1_7 { 981 + struct metrics_table_header common_header; 982 + 983 + /* Temperature (Celsius) */ 984 + uint16_t temperature_hotspot; 985 + uint16_t temperature_mem; 986 + uint16_t temperature_vrsoc; 987 + 988 + /* Power (Watts) */ 989 + uint16_t curr_socket_power; 990 + 991 + /* Utilization (%) */ 992 + uint16_t average_gfx_activity; 993 + uint16_t average_umc_activity; // memory controller 994 + 995 + /* VRAM max bandwidthi (in GB/sec) at max memory clock */ 996 + uint64_t mem_max_bandwidth; 997 + 998 + /* Energy (15.259uJ (2^-16) units) */ 999 + uint64_t energy_accumulator; 1000 + 1001 + /* Driver attached timestamp (in ns) */ 1002 + uint64_t system_clock_counter; 1003 + 1004 + /* Accumulation cycle counter */ 1005 + uint32_t accumulation_counter; 1006 + 1007 + /* Accumulated throttler residencies */ 1008 + uint32_t prochot_residency_acc; 1009 + uint32_t ppt_residency_acc; 1010 + uint32_t socket_thm_residency_acc; 1011 + uint32_t vr_thm_residency_acc; 1012 + uint32_t hbm_thm_residency_acc; 1013 + 1014 + /* Clock Lock Status. Each bit corresponds to clock instance */ 1015 + uint32_t gfxclk_lock_status; 1016 + 1017 + /* Link width (number of lanes) and speed (in 0.1 GT/s) */ 1018 + uint16_t pcie_link_width; 1019 + uint16_t pcie_link_speed; 1020 + 1021 + /* XGMI bus width and bitrate (in Gbps) */ 1022 + uint16_t xgmi_link_width; 1023 + uint16_t xgmi_link_speed; 1024 + 1025 + /* Utilization Accumulated (%) */ 1026 + uint32_t gfx_activity_acc; 1027 + uint32_t mem_activity_acc; 1028 + 1029 + /*PCIE accumulated bandwidth (GB/sec) */ 1030 + uint64_t pcie_bandwidth_acc; 1031 + 1032 + /*PCIE instantaneous bandwidth (GB/sec) */ 1033 + uint64_t pcie_bandwidth_inst; 1034 + 1035 + /* PCIE L0 to recovery state transition accumulated count */ 1036 + uint64_t pcie_l0_to_recov_count_acc; 1037 + 1038 + /* PCIE replay accumulated count */ 1039 + uint64_t pcie_replay_count_acc; 1040 + 1041 + /* PCIE replay rollover accumulated count */ 1042 + uint64_t pcie_replay_rover_count_acc; 1043 + 1044 + /* PCIE NAK sent accumulated count */ 1045 + uint32_t pcie_nak_sent_count_acc; 1046 + 1047 + /* PCIE NAK received accumulated count */ 1048 + uint32_t pcie_nak_rcvd_count_acc; 1049 + 1050 + /* XGMI accumulated data transfer size(KiloBytes) */ 1051 + uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS]; 1052 + uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS]; 1053 + 1054 + /* XGMI link status(active/inactive) */ 1055 + uint16_t xgmi_link_status[NUM_XGMI_LINKS]; 1056 + 1057 + uint16_t padding; 1058 + 1059 + /* PMFW attached timestamp (10ns resolution) */ 1060 + uint64_t firmware_timestamp; 1061 + 1062 + /* Current clocks (Mhz) */ 1063 + uint16_t current_gfxclk[MAX_GFX_CLKS]; 1064 + uint16_t current_socclk[MAX_CLKS]; 1065 + uint16_t current_vclk0[MAX_CLKS]; 1066 + uint16_t current_dclk0[MAX_CLKS]; 1067 + uint16_t current_uclk; 1068 + 1069 + /* Number of current partition */ 1070 + uint16_t num_partition; 1071 + 1072 + /* XCP metrics stats */ 1073 + struct amdgpu_xcp_metrics_v1_1 xcp_stats[NUM_XCP]; 986 1074 987 1075 /* PCIE other end recovery counter */ 988 1076 uint32_t pcie_lc_perf_other_end_recovery;

+11 -13

drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c

··· 257 257 priv->smu_tables.entry[TABLE_WATERMARKS].size = sizeof(Watermarks_t); 258 258 259 259 tools_size = 0x19000; 260 - if (tools_size) { 261 - ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, 262 - tools_size, 263 - PAGE_SIZE, 264 - AMDGPU_GEM_DOMAIN_VRAM, 265 - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].handle, 266 - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr, 267 - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].table); 268 - if (ret) 269 - goto err1; 260 + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, 261 + tools_size, 262 + PAGE_SIZE, 263 + AMDGPU_GEM_DOMAIN_VRAM, 264 + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].handle, 265 + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr, 266 + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].table); 267 + if (ret) 268 + goto err1; 270 269 271 - priv->smu_tables.entry[TABLE_PMSTATUSLOG].version = 0x01; 272 - priv->smu_tables.entry[TABLE_PMSTATUSLOG].size = tools_size; 273 - } 270 + priv->smu_tables.entry[TABLE_PMSTATUSLOG].version = 0x01; 271 + priv->smu_tables.entry[TABLE_PMSTATUSLOG].size = tools_size; 274 272 275 273 /* allocate space for AVFS Fuse table */ 276 274 ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,

+6 -2

drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

··· 1704 1704 return ret; 1705 1705 } 1706 1706 1707 - if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) 1707 + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) 1708 + pcie_gen = 4; 1709 + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) 1708 1710 pcie_gen = 3; 1709 1711 else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) 1710 1712 pcie_gen = 2; ··· 1719 1717 * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 1720 1718 * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 1721 1719 */ 1722 - if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) 1720 + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32) 1721 + pcie_width = 7; 1722 + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) 1723 1723 pcie_width = 6; 1724 1724 else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) 1725 1725 pcie_width = 5;

+1 -1

drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h

··· 53 53 #define CTF_OFFSET_MEM 5 54 54 55 55 extern const int decoded_link_speed[5]; 56 - extern const int decoded_link_width[7]; 56 + extern const int decoded_link_width[8]; 57 57 58 58 #define DECODE_GEN_SPEED(gen_speed_idx) (decoded_link_speed[gen_speed_idx]) 59 59 #define DECODE_LANE_WIDTH(lane_width_idx) (decoded_link_width[lane_width_idx])

+5 -1

drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c

··· 1344 1344 *default_power_limit = power_limit; 1345 1345 if (max_power_limit) 1346 1346 *max_power_limit = power_limit; 1347 + /** 1348 + * No lower bound is imposed on the limit. Any unreasonable limit set 1349 + * will result in frequent throttling. 1350 + */ 1347 1351 if (min_power_limit) 1348 - *min_power_limit = power_limit; 1352 + *min_power_limit = 0; 1349 1353 1350 1354 return 0; 1351 1355 }

+7 -5

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

··· 96 96 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0 97 97 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 98 98 #define LINK_SPEED_MAX 4 99 - 100 99 #define SMU_13_0_6_DSCLK_THRESHOLD 140 101 100 102 101 #define MCA_BANK_IPID(_ip, _hwid, _type) \ ··· 369 370 return -ENOMEM; 370 371 smu_table->metrics_time = 0; 371 372 372 - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_6); 373 + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_7); 373 374 smu_table->gpu_metrics_table = 374 375 kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); 375 376 if (!smu_table->gpu_metrics_table) { ··· 2320 2321 { 2321 2322 bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst; 2322 2323 struct smu_table_context *smu_table = &smu->smu_table; 2323 - struct gpu_metrics_v1_6 *gpu_metrics = 2324 - (struct gpu_metrics_v1_6 *)smu_table->gpu_metrics_table; 2324 + struct gpu_metrics_v1_7 *gpu_metrics = 2325 + (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; 2325 2326 bool flag = smu_v13_0_6_is_unified_metrics(smu); 2326 2327 int ret = 0, xcc_id, inst, i, j, k, idx; 2327 2328 struct amdgpu_device *adev = smu->adev; ··· 2340 2341 2341 2342 metrics_a = (MetricsTableA_t *)metrics_x; 2342 2343 2343 - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 6); 2344 + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); 2344 2345 2345 2346 gpu_metrics->temperature_hotspot = 2346 2347 SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag)); ··· 2447 2448 SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc, flag)[i]); 2448 2449 gpu_metrics->xgmi_write_data_acc[i] = 2449 2450 SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, flag)[i]); 2451 + ret = amdgpu_get_xgmi_link_status(adev, i); 2452 + if (ret >= 0) 2453 + gpu_metrics->xgmi_link_status[i] = ret; 2450 2454 } 2451 2455 2452 2456 gpu_metrics->num_partition = adev->xcp_mgr->num_xcps;

+2

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c

··· 2147 2147 gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; 2148 2148 2149 2149 gpu_metrics->current_gfxclk = metrics->CurrClock[PPCLK_GFXCLK]; 2150 + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; 2151 + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; 2150 2152 gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; 2151 2153 gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; 2152 2154 gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_1];

+1 -1

drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c

··· 49 49 #define regMP1_SMN_IH_SW_INT_CTRL_mp1_14_0_0_BASE_IDX 0 50 50 51 51 const int decoded_link_speed[5] = {1, 2, 3, 4, 5}; 52 - const int decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; 52 + const int decoded_link_width[8] = {0, 1, 2, 4, 8, 12, 16, 32}; 53 53 /* 54 54 * DO NOT use these for err/warn/info/debug messages. 55 55 * Use dev_err, dev_warn, dev_info and dev_dbg instead.

+29 -8

drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c

··· 1173 1173 (pcie_table->pcie_gen[i] == 0) ? "2.5GT/s," : 1174 1174 (pcie_table->pcie_gen[i] == 1) ? "5.0GT/s," : 1175 1175 (pcie_table->pcie_gen[i] == 2) ? "8.0GT/s," : 1176 - (pcie_table->pcie_gen[i] == 3) ? "16.0GT/s," : "", 1176 + (pcie_table->pcie_gen[i] == 3) ? "16.0GT/s," : 1177 + (pcie_table->pcie_gen[i] == 4) ? "32.0GT/s," : "", 1177 1178 (pcie_table->pcie_lane[i] == 1) ? "x1" : 1178 1179 (pcie_table->pcie_lane[i] == 2) ? "x2" : 1179 1180 (pcie_table->pcie_lane[i] == 3) ? "x4" : 1180 1181 (pcie_table->pcie_lane[i] == 4) ? "x8" : 1181 1182 (pcie_table->pcie_lane[i] == 5) ? "x12" : 1182 - (pcie_table->pcie_lane[i] == 6) ? "x16" : "", 1183 + (pcie_table->pcie_lane[i] == 6) ? "x16" : 1184 + (pcie_table->pcie_lane[i] == 7) ? "x32" : "", 1183 1185 pcie_table->clk_freq[i], 1184 1186 (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && 1185 1187 (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ? ··· 1465 1463 struct smu_14_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1466 1464 struct smu_14_0_pcie_table *pcie_table = 1467 1465 &dpm_context->dpm_tables.pcie_table; 1466 + int num_of_levels = pcie_table->num_of_link_levels; 1468 1467 uint32_t smu_pcie_arg; 1469 1468 int ret, i; 1470 1469 1471 - for (i = 0; i < pcie_table->num_of_link_levels; i++) { 1472 - if (pcie_table->pcie_gen[i] > pcie_gen_cap) 1473 - pcie_table->pcie_gen[i] = pcie_gen_cap; 1474 - if (pcie_table->pcie_lane[i] > pcie_width_cap) 1475 - pcie_table->pcie_lane[i] = pcie_width_cap; 1470 + if (!num_of_levels) 1471 + return 0; 1476 1472 1473 + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { 1474 + if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap) 1475 + pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1]; 1476 + 1477 + if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap) 1478 + pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1]; 1479 + 1480 + /* Force all levels to use the same settings */ 1481 + for (i = 0; i < num_of_levels; i++) { 1482 + pcie_table->pcie_gen[i] = pcie_gen_cap; 1483 + pcie_table->pcie_lane[i] = pcie_width_cap; 1484 + } 1485 + } else { 1486 + for (i = 0; i < num_of_levels; i++) { 1487 + if (pcie_table->pcie_gen[i] > pcie_gen_cap) 1488 + pcie_table->pcie_gen[i] = pcie_gen_cap; 1489 + if (pcie_table->pcie_lane[i] > pcie_width_cap) 1490 + pcie_table->pcie_lane[i] = pcie_width_cap; 1491 + } 1492 + } 1493 + 1494 + for (i = 0; i < num_of_levels; i++) { 1477 1495 smu_pcie_arg = i << 16; 1478 1496 smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; 1479 1497 smu_pcie_arg |= pcie_table->pcie_lane[i]; ··· 2775 2753 .get_unique_id = smu_v14_0_2_get_unique_id, 2776 2754 .get_power_limit = smu_v14_0_2_get_power_limit, 2777 2755 .set_power_limit = smu_v14_0_2_set_power_limit, 2778 - .set_power_source = smu_v14_0_set_power_source, 2779 2756 .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, 2780 2757 .set_power_profile_mode = smu_v14_0_2_set_power_profile_mode, 2781 2758 .run_btc = smu_v14_0_run_btc,

+3

drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c

··· 1081 1081 case METRICS_VERSION(1, 6): 1082 1082 structure_size = sizeof(struct gpu_metrics_v1_6); 1083 1083 break; 1084 + case METRICS_VERSION(1, 7): 1085 + structure_size = sizeof(struct gpu_metrics_v1_7); 1086 + break; 1084 1087 case METRICS_VERSION(2, 0): 1085 1088 structure_size = sizeof(struct gpu_metrics_v2_0); 1086 1089 break;

+20 -12

drivers/gpu/drm/i915/display/intel_hdcp.c

··· 1503 1503 static int hdcp2_authentication_key_exchange(struct intel_connector *connector) 1504 1504 { 1505 1505 struct intel_display *display = to_intel_display(connector); 1506 + struct intel_digital_port *dig_port = 1507 + intel_attached_dig_port(connector); 1506 1508 struct intel_hdcp *hdcp = &connector->hdcp; 1507 1509 union { 1508 1510 struct hdcp2_ake_init ake_init; ··· 1515 1513 } msgs; 1516 1514 const struct intel_hdcp_shim *shim = hdcp->shim; 1517 1515 size_t size; 1518 - int ret, i; 1516 + int ret, i, max_retries; 1519 1517 1520 1518 /* Init for seq_num */ 1521 1519 hdcp->seq_num_v = 0; 1522 1520 hdcp->seq_num_m = 0; 1521 + 1522 + if (intel_encoder_is_dp(&dig_port->base) || 1523 + intel_encoder_is_mst(&dig_port->base)) 1524 + max_retries = 10; 1525 + else 1526 + max_retries = 1; 1523 1527 1524 1528 ret = hdcp2_prepare_ake_init(connector, &msgs.ake_init); 1525 1529 if (ret < 0) ··· 1533 1525 1534 1526 /* 1535 1527 * Retry the first read and write to downstream at least 10 times 1536 - * with a 50ms delay if not hdcp2 capable(dock decides to stop advertising 1537 - * hdcp2 capability for some reason). The reason being that 1538 - * during suspend resume dock usually keeps the HDCP2 registers inaccesible 1539 - * causing AUX error. This wouldn't be a big problem if the userspace 1540 - * just kept retrying with some delay while it continues to play low 1541 - * value content but most userpace applications end up throwing an error 1542 - * when it receives one from KMD. This makes sure we give the dock 1543 - * and the sink devices to complete its power cycle and then try HDCP 1544 - * authentication. The values of 10 and delay of 50ms was decided based 1545 - * on multiple trial and errors. 1528 + * with a 50ms delay if not hdcp2 capable for DP/DPMST encoders 1529 + * (dock decides to stop advertising hdcp2 capability for some reason). 1530 + * The reason being that during suspend resume dock usually keeps the 1531 + * HDCP2 registers inaccesible causing AUX error. This wouldn't be a 1532 + * big problem if the userspace just kept retrying with some delay while 1533 + * it continues to play low value content but most userpace applications 1534 + * end up throwing an error when it receives one from KMD. This makes 1535 + * sure we give the dock and the sink devices to complete its power cycle 1536 + * and then try HDCP authentication. The values of 10 and delay of 50ms 1537 + * was decided based on multiple trial and errors. 1546 1538 */ 1547 - for (i = 0; i < 10; i++) { 1539 + for (i = 0; i < max_retries; i++) { 1548 1540 if (!intel_hdcp2_get_capability(connector)) { 1549 1541 msleep(50); 1550 1542 continue;

+8 -4

drivers/gpu/drm/radeon/radeon_audio.c

··· 760 760 if (!rdev->audio.enabled || !rdev->mode_info.mode_config_initialized) 761 761 return 0; 762 762 763 - list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { 763 + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 764 + const struct drm_connector_helper_funcs *connector_funcs = 765 + connector->helper_private; 766 + encoder = connector_funcs->best_encoder(connector); 767 + 768 + if (!encoder) 769 + continue; 770 + 764 771 if (!radeon_encoder_is_digital(encoder)) 765 772 continue; 766 773 radeon_encoder = to_radeon_encoder(encoder); 767 774 dig = radeon_encoder->enc_priv; 768 775 if (!dig->pin || dig->pin->id != port) 769 - continue; 770 - connector = radeon_get_connector_for_encoder(encoder); 771 - if (!connector) 772 776 continue; 773 777 *enabled = true; 774 778 ret = drm_eld_size(connector->eld);

-10

drivers/gpu/drm/radeon/radeon_connectors.c

··· 1255 1255 goto exit; 1256 1256 } 1257 1257 } 1258 - 1259 - if (dret && radeon_connector->hpd.hpd != RADEON_HPD_NONE && 1260 - !radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) && 1261 - connector->connector_type == DRM_MODE_CONNECTOR_HDMIA) { 1262 - DRM_DEBUG_KMS("EDID is readable when HPD disconnected\n"); 1263 - schedule_delayed_work(&rdev->hotplug_work, msecs_to_jiffies(1000)); 1264 - ret = connector_status_disconnected; 1265 - goto exit; 1266 - } 1267 - 1268 1258 if (dret) { 1269 1259 radeon_connector->detected_by_load = false; 1270 1260 radeon_connector_free_edid(connector);

+1 -2

drivers/gpu/drm/radeon/radeon_drv.c

··· 248 248 MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); 249 249 module_param_named(cik_support, radeon_cik_support, int, 0444); 250 250 251 - static struct pci_device_id pciidlist[] = { 251 + static const struct pci_device_id pciidlist[] = { 252 252 radeon_PCI_IDS 253 253 }; 254 - 255 254 MODULE_DEVICE_TABLE(pci, pciidlist); 256 255 257 256 static const struct drm_driver kms_driver;

+1 -2

drivers/gpu/drm/radeon/radeon_ttm.c

··· 219 219 if (old_mem->mem_type == TTM_PL_TT && 220 220 new_mem->mem_type == TTM_PL_SYSTEM) { 221 221 radeon_ttm_tt_unbind(bo->bdev, bo->ttm); 222 - ttm_resource_free(bo, &bo->resource); 223 - ttm_bo_assign_mem(bo, new_mem); 222 + ttm_bo_move_null(bo, new_mem); 224 223 goto out; 225 224 } 226 225 if (rdev->ring[radeon_copy_ring_index(rdev)].ready &&

+1 -1

drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c

··· 82 82 * comment in rk_hdptx_phy_power_on() from 83 83 * drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c 84 84 */ 85 - phy_set_bus_width(hdmi->phy, rate / 100); 85 + phy_set_bus_width(hdmi->phy, div_u64(rate, 100)); 86 86 } 87 87 } 88 88

+6

drivers/gpu/drm/xe/xe_devcoredump.c

··· 23 23 #include "xe_guc_submit.h" 24 24 #include "xe_hw_engine.h" 25 25 #include "xe_module.h" 26 + #include "xe_pm.h" 26 27 #include "xe_sched_job.h" 27 28 #include "xe_vm.h" 28 29 ··· 159 158 { 160 159 struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); 161 160 struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); 161 + struct xe_device *xe = coredump_to_xe(coredump); 162 162 unsigned int fw_ref; 163 + 164 + xe_pm_runtime_get(xe); 163 165 164 166 /* keep going if fw fails as we still want to save the memory and SW data */ 165 167 fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); ··· 171 167 xe_vm_snapshot_capture_delayed(ss->vm); 172 168 xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); 173 169 xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); 170 + 171 + xe_pm_runtime_put(xe); 174 172 175 173 /* Calculate devcoredump size */ 176 174 ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);

+2 -1

drivers/gpu/drm/xe/xe_device.c

··· 350 350 INIT_LIST_HEAD(&xe->pinned.external_vram); 351 351 INIT_LIST_HEAD(&xe->pinned.evicted); 352 352 353 - xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); 353 + xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 354 + WQ_MEM_RECLAIM); 354 355 xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); 355 356 xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); 356 357 xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);

+25 -9

drivers/gpu/drm/xe/xe_guc_submit.c

··· 767 767 struct xe_exec_queue *q) 768 768 { 769 769 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 770 - struct xe_device *xe = guc_to_xe(guc); 771 770 int ret; 772 771 773 772 set_min_preemption_timeout(guc, q); 774 773 smp_rmb(); 775 - ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || 776 - xe_guc_read_stopped(guc), HZ * 5); 774 + ret = wait_event_timeout(guc->ct.wq, 775 + (!exec_queue_pending_enable(q) && 776 + !exec_queue_pending_disable(q)) || 777 + xe_guc_read_stopped(guc), 778 + HZ * 5); 777 779 if (!ret) { 778 780 struct xe_gpu_scheduler *sched = &q->guc->sched; 779 781 780 - drm_warn(&xe->drm, "Pending enable failed to respond"); 782 + xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); 781 783 xe_sched_submission_start(sched); 782 784 xe_gt_reset_async(q->gt); 783 785 xe_sched_tdr_queue_imm(sched); ··· 1101 1099 * modifying state 1102 1100 */ 1103 1101 ret = wait_event_timeout(guc->ct.wq, 1104 - !exec_queue_pending_enable(q) || 1102 + (!exec_queue_pending_enable(q) && 1103 + !exec_queue_pending_disable(q)) || 1105 1104 xe_guc_read_stopped(guc), HZ * 5); 1106 1105 if (!ret || xe_guc_read_stopped(guc)) 1107 1106 goto trigger_reset; ··· 1331 1328 1332 1329 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && 1333 1330 exec_queue_enabled(q)) { 1334 - wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || 1335 - xe_guc_read_stopped(guc)); 1331 + wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING || 1332 + xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)); 1336 1333 1337 1334 if (!xe_guc_read_stopped(guc)) { 1338 1335 s64 since_resume_ms = ··· 1869 1866 xe_gt_assert(guc_to_gt(guc), runnable_state == 0); 1870 1867 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); 1871 1868 1872 - clear_exec_queue_pending_disable(q); 1873 1869 if (q->guc->suspend_pending) { 1874 1870 suspend_fence_signal(q); 1871 + clear_exec_queue_pending_disable(q); 1875 1872 } else { 1876 1873 if (exec_queue_banned(q) || check_timeout) { 1877 1874 smp_wmb(); 1878 1875 wake_up_all(&guc->ct.wq); 1879 1876 } 1880 - if (!check_timeout) 1877 + if (!check_timeout && exec_queue_destroyed(q)) { 1878 + /* 1879 + * Make sure to clear the pending_disable only 1880 + * after sampling the destroyed state. We want 1881 + * to ensure we don't trigger the unregister too 1882 + * early with something intending to only 1883 + * disable scheduling. The caller doing the 1884 + * destroy must wait for an ongoing 1885 + * pending_disable before marking as destroyed. 1886 + */ 1887 + clear_exec_queue_pending_disable(q); 1881 1888 deregister_exec_queue(guc, q); 1889 + } else { 1890 + clear_exec_queue_pending_disable(q); 1891 + } 1882 1892 } 1883 1893 } 1884 1894 }

+4 -2

drivers/gpu/drm/xe/xe_migrate.c

··· 209 209 num_entries * XE_PAGE_SIZE, 210 210 ttm_bo_type_kernel, 211 211 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 212 - XE_BO_FLAG_PINNED); 212 + XE_BO_FLAG_PINNED | 213 + XE_BO_FLAG_PAGETABLE); 213 214 if (IS_ERR(bo)) 214 215 return PTR_ERR(bo); 215 216 ··· 1351 1350 1352 1351 /* For sysmem PTE's, need to map them in our hole.. */ 1353 1352 if (!IS_DGFX(xe)) { 1353 + u16 pat_index = xe->pat.idx[XE_CACHE_WB]; 1354 1354 u32 ptes, ofs; 1355 1355 1356 1356 ppgtt_ofs = NUM_KERNEL_PDE - 1; ··· 1411 1409 pt_bo->update_index = current_update; 1412 1410 1413 1411 addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, 1414 - XE_CACHE_WB, 0); 1412 + pat_index, 0); 1415 1413 bb->cs[bb->len++] = lower_32_bits(addr); 1416 1414 bb->cs[bb->len++] = upper_32_bits(addr); 1417 1415 }

+1 -1

drivers/gpu/drm/xe/xe_pci.c

··· 174 174 GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) 175 175 176 176 static const struct xe_graphics_desc graphics_xe2 = { 177 - .name = "Xe2_LPG / Xe2_HPG", 177 + .name = "Xe2_LPG / Xe2_HPG / Xe3_LPG", 178 178 179 179 XE2_GFX_FEATURES, 180 180 };

+5 -1

drivers/gpu/drm/xe/xe_sync.c

··· 87 87 drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n"); 88 88 } 89 89 90 - wake_up_all(&ufence->xe->ufence_wq); 90 + /* 91 + * Wake up waiters only after updating the ufence state, allowing the UMD 92 + * to safely reuse the same ufence without encountering -EBUSY errors. 93 + */ 91 94 WRITE_ONCE(ufence->signalled, 1); 95 + wake_up_all(&ufence->xe->ufence_wq); 92 96 user_fence_put(ufence); 93 97 } 94 98

Configure Feed

Configure Feed