Merge tag 'drm-next-2024-01-19' of git://anongit.freedesktop.org/drm/drm

+1 -2

drivers/gpu/drm/amd/amdgpu/amdgpu.h

··· 254 254 255 255 extern int amdgpu_wbrf; 256 256 257 - extern int fw_bo_location; 258 - 259 257 #define AMDGPU_VM_MAX_NUM_CTX 4096 260 258 #define AMDGPU_SG_THRESHOLD (256*1024*1024) 261 259 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 ··· 1144 1146 bool debug_vm; 1145 1147 bool debug_largebar; 1146 1148 bool debug_disable_soft_recovery; 1149 + bool debug_use_vram_fw_buf; 1147 1150 }; 1148 1151 1149 1152 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,

+6 -5

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

··· 138 138 amdgpu_device_gpu_recover(adev, NULL, &reset_context); 139 139 } 140 140 141 + static const struct drm_client_funcs kfd_client_funcs = { 142 + .unregister = drm_client_release, 143 + }; 141 144 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) 142 145 { 143 146 int i; ··· 164 161 .enable_mes = adev->enable_mes, 165 162 }; 166 163 167 - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL); 164 + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); 168 165 if (ret) { 169 166 dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); 170 167 return; ··· 698 695 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) 699 696 { 700 697 enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; 701 - /* Temporary workaround to fix issues observed in some 702 - * compute applications when GFXOFF is enabled on GFX11. 703 - */ 704 - if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) { 698 + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && 699 + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { 705 700 pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); 706 701 amdgpu_gfx_off_ctrl(adev, idle); 707 702 } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

··· 311 311 int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); 312 312 313 313 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, 314 - struct dma_fence **ef); 314 + struct dma_fence __rcu **ef); 315 315 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, 316 316 struct kfd_vm_fault_info *info); 317 317 int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,

+2 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

··· 2802 2802 put_task_struct(usertask); 2803 2803 } 2804 2804 2805 - static void replace_eviction_fence(struct dma_fence **ef, 2805 + static void replace_eviction_fence(struct dma_fence __rcu **ef, 2806 2806 struct dma_fence *new_ef) 2807 2807 { 2808 2808 struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true ··· 2837 2837 * 7. Add fence to all PD and PT BOs. 2838 2838 * 8. Unreserve all BOs 2839 2839 */ 2840 - int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) 2840 + int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef) 2841 2841 { 2842 2842 struct amdkfd_process_info *process_info = info; 2843 2843 struct amdgpu_vm *peer_vm;

+2 -31

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

··· 1544 1544 return true; 1545 1545 1546 1546 fw_ver = *((uint32_t *)adev->pm.fw->data + 69); 1547 + release_firmware(adev->pm.fw); 1547 1548 if (fw_ver < 0x00160e00) 1548 1549 return true; 1549 1550 } ··· 5246 5245 struct amdgpu_device *tmp_adev = NULL; 5247 5246 bool need_full_reset, skip_hw_reset, vram_lost = false; 5248 5247 int r = 0; 5249 - bool gpu_reset_for_dev_remove = 0; 5250 5248 5251 5249 /* Try reset handler method first */ 5252 5250 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, ··· 5264 5264 need_full_reset = 5265 5265 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5266 5266 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); 5267 - 5268 - gpu_reset_for_dev_remove = 5269 - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 5270 - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5271 5267 5272 5268 /* 5273 5269 * ASIC reset has to be done on all XGMI hive nodes ASAP ··· 5305 5309 } 5306 5310 5307 5311 amdgpu_ras_intr_cleared(); 5308 - } 5309 - 5310 - /* Since the mode1 reset affects base ip blocks, the 5311 - * phase1 ip blocks need to be resumed. Otherwise there 5312 - * will be a BIOS signature error and the psp bootloader 5313 - * can't load kdb on the next amdgpu install. 5314 - */ 5315 - if (gpu_reset_for_dev_remove) { 5316 - list_for_each_entry(tmp_adev, device_list_handle, reset_list) 5317 - amdgpu_device_ip_resume_phase1(tmp_adev); 5318 - 5319 - goto end; 5320 5312 } 5321 5313 5322 5314 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { ··· 5543 5559 int i, r = 0; 5544 5560 bool need_emergency_restart = false; 5545 5561 bool audio_suspended = false; 5546 - bool gpu_reset_for_dev_remove = false; 5547 - 5548 - gpu_reset_for_dev_remove = 5549 - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 5550 - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5551 5562 5552 5563 /* 5553 5564 * Special case: RAS triggered and full reset isn't supported ··· 5580 5601 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { 5581 5602 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 5582 5603 list_add_tail(&tmp_adev->reset_list, &device_list); 5583 - if (gpu_reset_for_dev_remove && adev->shutdown) 5604 + if (adev->shutdown) 5584 5605 tmp_adev->shutdown = true; 5585 5606 } 5586 5607 if (!list_is_first(&adev->reset_list, &device_list)) ··· 5665 5686 5666 5687 retry: /* Rest of adevs pre asic reset from XGMI hive. */ 5667 5688 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5668 - if (gpu_reset_for_dev_remove) { 5669 - /* Workaroud for ASICs need to disable SMC first */ 5670 - amdgpu_device_smu_fini_early(tmp_adev); 5671 - } 5672 5689 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); 5673 5690 /*TODO Should we stop ?*/ 5674 5691 if (r) { ··· 5696 5721 r = amdgpu_do_asic_reset(device_list_handle, reset_context); 5697 5722 if (r && r == -EAGAIN) 5698 5723 goto retry; 5699 - 5700 - if (!r && gpu_reset_for_dev_remove) 5701 - goto recover_end; 5702 5724 } 5703 5725 5704 5726 skip_hw_reset: ··· 5751 5779 amdgpu_ras_set_error_query_ready(tmp_adev, true); 5752 5780 } 5753 5781 5754 - recover_end: 5755 5782 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 5756 5783 reset_list); 5757 5784 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);

-2

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

··· 1963 1963 amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); 1964 1964 break; 1965 1965 case IP_VERSION(9, 4, 3): 1966 - if (!amdgpu_exp_hw_support) 1967 - return -EINVAL; 1968 1966 amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); 1969 1967 break; 1970 1968 case IP_VERSION(10, 1, 10):

+8 -39

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

··· 128 128 AMDGPU_DEBUG_VM = BIT(0), 129 129 AMDGPU_DEBUG_LARGEBAR = BIT(1), 130 130 AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), 131 + AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), 131 132 }; 132 133 133 134 unsigned int amdgpu_vram_limit = UINT_MAX; ··· 211 210 uint amdgpu_debug_mask; 212 211 int amdgpu_agp = -1; /* auto */ 213 212 int amdgpu_wbrf = -1; 214 - int fw_bo_location = -1; 215 213 216 214 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); 217 215 ··· 989 989 MODULE_PARM_DESC(wbrf, 990 990 "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); 991 991 module_param_named(wbrf, amdgpu_wbrf, int, 0444); 992 - 993 - MODULE_PARM_DESC(fw_bo_location, 994 - "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram"); 995 - module_param(fw_bo_location, int, 0644); 996 992 997 993 /* These devices are not supported by amdgpu. 998 994 * They are supported by the mach64, r128, radeon drivers ··· 2118 2122 pr_info("debug: soft reset for GPU recovery disabled\n"); 2119 2123 adev->debug_disable_soft_recovery = true; 2120 2124 } 2125 + 2126 + if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) { 2127 + pr_info("debug: place fw in vram for frontdoor loading\n"); 2128 + adev->debug_use_vram_fw_buf = true; 2129 + } 2121 2130 } 2122 2131 2123 2132 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) ··· 2234 2233 2235 2234 pci_set_drvdata(pdev, ddev); 2236 2235 2236 + amdgpu_init_debug_options(adev); 2237 + 2237 2238 ret = amdgpu_driver_load_kms(adev, flags); 2238 2239 if (ret) 2239 2240 goto err_pci; ··· 2316 2313 amdgpu_get_secondary_funcs(adev); 2317 2314 } 2318 2315 2319 - amdgpu_init_debug_options(adev); 2320 - 2321 2316 return 0; 2322 2317 2323 2318 err_pci: ··· 2335 2334 if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { 2336 2335 pm_runtime_get_sync(dev->dev); 2337 2336 pm_runtime_forbid(dev->dev); 2338 - } 2339 - 2340 - if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && 2341 - !amdgpu_sriov_vf(adev)) { 2342 - bool need_to_reset_gpu = false; 2343 - 2344 - if (adev->gmc.xgmi.num_physical_nodes > 1) { 2345 - struct amdgpu_hive_info *hive; 2346 - 2347 - hive = amdgpu_get_xgmi_hive(adev); 2348 - if (hive->device_remove_count == 0) 2349 - need_to_reset_gpu = true; 2350 - hive->device_remove_count++; 2351 - amdgpu_put_xgmi_hive(hive); 2352 - } else { 2353 - need_to_reset_gpu = true; 2354 - } 2355 - 2356 - /* Workaround for ASICs need to reset SMU. 2357 - * Called only when the first device is removed. 2358 - */ 2359 - if (need_to_reset_gpu) { 2360 - struct amdgpu_reset_context reset_context; 2361 - 2362 - adev->shutdown = true; 2363 - memset(&reset_context, 0, sizeof(reset_context)); 2364 - reset_context.method = AMD_RESET_METHOD_NONE; 2365 - reset_context.reset_req_dev = adev; 2366 - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); 2367 - set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags); 2368 - amdgpu_device_gpu_recover(adev, NULL, &reset_context); 2369 - } 2370 2337 } 2371 2338 2372 2339 amdgpu_driver_unload_kms(dev);

+14 -7

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

··· 1045 1045 * seconds, so here, we just pick up three parts for emulation. 1046 1046 */ 1047 1047 ret = memcmp(vram_ptr, cptr, 10); 1048 - if (ret) 1049 - return ret; 1048 + if (ret) { 1049 + ret = -EIO; 1050 + goto release_buffer; 1051 + } 1050 1052 1051 1053 ret = memcmp(vram_ptr + (size / 2), cptr, 10); 1052 - if (ret) 1053 - return ret; 1054 + if (ret) { 1055 + ret = -EIO; 1056 + goto release_buffer; 1057 + } 1054 1058 1055 1059 ret = memcmp(vram_ptr + size - 10, cptr, 10); 1056 - if (ret) 1057 - return ret; 1060 + if (ret) { 1061 + ret = -EIO; 1062 + goto release_buffer; 1063 + } 1058 1064 1065 + release_buffer: 1059 1066 amdgpu_bo_free_kernel(&vram_bo, &vram_gpu, 1060 1067 &vram_ptr); 1061 1068 1062 - return 0; 1069 + return ret; 1063 1070 } 1064 1071 1065 1072 static ssize_t current_memory_partition_show(

+6 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

··· 1105 1105 if (amdgpu_dpm_read_sensor(adev, 1106 1106 AMDGPU_PP_SENSOR_GPU_AVG_POWER, 1107 1107 (void *)&ui32, &ui32_size)) { 1108 - return -EINVAL; 1108 + /* fall back to input power for backwards compat */ 1109 + if (amdgpu_dpm_read_sensor(adev, 1110 + AMDGPU_PP_SENSOR_GPU_INPUT_POWER, 1111 + (void *)&ui32, &ui32_size)) { 1112 + return -EINVAL; 1113 + } 1109 1114 } 1110 1115 ui32 >>= 8; 1111 1116 break;

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

··· 466 466 } 467 467 468 468 ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, 469 - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? 469 + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? 470 470 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, 471 471 &psp->fw_pri_bo, 472 472 &psp->fw_pri_mc_addr,

+15 -11

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

··· 305 305 return -EINVAL; 306 306 307 307 data->head.block = block_id; 308 - /* only ue and ce errors are supported */ 308 + /* only ue, ce and poison errors are supported */ 309 309 if (!memcmp("ue", err, 2)) 310 310 data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 311 311 else if (!memcmp("ce", err, 2)) 312 312 data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; 313 + else if (!memcmp("poison", err, 6)) 314 + data->head.type = AMDGPU_RAS_ERROR__POISON; 313 315 else 314 316 return -EINVAL; 315 317 ··· 433 431 * The block is one of: umc, sdma, gfx, etc. 434 432 * see ras_block_string[] for details 435 433 * 436 - * The error type is one of: ue, ce, where, 434 + * The error type is one of: ue, ce and poison where, 437 435 * ue is multi-uncorrectable 438 436 * ce is single-correctable 437 + * poison is poison 439 438 * 440 439 * The sub-block is a the sub-block index, pass 0 if there is no sub-block. 441 440 * The address and value are hexadecimal numbers, leading 0x is optional. ··· 1070 1067 mcm_info = &err_info->mcm_info; 1071 1068 if (err_info->ce_count) { 1072 1069 dev_info(adev->dev, "socket: %d, die: %d, " 1073 - "%lld new correctable hardware errors detected in %s block, " 1074 - "no user action is needed\n", 1070 + "%lld new correctable hardware errors detected in %s block\n", 1075 1071 mcm_info->socket_id, 1076 1072 mcm_info->die_id, 1077 1073 err_info->ce_count, ··· 1082 1080 err_info = &err_node->err_info; 1083 1081 mcm_info = &err_info->mcm_info; 1084 1082 dev_info(adev->dev, "socket: %d, die: %d, " 1085 - "%lld correctable hardware errors detected in total in %s block, " 1086 - "no user action is needed\n", 1083 + "%lld correctable hardware errors detected in total in %s block\n", 1087 1084 mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name); 1088 1085 } 1089 1086 } ··· 1109 1108 adev->smuio.funcs->get_die_id) { 1110 1109 dev_info(adev->dev, "socket: %d, die: %d " 1111 1110 "%ld correctable hardware errors " 1112 - "detected in %s block, no user " 1113 - "action is needed.\n", 1111 + "detected in %s block\n", 1114 1112 adev->smuio.funcs->get_socket_id(adev), 1115 1113 adev->smuio.funcs->get_die_id(adev), 1116 1114 ras_mgr->err_data.ce_count, 1117 1115 blk_name); 1118 1116 } else { 1119 1117 dev_info(adev->dev, "%ld correctable hardware errors " 1120 - "detected in %s block, no user " 1121 - "action is needed.\n", 1118 + "detected in %s block\n", 1122 1119 ras_mgr->err_data.ce_count, 1123 1120 blk_name); 1124 1121 } ··· 1919 1920 struct amdgpu_iv_entry *entry) 1920 1921 { 1921 1922 dev_info(obj->adev->dev, 1922 - "Poison is created, no user action is needed.\n"); 1923 + "Poison is created\n"); 1923 1924 } 1924 1925 1925 1926 static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, ··· 2918 2919 } 2919 2920 2920 2921 amdgpu_ras_query_poison_mode(adev); 2922 + 2923 + /* Packed socket_id to ras feature mask bits[31:29] */ 2924 + if (adev->smuio.funcs && 2925 + adev->smuio.funcs->get_socket_id) 2926 + con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29); 2921 2927 2922 2928 /* Get RAS schema for particular SOC */ 2923 2929 con->schema = amdgpu_get_ras_schema(adev);

-1

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

··· 32 32 33 33 AMDGPU_NEED_FULL_RESET = 0, 34 34 AMDGPU_SKIP_HW_RESET = 1, 35 - AMDGPU_RESET_FOR_DEVICE_REMOVE = 2, 36 35 }; 37 36 38 37 struct amdgpu_reset_context {

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c

··· 1062 1062 { 1063 1063 if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { 1064 1064 amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, 1065 - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? 1065 + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? 1066 1066 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, 1067 1067 &adev->firmware.fw_buf, 1068 1068 &adev->firmware.fw_buf_mc,

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

··· 116 116 #define AMDGPU_VM_FAULT_STOP_FIRST 1 117 117 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 118 118 119 - /* Reserve 4MB VRAM for page tables */ 119 + /* How much VRAM be reserved for page tables */ 120 120 #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) 121 121 122 122 /*

+2 -8

drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c

··· 59 59 60 60 static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) 61 61 { 62 - bool arg1_negative = numerator < 0; 63 - bool arg2_negative = denominator < 0; 64 - 65 - uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator); 66 - uint16_t arg2_value = (uint16_t)(arg2_negative ? -denominator : denominator); 62 + u16 arg1_value = numerator; 63 + u16 arg2_value = denominator; 67 64 68 65 uint16_t remainder; 69 66 ··· 96 99 97 100 res_value += summand; 98 101 } 99 - 100 - if (arg1_negative ^ arg2_negative) 101 - res_value = -res_value; 102 102 103 103 return res_value; 104 104 }

-1

drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

··· 43 43 } pstate; 44 44 45 45 struct amdgpu_reset_domain *reset_domain; 46 - uint32_t device_remove_count; 47 46 atomic_t ras_recovery; 48 47 }; 49 48

+8

drivers/gpu/drm/amd/amdgpu/athub_v3_0.c

··· 30 30 31 31 #define regATHUB_MISC_CNTL_V3_0_1 0x00d7 32 32 #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 33 + #define regATHUB_MISC_CNTL_V3_3_0 0x00d8 34 + #define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0 33 35 34 36 35 37 static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) ··· 41 39 switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { 42 40 case IP_VERSION(3, 0, 1): 43 41 data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); 42 + break; 43 + case IP_VERSION(3, 3, 0): 44 + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0); 44 45 break; 45 46 default: 46 47 data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); ··· 57 52 switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { 58 53 case IP_VERSION(3, 0, 1): 59 54 WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); 55 + break; 56 + case IP_VERSION(3, 3, 0): 57 + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data); 60 58 break; 61 59 default: 62 60 WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);

+6 -9

drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

··· 3996 3996 3997 3997 if (!amdgpu_sriov_vf(adev)) { 3998 3998 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); 3999 - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); 4000 - /* don't check this. There are apparently firmwares in the wild with 4001 - * incorrect size in the header 4002 - */ 4003 - if (err == -ENODEV) 4004 - goto out; 3999 + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 4005 4000 if (err) 4006 - dev_dbg(adev->dev, 4007 - "gfx10: amdgpu_ucode_request() failed \"%s\"\n", 4008 - fw_name); 4001 + goto out; 4002 + 4003 + /* don't validate this firmware. There are apparently firmwares 4004 + * in the wild with incorrect size in the header 4005 + */ 4009 4006 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 4010 4007 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 4011 4008 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);

+4 -1

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

··· 115 115 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), 116 116 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), 117 117 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), 118 - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007), 118 + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007), 119 119 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), 120 120 SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), 121 121 SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), ··· 6383 6383 mutex_lock(&adev->grbm_idx_mutex); 6384 6384 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 6385 6385 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 6386 + bitmap = i * adev->gfx.config.max_sh_per_se + j; 6387 + if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) 6388 + continue; 6386 6389 mask = 1; 6387 6390 counter = 0; 6388 6391 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);

+6 -4

drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c

··· 456 456 WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp); 457 457 458 458 /* Setup L2 cache */ 459 - tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); 460 - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); 461 - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); 462 - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); 459 + if (!amdgpu_sriov_vf(adev)) { 460 + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); 461 + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); 462 + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); 463 + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); 464 + } 463 465 } 464 466 } 465 467

+1 -2

drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c

··· 401 401 402 402 if (err_data.ce_count) 403 403 dev_info(adev->dev, "%ld correctable hardware " 404 - "errors detected in %s block, " 405 - "no user action is needed.\n", 404 + "errors detected in %s block\n", 406 405 obj->err_data.ce_count, 407 406 get_ras_block_str(adev->nbio.ras_if)); 408 407

+1 -2

drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c

··· 597 597 598 598 if (err_data.ce_count) 599 599 dev_info(adev->dev, "%ld correctable hardware " 600 - "errors detected in %s block, " 601 - "no user action is needed.\n", 600 + "errors detected in %s block\n", 602 601 obj->err_data.ce_count, 603 602 get_ras_block_str(adev->nbio.ras_if)); 604 603

+1 -1

drivers/gpu/drm/amd/amdgpu/umc_v6_7.c

··· 64 64 uint64_t reg_value; 65 65 66 66 if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) 67 - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); 67 + dev_info(adev->dev, "Deferred error\n"); 68 68 69 69 if (mc_umc_status) 70 70 dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);

+3 -3

drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

··· 1026 1026 } else { 1027 1027 res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); 1028 1028 if (IS_ERR(res)) 1029 - return -ENOMEM; 1029 + return PTR_ERR(res); 1030 1030 pgmap->range.start = res->start; 1031 1031 pgmap->range.end = res->end; 1032 1032 pgmap->type = MEMORY_DEVICE_PRIVATE; ··· 1042 1042 r = devm_memremap_pages(adev->dev, pgmap); 1043 1043 if (IS_ERR(r)) { 1044 1044 pr_err("failed to register HMM device memory\n"); 1045 - /* Disable SVM support capability */ 1046 - pgmap->type = 0; 1047 1045 if (pgmap->type == MEMORY_DEVICE_PRIVATE) 1048 1046 devm_release_mem_region(adev->dev, res->start, resource_size(res)); 1047 + /* Disable SVM support capability */ 1048 + pgmap->type = 0; 1049 1049 return PTR_ERR(r); 1050 1050 } 1051 1051

+1 -1

drivers/gpu/drm/amd/amdkfd/kfd_priv.h

··· 917 917 * fence will be triggered during eviction and new one will be created 918 918 * during restore 919 919 */ 920 - struct dma_fence *ef; 920 + struct dma_fence __rcu *ef; 921 921 922 922 /* Work items for evicting and restoring BOs */ 923 923 struct delayed_work eviction_work;

+5 -2

drivers/gpu/drm/amd/amdkfd/kfd_process.c

··· 1110 1110 { 1111 1111 struct kfd_process *p = container_of(work, struct kfd_process, 1112 1112 release_work); 1113 + struct dma_fence *ef; 1113 1114 1114 1115 kfd_process_dequeue_from_all_devices(p); 1115 1116 pqm_uninit(&p->pqm); ··· 1119 1118 * destroyed. This allows any BOs to be freed without 1120 1119 * triggering pointless evictions or waiting for fences. 1121 1120 */ 1122 - dma_fence_signal(p->ef); 1121 + synchronize_rcu(); 1122 + ef = rcu_access_pointer(p->ef); 1123 + dma_fence_signal(ef); 1123 1124 1124 1125 kfd_process_remove_sysfs(p); 1125 1126 ··· 1130 1127 svm_range_list_fini(p); 1131 1128 1132 1129 kfd_process_destroy_pdds(p); 1133 - dma_fence_put(p->ef); 1130 + dma_fence_put(ef); 1134 1131 1135 1132 kfd_event_free_process(p); 1136 1133

+19 -23

drivers/gpu/drm/amd/amdkfd/kfd_svm.c

··· 404 404 spin_lock(&svm_bo->list_lock); 405 405 } 406 406 spin_unlock(&svm_bo->list_lock); 407 - if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { 408 - /* We're not in the eviction worker. 409 - * Signal the fence and synchronize with any 410 - * pending eviction work. 411 - */ 407 + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) 408 + /* We're not in the eviction worker. Signal the fence. */ 412 409 dma_fence_signal(&svm_bo->eviction_fence->base); 413 - cancel_work_sync(&svm_bo->eviction_work); 414 - } 415 410 dma_fence_put(&svm_bo->eviction_fence->base); 416 411 amdgpu_bo_unref(&svm_bo->bo); 417 412 kfree(svm_bo); ··· 2340 2345 mutex_unlock(&svms->lock); 2341 2346 mmap_write_unlock(mm); 2342 2347 2343 - /* Pairs with mmget in svm_range_add_list_work */ 2344 - mmput(mm); 2348 + /* Pairs with mmget in svm_range_add_list_work. If dropping the 2349 + * last mm refcount, schedule release work to avoid circular locking 2350 + */ 2351 + mmput_async(mm); 2345 2352 2346 2353 spin_lock(&svms->deferred_list_lock); 2347 2354 } ··· 2654 2657 { 2655 2658 struct vm_area_struct *vma; 2656 2659 struct interval_tree_node *node; 2660 + struct rb_node *rb_node; 2657 2661 unsigned long start_limit, end_limit; 2658 2662 2659 2663 vma = vma_lookup(p->mm, addr << PAGE_SHIFT); ··· 2674 2676 if (node) { 2675 2677 end_limit = min(end_limit, node->start); 2676 2678 /* Last range that ends before the fault address */ 2677 - node = container_of(rb_prev(&node->rb), 2678 - struct interval_tree_node, rb); 2679 + rb_node = rb_prev(&node->rb); 2679 2680 } else { 2680 2681 /* Last range must end before addr because 2681 2682 * there was no range after addr 2682 2683 */ 2683 - node = container_of(rb_last(&p->svms.objects.rb_root), 2684 - struct interval_tree_node, rb); 2684 + rb_node = rb_last(&p->svms.objects.rb_root); 2685 2685 } 2686 - if (node) { 2686 + if (rb_node) { 2687 + node = container_of(rb_node, struct interval_tree_node, rb); 2687 2688 if (node->last >= addr) { 2688 2689 WARN(1, "Overlap with prev node and page fault addr\n"); 2689 2690 return -EFAULT; ··· 3429 3432 3430 3433 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) 3431 3434 { 3432 - if (!fence) 3433 - return -EINVAL; 3434 - 3435 - if (dma_fence_is_signaled(&fence->base)) 3436 - return 0; 3437 - 3438 - if (fence->svm_bo) { 3435 + /* Dereferencing fence->svm_bo is safe here because the fence hasn't 3436 + * signaled yet and we're under the protection of the fence->lock. 3437 + * After the fence is signaled in svm_range_bo_release, we cannot get 3438 + * here any more. 3439 + * 3440 + * Reference is dropped in svm_range_evict_svm_bo_worker. 3441 + */ 3442 + if (svm_bo_ref_unless_zero(fence->svm_bo)) { 3439 3443 WRITE_ONCE(fence->svm_bo->evicting, 1); 3440 3444 schedule_work(&fence->svm_bo->eviction_work); 3441 3445 } ··· 3451 3453 int r = 0; 3452 3454 3453 3455 svm_bo = container_of(work, struct svm_range_bo, eviction_work); 3454 - if (!svm_bo_ref_unless_zero(svm_bo)) 3455 - return; /* svm_bo was freed while eviction was pending */ 3456 3456 3457 3457 if (mmget_not_zero(svm_bo->eviction_fence->mm)) { 3458 3458 mm = svm_bo->eviction_fence->mm;

+4 -4

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

··· 9292 9292 if (!new_con_state->writeback_job) 9293 9293 continue; 9294 9294 9295 - new_crtc_state = NULL; 9295 + new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); 9296 9296 9297 - if (acrtc) 9298 - new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); 9297 + if (!new_crtc_state) 9298 + continue; 9299 9299 9300 9300 if (acrtc->wb_enabled) 9301 9301 continue; ··· 10752 10752 DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n"); 10753 10753 goto fail; 10754 10754 } 10755 - status = dc_validate_global_state(dc, dm_state->context, false); 10755 + status = dc_validate_global_state(dc, dm_state->context, true); 10756 10756 if (status != DC_OK) { 10757 10757 DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)", 10758 10758 dc_status_to_str(status), status);

+1 -1

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c

··· 1239 1239 if (has_crtc_cm_degamma && ret != -EINVAL) { 1240 1240 drm_dbg_kms(crtc->base.crtc->dev, 1241 1241 "doesn't support plane and CRTC degamma at the same time\n"); 1242 - return -EINVAL; 1242 + return -EINVAL; 1243 1243 } 1244 1244 1245 1245 /* If we are here, it means we don't have plane degamma settings, check

+5

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c

··· 979 979 struct aux_payload *payload, 980 980 enum aux_return_code_type *operation_result) 981 981 { 982 + if (!link->hpd_status) { 983 + *operation_result = AUX_RET_ERROR_HPD_DISCON; 984 + return -1; 985 + } 986 + 982 987 return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, 983 988 operation_result); 984 989 }

+79 -13

drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c

··· 87 87 #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L 88 88 #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L 89 89 90 + #define regCLK1_CLK2_BYPASS_CNTL 0x029c 91 + #define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 92 + 93 + #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 94 + #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 95 + #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L 96 + #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L 97 + 98 + #define regCLK6_0_CLK6_spll_field_8 0x464b 99 + #define regCLK6_0_CLK6_spll_field_8_BASE_IDX 0 100 + 101 + #define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT 0xd 102 + #define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L 103 + 90 104 #define REG(reg_name) \ 91 105 (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) 92 106 ··· 145 131 return display_count; 146 132 } 147 133 148 - static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) 134 + static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, 135 + bool safe_to_lower, bool disable) 149 136 { 150 137 struct dc *dc = clk_mgr_base->ctx->dc; 151 138 int i; 152 139 153 140 for (i = 0; i < dc->res_pool->pipe_count; ++i) { 154 - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; 141 + struct pipe_ctx *pipe = safe_to_lower 142 + ? &context->res_ctx.pipe_ctx[i] 143 + : &dc->current_state->res_ctx.pipe_ctx[i]; 155 144 156 145 if (pipe->top_pipe || pipe->prev_odm_pipe) 157 146 continue; 158 147 if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { 159 - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; 160 - 161 148 if (disable) { 162 - if (stream_enc && stream_enc->funcs->disable_fifo) 163 - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); 149 + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) 150 + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); 164 151 165 - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); 166 152 reset_sync_context_for_pipe(dc, context, i); 167 153 } else { 168 154 pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); 169 - 170 - if (stream_enc && stream_enc->funcs->enable_fifo) 171 - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); 172 155 } 173 156 } 174 157 } 158 + } 159 + 160 + bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) 161 + { 162 + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); 163 + uint32_t ssc_enable; 164 + 165 + REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable); 166 + 167 + return ssc_enable == 1; 168 + } 169 + 170 + void dcn314_init_clocks(struct clk_mgr *clk_mgr) 171 + { 172 + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); 173 + uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; 174 + 175 + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); 176 + // Assumption is that boot state always supports pstate 177 + clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk 178 + clk_mgr->clks.p_state_change_support = true; 179 + clk_mgr->clks.prev_p_state_change_support = true; 180 + clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; 181 + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; 182 + 183 + // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk 184 + if (dcn314_is_spll_ssc_enabled(clk_mgr)) 185 + clk_mgr->dp_dto_source_clock_in_khz = 186 + dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz); 187 + else 188 + clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; 175 189 } 176 190 177 191 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, ··· 294 252 } 295 253 296 254 if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { 297 - dcn314_disable_otg_wa(clk_mgr_base, context, true); 255 + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); 298 256 299 257 clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; 300 258 dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); 301 - dcn314_disable_otg_wa(clk_mgr_base, context, false); 259 + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); 302 260 303 261 update_dispclk = true; 304 262 } ··· 477 435 static DpmClocks314_t dummy_clocks; 478 436 479 437 static struct dcn314_watermarks dummy_wms = { 0 }; 438 + 439 + static struct dcn314_ss_info_table ss_info_table = { 440 + .ss_divider = 1000, 441 + .ss_percentage = {0, 0, 375, 375, 375} 442 + }; 480 443 481 444 static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table) 482 445 { ··· 755 708 .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, 756 709 .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, 757 710 .update_clocks = dcn314_update_clocks, 758 - .init_clocks = dcn31_init_clocks, 711 + .init_clocks = dcn314_init_clocks, 759 712 .enable_pme_wa = dcn314_enable_pme_wa, 760 713 .are_clock_states_equal = dcn314_are_clock_states_equal, 761 714 .notify_wm_ranges = dcn314_notify_wm_ranges 762 715 }; 763 716 extern struct clk_mgr_funcs dcn3_fpga_funcs; 717 + 718 + static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) 719 + { 720 + uint32_t clock_source; 721 + //uint32_t ssc_enable; 722 + 723 + REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); 724 + //REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable); 725 + 726 + if (dcn314_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { 727 + clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; 728 + 729 + if (clk_mgr->dprefclk_ss_percentage != 0) { 730 + clk_mgr->ss_on_dprefclk = true; 731 + clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider; 732 + } 733 + } 734 + } 764 735 765 736 void dcn314_clk_mgr_construct( 766 737 struct dc_context *ctx, ··· 847 782 clk_mgr->base.base.dprefclk_khz = 600000; 848 783 clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; 849 784 dce_clock_read_ss_info(&clk_mgr->base); 785 + dcn314_read_ss_info_from_lut(&clk_mgr->base); 850 786 /*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/ 851 787 852 788 clk_mgr->base.base.bw_params = &dcn314_bw_params;

+11

drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h

··· 28 28 #define __DCN314_CLK_MGR_H__ 29 29 #include "clk_mgr_internal.h" 30 30 31 + #define DCN314_NUM_CLOCK_SOURCES 5 32 + 31 33 struct dcn314_watermarks; 32 34 33 35 struct dcn314_smu_watermark_set { ··· 42 40 struct dcn314_smu_watermark_set smu_wm_set; 43 41 }; 44 42 43 + struct dcn314_ss_info_table { 44 + uint32_t ss_divider; 45 + uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES]; 46 + }; 47 + 45 48 bool dcn314_are_clock_states_equal(struct dc_clocks *a, 46 49 struct dc_clocks *b); 50 + 51 + bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base); 52 + 53 + void dcn314_init_clocks(struct clk_mgr *clk_mgr); 47 54 48 55 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, 49 56 struct dc_state *context,

+12 -6

drivers/gpu/drm/amd/display/dc/core/dc.c

··· 411 411 * avoid conflicting with firmware updates. 412 412 */ 413 413 if (dc->ctx->dce_version > DCE_VERSION_MAX) 414 - if (dc->optimized_required) 414 + if (dc->optimized_required || dc->wm_optimized_required) 415 415 return false; 416 - 417 - if (!memcmp(&stream->adjust, adjust, sizeof(*adjust))) 418 - return true; 419 416 420 417 stream->adjust.v_total_max = adjust->v_total_max; 421 418 stream->adjust.v_total_mid = adjust->v_total_mid; ··· 2227 2230 } 2228 2231 2229 2232 dc->optimized_required = false; 2233 + dc->wm_optimized_required = false; 2230 2234 } 2231 2235 2232 2236 bool dc_set_generic_gpio_for_stereo(bool enable, ··· 2650 2652 } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { 2651 2653 dc->optimized_required = true; 2652 2654 } 2655 + 2656 + dc->optimized_required |= dc->wm_optimized_required; 2653 2657 } 2654 2658 2655 2659 return type; ··· 2858 2858 2859 2859 if (update->vrr_active_fixed) 2860 2860 stream->vrr_active_fixed = *update->vrr_active_fixed; 2861 + 2862 + if (update->crtc_timing_adjust) 2863 + stream->adjust = *update->crtc_timing_adjust; 2861 2864 2862 2865 if (update->dpms_off) 2863 2866 stream->dpms_off = *update->dpms_off; ··· 3522 3519 top_pipe_to_program = resource_get_otg_master_for_stream( 3523 3520 &context->res_ctx, 3524 3521 stream); 3525 - 3522 + ASSERT(top_pipe_to_program != NULL); 3526 3523 for (i = 0; i < dc->res_pool->pipe_count; i++) { 3527 3524 struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; 3528 3525 ··· 4291 4288 stream_update->mst_bw_update || 4292 4289 stream_update->func_shaper || 4293 4290 stream_update->lut3d_func || 4294 - stream_update->pending_test_pattern)) 4291 + stream_update->pending_test_pattern || 4292 + stream_update->crtc_timing_adjust)) 4295 4293 return true; 4296 4294 4297 4295 if (stream) { ··· 4345 4341 4346 4342 cur_pipe = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, stream); 4347 4343 new_pipe = resource_get_otg_master_for_stream(&context->res_ctx, stream); 4344 + if (!cur_pipe || !new_pipe) 4345 + return false; 4348 4346 cur_is_odm_in_use = resource_get_odm_slice_count(cur_pipe) > 1; 4349 4347 new_is_odm_in_use = resource_get_odm_slice_count(new_pipe) > 1; 4350 4348 if (cur_is_odm_in_use == new_is_odm_in_use)

+4 -14

drivers/gpu/drm/amd/display/dc/core/dc_resource.c

··· 2194 2194 for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) { 2195 2195 otg_master = resource_get_otg_master_for_stream( 2196 2196 &state->res_ctx, state->streams[stream_idx]); 2197 + if (!otg_master || otg_master->stream_res.tg == NULL) { 2198 + DC_LOG_DC("topology update: otg_master NULL stream_idx %d!\n", stream_idx); 2199 + return; 2200 + } 2197 2201 slice_count = resource_get_opp_heads_for_otg_master(otg_master, 2198 2202 &state->res_ctx, opp_heads); 2199 2203 for (slice_idx = 0; slice_idx < slice_count; slice_idx++) { ··· 4988 4984 } 4989 4985 4990 4986 return DC_OK; 4991 - } 4992 - 4993 - bool resource_subvp_in_use(struct dc *dc, 4994 - struct dc_state *context) 4995 - { 4996 - uint32_t i; 4997 - 4998 - for (i = 0; i < dc->res_pool->pipe_count; i++) { 4999 - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 5000 - 5001 - if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) 5002 - return true; 5003 - } 5004 - return false; 5005 4987 } 5006 4988 5007 4989 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream)

+5 -3

drivers/gpu/drm/amd/display/dc/core/dc_state.c

··· 267 267 state->clk_mgr = dc->clk_mgr; 268 268 269 269 /* Initialise DIG link encoder resource tracking variables. */ 270 - link_enc_cfg_init(dc, state); 270 + if (dc->res_pool) 271 + link_enc_cfg_init(dc, state); 271 272 } 272 273 273 274 void dc_state_destruct(struct dc_state *state) ··· 434 433 435 434 otg_master_pipe = resource_get_otg_master_for_stream( 436 435 &state->res_ctx, stream); 437 - added = resource_append_dpp_pipes_for_plane_composition(state, 438 - dc->current_state, pool, otg_master_pipe, plane_state); 436 + if (otg_master_pipe) 437 + added = resource_append_dpp_pipes_for_plane_composition(state, 438 + dc->current_state, pool, otg_master_pipe, plane_state); 439 439 440 440 if (added) { 441 441 stream_status->plane_states[stream_status->plane_count] =

+2 -1

drivers/gpu/drm/amd/display/dc/dc.h

··· 51 51 struct set_config_cmd_payload; 52 52 struct dmub_notification; 53 53 54 - #define DC_VER "3.2.265" 54 + #define DC_VER "3.2.266" 55 55 56 56 #define MAX_SURFACES 3 57 57 #define MAX_PLANES 6 ··· 1036 1036 1037 1037 /* Require to optimize clocks and bandwidth for added/removed planes */ 1038 1038 bool optimized_required; 1039 + bool wm_optimized_required; 1039 1040 bool idle_optimizations_allowed; 1040 1041 bool enable_c20_dtm_b0; 1041 1042

+2

drivers/gpu/drm/amd/display/dc/dc_stream.h

··· 139 139 uint32_t wb_update:1; 140 140 uint32_t dsc_changed : 1; 141 141 uint32_t mst_bw : 1; 142 + uint32_t crtc_timing_adjust : 1; 142 143 uint32_t fams_changed : 1; 143 144 } bits; 144 145 ··· 326 325 struct dc_3dlut *lut3d_func; 327 326 328 327 struct test_pattern *pending_test_pattern; 328 + struct dc_crtc_timing_adjust *crtc_timing_adjust; 329 329 }; 330 330 331 331 bool dc_is_stream_unchanged(

+7 -5

drivers/gpu/drm/amd/display/dc/dc_types.h

··· 1140 1140 } ilr; 1141 1141 }; 1142 1142 1143 + #define MAX_SINKS_PER_LINK 4 1144 + 1143 1145 /* 1144 1146 * USB4 DPIA BW ALLOCATION STRUCTS 1145 1147 */ 1146 1148 struct dc_dpia_bw_alloc { 1147 - int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already 1148 - int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated 1149 - int sink_max_bw; // The Max BW that sink can require/support 1149 + int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks 1150 + int link_verified_bw; // The Verified BW that link can allocated and use that has been verified already 1151 + int link_max_bw; // The Max BW that link can require/support 1152 + int allocated_bw; // The Actual Allocated BW for this DPIA 1150 1153 int estimated_bw; // The estimated available BW for this DPIA 1151 1154 int bw_granularity; // BW Granularity 1155 + int dp_overhead; // DP overhead in dp tunneling 1152 1156 bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM 1153 1157 bool response_ready; // Response ready from the CM side 1154 1158 uint8_t nrd_max_lane_count; // Non-reduced max lane count 1155 1159 uint8_t nrd_max_link_rate; // Non-reduced max link rate 1156 1160 }; 1157 - 1158 - #define MAX_SINKS_PER_LINK 4 1159 1161 1160 1162 enum dc_hpd_enable_select { 1161 1163 HPD_EN_FOR_ALL_EDP = 0,

+1 -1

drivers/gpu/drm/amd/display/dc/dce/dce_audio.c

··· 782 782 /*audio_dto_module = dpDtoSourceClockInkhz * 10,000; 783 783 * [khz] ->[100Hz] */ 784 784 azalia_clock_info->audio_dto_module = 785 - pll_info->dp_dto_source_clock_in_khz * 10; 785 + pll_info->audio_dto_source_clock_in_khz * 10; 786 786 } 787 787 788 788 void dce_aud_wall_dto_setup(

+8 -1

drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c

··· 975 975 look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10); 976 976 struct bp_pixel_clock_parameters bp_pc_params = {0}; 977 977 enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; 978 + 979 + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) 980 + dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; 978 981 // For these signal types Driver to program DP_DTO without calling VBIOS Command table 979 982 if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { 980 983 if (e) { ··· 1091 1088 struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); 1092 1089 unsigned int clock_hz = 0; 1093 1090 unsigned int modulo_hz = 0; 1091 + unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz; 1092 + 1093 + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) 1094 + dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; 1094 1095 1095 1096 if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) { 1096 1097 clock_hz = REG_READ(PHASE[inst]); ··· 1107 1100 modulo_hz = REG_READ(MODULO[inst]); 1108 1101 if (modulo_hz) 1109 1102 *pixel_clk_khz = div_u64((uint64_t)clock_hz* 1110 - clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, 1103 + dp_dto_ref_khz*10, 1111 1104 modulo_hz); 1112 1105 else 1113 1106 *pixel_clk_khz = 0;

+14

drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c

··· 183 183 return true; 184 184 } 185 185 186 + bool dcn32_subvp_in_use(struct dc *dc, 187 + struct dc_state *context) 188 + { 189 + uint32_t i; 190 + 191 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 192 + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 193 + 194 + if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) 195 + return true; 196 + } 197 + return false; 198 + } 199 + 186 200 bool dcn32_mpo_in_use(struct dc_state *context) 187 201 { 188 202 uint32_t i;

+5 -6

drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c

··· 33 33 #include "dcn30/dcn30_resource.h" 34 34 #include "link.h" 35 35 #include "dc_state_priv.h" 36 - #include "resource.h" 37 36 38 37 #define DC_LOGGER_INIT(logger) 39 38 ··· 291 292 292 293 /* for subvp + DRR case, if subvp pipes are still present we support pstate */ 293 294 if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && 294 - resource_subvp_in_use(dc, context)) 295 + dcn32_subvp_in_use(dc, context)) 295 296 vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; 296 297 297 298 if (vlevel < context->bw_ctx.dml.vba.soc.num_states && ··· 2272 2273 unsigned int dummy_latency_index = 0; 2273 2274 int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; 2274 2275 unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; 2275 - bool subvp_active = resource_subvp_in_use(dc, context); 2276 + bool subvp_in_use = dcn32_subvp_in_use(dc, context); 2276 2277 unsigned int min_dram_speed_mts_margin; 2277 2278 bool need_fclk_lat_as_dummy = false; 2278 2279 bool is_subvp_p_drr = false; ··· 2281 2282 dc_assert_fp_enabled(); 2282 2283 2283 2284 /* need to find dummy latency index for subvp */ 2284 - if (subvp_active) { 2285 + if (subvp_in_use) { 2285 2286 /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */ 2286 2287 if (!pstate_en) { 2287 2288 context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; ··· 2467 2468 dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; 2468 2469 } 2469 2470 2470 - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) { 2471 + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) { 2471 2472 /* find largest table entry that is lower than dram speed, 2472 2473 * but lower than DPM0 still uses DPM0 2473 2474 */ ··· 3527 3528 void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context) 3528 3529 { 3529 3530 // WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue) 3530 - if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) && 3531 + if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) && 3531 3532 dc->dml.soc.num_chans <= 8) { 3532 3533 int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; 3533 3534

+3 -3

drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c

··· 166 166 .num_states = 5, 167 167 .sr_exit_time_us = 14.0, 168 168 .sr_enter_plus_exit_time_us = 16.0, 169 - .sr_exit_z8_time_us = 525.0, 170 - .sr_enter_plus_exit_z8_time_us = 715.0, 171 - .fclk_change_latency_us = 20.0, 169 + .sr_exit_z8_time_us = 210.0, 170 + .sr_enter_plus_exit_z8_time_us = 320.0, 171 + .fclk_change_latency_us = 24.0, 172 172 .usr_retraining_latency_us = 2, 173 173 .writeback_latency_us = 12.0, 174 174

+16 -16

drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c

··· 6229 6229 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; 6230 6230 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; 6231 6231 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; 6232 - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; 6232 + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; 6233 6233 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; 6234 6234 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; 6235 6235 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; ··· 6329 6329 mode_lib->ms.NoOfDPPThisState, 6330 6330 mode_lib->ms.dpte_group_bytes, 6331 6331 s->HostVMInefficiencyFactor, 6332 - mode_lib->ms.soc.hostvm_min_page_size_kbytes, 6332 + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, 6333 6333 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); 6334 6334 6335 6335 s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; ··· 6542 6542 mode_lib->ms.cache_display_cfg.plane.HostVMEnable, 6543 6543 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, 6544 6544 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, 6545 - mode_lib->ms.soc.hostvm_min_page_size_kbytes, 6545 + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, 6546 6546 mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], 6547 6547 mode_lib->ms.MetaRowBytes[j][k], 6548 6548 mode_lib->ms.DPTEBytesPerRow[j][k], ··· 7687 7687 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; 7688 7688 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; 7689 7689 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; 7690 - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; 7690 + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; 7691 7691 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; 7692 7692 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; 7693 7693 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; ··· 7957 7957 UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; 7958 7958 UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; 7959 7959 UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; 7960 - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; 7960 + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; 7961 7961 UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; 7962 7962 UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; 7963 7963 UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; ··· 8699 8699 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; 8700 8700 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; 8701 8701 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; 8702 - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; 8702 + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; 8703 8703 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; 8704 8704 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; 8705 8705 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; ··· 8805 8805 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, 8806 8806 locals->dpte_group_bytes, 8807 8807 s->HostVMInefficiencyFactor, 8808 - mode_lib->ms.soc.hostvm_min_page_size_kbytes, 8808 + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, 8809 8809 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); 8810 8810 8811 8811 locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; ··· 8995 8995 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; 8996 8996 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; 8997 8997 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; 8998 - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; 8998 + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; 8999 8999 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; 9000 9000 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; 9001 9001 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; ··· 9240 9240 mode_lib->ms.cache_display_cfg.plane.HostVMEnable, 9241 9241 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, 9242 9242 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, 9243 - mode_lib->ms.soc.hostvm_min_page_size_kbytes, 9243 + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, 9244 9244 locals->PDEAndMetaPTEBytesFrame[k], 9245 9245 locals->MetaRowByte[k], 9246 9246 locals->PixelPTEBytesPerRow[k], ··· 9446 9446 CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte; 9447 9447 9448 9448 // Output 9449 - CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark 9450 - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0]; 9451 - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] 9452 - CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] 9453 - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0]; 9454 - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported 9455 - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0]; 9449 + CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark 9450 + CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport; 9451 + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] 9452 + CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[] 9453 + CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport; 9454 + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported 9455 + CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport; 9456 9456 9457 9457 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 9458 9458 &mode_lib->scratch,

+1 -1

drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c

··· 626 626 if (is_dp2p0_output_encoder(pipe)) 627 627 out->OutputEncoder[location] = dml_dp2p0; 628 628 break; 629 - out->OutputEncoder[location] = dml_edp; 630 629 case SIGNAL_TYPE_EDP: 630 + out->OutputEncoder[location] = dml_edp; 631 631 break; 632 632 case SIGNAL_TYPE_HDMI_TYPE_A: 633 633 case SIGNAL_TYPE_DVI_SINGLE_LINK:

+1 -1

drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c

··· 1354 1354 if (state->clk_mgr && 1355 1355 (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT || 1356 1356 pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) { 1357 - audio_output->pll_info.dp_dto_source_clock_in_khz = 1357 + audio_output->pll_info.audio_dto_source_clock_in_khz = 1358 1358 state->clk_mgr->funcs->get_dp_ref_clk_frequency( 1359 1359 state->clk_mgr); 1360 1360 }

+1 -1

drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c

··· 3076 3076 context, 3077 3077 false); 3078 3078 3079 - dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, 3079 + dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, 3080 3080 &context->bw_ctx.bw.dcn.watermarks, 3081 3081 dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, 3082 3082 true);

+4 -41

drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c

··· 1882 1882 } 1883 1883 } 1884 1884 1885 - static void update_vmin_vmax_fams(struct dc *dc, 1886 - struct dc_state *context) 1887 - { 1888 - uint32_t i; 1889 - struct drr_params params = {0}; 1890 - bool subvp_in_use = resource_subvp_in_use(dc, context); 1891 - 1892 - for (i = 0; i < dc->res_pool->pipe_count; i++) { 1893 - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 1894 - 1895 - if (resource_is_pipe_type(pipe, OTG_MASTER) && 1896 - ((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM && 1897 - pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) { 1898 - if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) { 1899 - struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; 1900 - 1901 - /* DRR should be configured already if we're in active variable 1902 - * or active fixed, so only program if we're not in this state 1903 - */ 1904 - params.vertical_total_min = pipe->stream->timing.v_total; 1905 - params.vertical_total_max = pipe->stream->timing.v_total; 1906 - tg->funcs->set_drr(tg, &params); 1907 - } 1908 - } else { 1909 - if (resource_is_pipe_type(pipe, OTG_MASTER) && 1910 - !pipe->stream->vrr_active_variable && 1911 - !pipe->stream->vrr_active_fixed) { 1912 - struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; 1913 - params.vertical_total_min = 0; 1914 - params.vertical_total_max = 0; 1915 - tg->funcs->set_drr(tg, &params); 1916 - } 1917 - } 1918 - } 1919 - } 1920 - 1921 1885 void dcn20_program_front_end_for_ctx( 1922 1886 struct dc *dc, 1923 1887 struct dc_state *context) ··· 1958 1994 && context->res_ctx.pipe_ctx[i].stream) 1959 1995 hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); 1960 1996 1961 - update_vmin_vmax_fams(dc, context); 1962 1997 1963 1998 /* Disconnect mpcc */ 1964 1999 for (i = 0; i < dc->res_pool->pipe_count; i++) ··· 2159 2196 } 2160 2197 2161 2198 /* program dchubbub watermarks: 2162 - * For assigning optimized_required, use |= operator since we don't want 2199 + * For assigning wm_optimized_required, use |= operator since we don't want 2163 2200 * to clear the value if the optimize has not happened yet 2164 2201 */ 2165 - dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, 2202 + dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, 2166 2203 &context->bw_ctx.bw.dcn.watermarks, 2167 2204 dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, 2168 2205 false); ··· 2175 2212 if (hubbub->funcs->program_compbuf_size) { 2176 2213 if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { 2177 2214 compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; 2178 - dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); 2215 + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); 2179 2216 } else { 2180 2217 compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; 2181 - dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); 2218 + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); 2182 2219 } 2183 2220 2184 2221 hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);

+1

drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h

··· 333 333 bool force_smu_not_present; 334 334 bool dc_mode_softmax_enabled; 335 335 int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes 336 + int dp_dto_source_clock_in_khz; // Used to program DP DTO with ss adjustment on DCN314 336 337 int dentist_vco_freq_khz; 337 338 struct clk_state_registers_and_bypass boot_snapshot; 338 339 struct clk_bw_params *bw_params;

-3

drivers/gpu/drm/amd/display/dc/inc/resource.h

··· 609 609 struct pipe_ctx *sec_pipe, 610 610 bool odm); 611 611 612 - bool resource_subvp_in_use(struct dc *dc, 613 - struct dc_state *context); 614 - 615 612 /* A test harness interface that modifies dp encoder resources in the given dc 616 613 * state and bypasses the need to revalidate. The interface assumes that the 617 614 * test harness interface is called with pre-validated link config stored in the

+40 -10

drivers/gpu/drm/amd/display/dc/link/link_dpms.c

··· 900 900 { 901 901 struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; 902 902 struct dc_stream_state *stream = pipe_ctx->stream; 903 - DC_LOGGER_INIT(dsc->ctx->logger); 904 903 905 - if (!pipe_ctx->stream->timing.flags.DSC || !dsc) 904 + if (!pipe_ctx->stream->timing.flags.DSC) 906 905 return false; 906 + 907 + if (!dsc) 908 + return false; 909 + 910 + DC_LOGGER_INIT(dsc->ctx->logger); 907 911 908 912 if (enable) { 909 913 struct dsc_config dsc_cfg; ··· 2009 2005 } 2010 2006 } 2011 2007 2012 - /* 2013 - * If the link is DP-over-USB4 do the following: 2014 - * - Train with fallback when enabling DPIA link. Conventional links are 2008 + /* Train with fallback when enabling DPIA link. Conventional links are 2015 2009 * trained with fallback during sink detection. 2016 - * - Allocate only what the stream needs for bw in Gbps. Inform the CM 2017 - * in case stream needs more or less bw from what has been allocated 2018 - * earlier at plug time. 2019 2010 */ 2020 - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { 2011 + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) 2021 2012 do_fallback = true; 2022 - } 2023 2013 2024 2014 /* 2025 2015 * Temporary w/a to get DP2.0 link rates to work with SST. ··· 2195 2197 return status; 2196 2198 } 2197 2199 2200 + static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw) 2201 + { 2202 + return true; 2203 + } 2204 + 2205 + static bool allocate_usb4_bandwidth(struct dc_stream_state *stream) 2206 + { 2207 + bool ret; 2208 + 2209 + int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing, 2210 + dc_link_get_highest_encoding_format(stream->sink->link)); 2211 + 2212 + ret = allocate_usb4_bandwidth_for_stream(stream, bw); 2213 + 2214 + return ret; 2215 + } 2216 + 2217 + static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream) 2218 + { 2219 + bool ret; 2220 + 2221 + ret = allocate_usb4_bandwidth_for_stream(stream, 0); 2222 + 2223 + return ret; 2224 + } 2225 + 2198 2226 void link_set_dpms_off(struct pipe_ctx *pipe_ctx) 2199 2227 { 2200 2228 struct dc *dc = pipe_ctx->stream->ctx->dc; ··· 2255 2231 2256 2232 update_psp_stream_config(pipe_ctx, true); 2257 2233 dc->hwss.blank_stream(pipe_ctx); 2234 + 2235 + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) 2236 + deallocate_usb4_bandwidth(pipe_ctx->stream); 2258 2237 2259 2238 if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) 2260 2239 deallocate_mst_payload(pipe_ctx); ··· 2500 2473 link_set_dsc_pps_packet(pipe_ctx, true, true); 2501 2474 } 2502 2475 } 2476 + 2477 + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) 2478 + allocate_usb4_bandwidth(pipe_ctx->stream); 2503 2479 2504 2480 if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) 2505 2481 allocate_mst_payload(pipe_ctx);

+49 -11

drivers/gpu/drm/amd/display/dc/link/link_validation.c

··· 346 346 return DC_OK; 347 347 } 348 348 349 + /* 350 + * This function calculates the bandwidth required for the stream timing 351 + * and aggregates the stream bandwidth for the respective dpia link 352 + * 353 + * @stream: pointer to the dc_stream_state struct instance 354 + * @num_streams: number of streams to be validated 355 + * 356 + * return: true if validation is succeeded 357 + */ 349 358 bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams) 350 359 { 351 - bool ret = true; 352 - int bw_needed[MAX_DPIA_NUM]; 353 - struct dc_link *link[MAX_DPIA_NUM]; 354 - 355 - if (!num_streams || num_streams > MAX_DPIA_NUM) 356 - return ret; 360 + int bw_needed[MAX_DPIA_NUM] = {0}; 361 + struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; 362 + int num_dpias = 0; 357 363 358 364 for (uint8_t i = 0; i < num_streams; ++i) { 365 + if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { 366 + /* new dpia sst stream, check whether it exceeds max dpia */ 367 + if (num_dpias >= MAX_DPIA_NUM) 368 + return false; 359 369 360 - link[i] = stream[i].link; 361 - bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, 362 - dc_link_get_highest_encoding_format(link[i])); 370 + dpia_link[num_dpias] = stream[i].link; 371 + bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, 372 + dc_link_get_highest_encoding_format(dpia_link[num_dpias])); 373 + num_dpias++; 374 + } else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { 375 + uint8_t j = 0; 376 + /* check whether its a known dpia link */ 377 + for (; j < num_dpias; ++j) { 378 + if (dpia_link[j] == stream[i].link) 379 + break; 380 + } 381 + 382 + if (j == num_dpias) { 383 + /* new dpia mst stream, check whether it exceeds max dpia */ 384 + if (num_dpias >= MAX_DPIA_NUM) 385 + return false; 386 + else { 387 + dpia_link[j] = stream[i].link; 388 + num_dpias++; 389 + } 390 + } 391 + 392 + bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing, 393 + dc_link_get_highest_encoding_format(dpia_link[j])); 394 + } 363 395 } 364 396 365 - ret = dpia_validate_usb4_bw(link, bw_needed, num_streams); 397 + /* Include dp overheads */ 398 + for (uint8_t i = 0; i < num_dpias; ++i) { 399 + int dp_overhead = 0; 366 400 367 - return ret; 401 + dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]); 402 + bw_needed[i] += dp_overhead; 403 + } 404 + 405 + return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias); 368 406 }

+21 -13

drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c

··· 82 82 { 83 83 union dmub_rb_cmd cmd = {0}; 84 84 struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv; 85 - bool is_hpd_high = false; 86 85 87 86 /* prepare QUERY_HPD command */ 88 87 cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; 89 88 cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; 90 89 cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; 91 90 92 - /* Return HPD status reported by DMUB if query successfully executed. */ 93 - if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && 94 - cmd.query_hpd.data.status == AUX_RET_SUCCESS) 95 - is_hpd_high = cmd.query_hpd.data.result; 91 + /* Query dpia hpd status from dmub */ 92 + if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, 93 + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && 94 + cmd.query_hpd.data.status == AUX_RET_SUCCESS) { 95 + DC_LOG_DEBUG("%s: for link(%d) dpia(%d) success, current_hpd_status(%d) new_hpd_status(%d)\n", 96 + __func__, 97 + link->link_index, 98 + link->link_id.enum_id - ENUM_ID_1, 99 + link->hpd_status, 100 + cmd.query_hpd.data.result); 101 + link->hpd_status = cmd.query_hpd.data.result; 102 + } else { 103 + DC_LOG_ERROR("%s: for link(%d) dpia(%d) failed with status(%d), current_hpd_status(%d) new_hpd_status(0)\n", 104 + __func__, 105 + link->link_index, 106 + link->link_id.enum_id - ENUM_ID_1, 107 + cmd.query_hpd.data.status, 108 + link->hpd_status); 109 + link->hpd_status = false; 110 + } 96 111 97 - DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n", 98 - __func__, 99 - link->link_index, 100 - link->link_id.enum_id - ENUM_ID_1, 101 - cmd.query_hpd.data.status, 102 - cmd.query_hpd.data.result); 103 - 104 - return is_hpd_high; 112 + return link->hpd_status; 105 113 } 106 114

+46 -14

drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c

··· 54 54 static void reset_bw_alloc_struct(struct dc_link *link) 55 55 { 56 56 link->dpia_bw_alloc_config.bw_alloc_enabled = false; 57 - link->dpia_bw_alloc_config.sink_verified_bw = 0; 58 - link->dpia_bw_alloc_config.sink_max_bw = 0; 57 + link->dpia_bw_alloc_config.link_verified_bw = 0; 58 + link->dpia_bw_alloc_config.link_max_bw = 0; 59 + link->dpia_bw_alloc_config.allocated_bw = 0; 59 60 link->dpia_bw_alloc_config.estimated_bw = 0; 60 61 link->dpia_bw_alloc_config.bw_granularity = 0; 62 + link->dpia_bw_alloc_config.dp_overhead = 0; 61 63 link->dpia_bw_alloc_config.response_ready = false; 62 - link->dpia_bw_alloc_config.sink_allocated_bw = 0; 64 + link->dpia_bw_alloc_config.nrd_max_lane_count = 0; 65 + link->dpia_bw_alloc_config.nrd_max_link_rate = 0; 66 + for (int i = 0; i < MAX_SINKS_PER_LINK; i++) 67 + link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0; 68 + DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index); 63 69 } 64 70 65 71 #define BW_GRANULARITY_0 4 // 0.25 Gbps ··· 216 210 link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && 217 211 (link_dpia_secondary->hpd_status && 218 212 link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { 219 - total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + 220 - link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw; 213 + total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + 214 + link_dpia_secondary->dpia_bw_alloc_config.allocated_bw; 221 215 } else if (link_dpia_primary->hpd_status && 222 216 link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { 223 217 total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; ··· 270 264 271 265 /* Error check whether requested and allocated are equal */ 272 266 req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); 273 - if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) { 267 + if (req_bw == link->dpia_bw_alloc_config.allocated_bw) { 274 268 DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", 275 269 __func__, link->link_index); 276 270 } ··· 393 387 DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", 394 388 __func__, link->link_index); 395 389 DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", 396 - __func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed); 390 + __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed); 397 391 398 - link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed; 392 + link->dpia_bw_alloc_config.allocated_bw = bw_needed; 399 393 400 394 link->dpia_bw_alloc_config.response_ready = true; 401 395 break; ··· 433 427 if (link->hpd_status && peak_bw > 0) { 434 428 435 429 // If DP over USB4 then we need to check BW allocation 436 - link->dpia_bw_alloc_config.sink_max_bw = peak_bw; 437 - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw); 430 + link->dpia_bw_alloc_config.link_max_bw = peak_bw; 431 + set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw); 438 432 439 433 do { 440 434 if (timeout > 0) ··· 446 440 447 441 if (!timeout) 448 442 ret = 0;// ERROR TIMEOUT waiting for response for allocating bw 449 - else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) 450 - ret = link->dpia_bw_alloc_config.sink_allocated_bw; 443 + else if (link->dpia_bw_alloc_config.allocated_bw > 0) 444 + ret = link->dpia_bw_alloc_config.allocated_bw; 451 445 } 452 446 //2. Cold Unplug 453 447 else if (!link->hpd_status) ··· 456 450 out: 457 451 return ret; 458 452 } 459 - 460 453 bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) 461 454 { 462 455 bool ret = false; ··· 463 458 464 459 DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", 465 460 __func__, link->link_index, link->hpd_status, 466 - link->dpia_bw_alloc_config.sink_allocated_bw, req_bw); 461 + link->dpia_bw_alloc_config.allocated_bw, req_bw); 467 462 468 463 if (!get_bw_alloc_proceed_flag(link)) 469 464 goto out; ··· 527 522 } 528 523 529 524 return ret; 525 + } 526 + 527 + int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) 528 + { 529 + int dp_overhead = 0, link_mst_overhead = 0; 530 + 531 + if (!get_bw_alloc_proceed_flag((link))) 532 + return dp_overhead; 533 + 534 + /* if its mst link, add MTPH overhead */ 535 + if ((link->type == dc_connection_mst_branch) && 536 + !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { 537 + /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH 538 + * MST overhead is 1/64 of link bandwidth (excluding any overhead) 539 + */ 540 + const struct dc_link_settings *link_cap = 541 + dc_link_get_link_cap(link); 542 + uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate * 543 + (uint32_t)link_cap->lane_count * 544 + LINK_RATE_REF_FREQ_IN_KHZ * 8; 545 + link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0); 546 + } 547 + 548 + /* add all the overheads */ 549 + dp_overhead = link_mst_overhead; 550 + 551 + return dp_overhead; 530 552 }

+9

drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h

··· 99 99 */ 100 100 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias); 101 101 102 + /* 103 + * Obtain all the DP overheads in dp tunneling for the dpia link 104 + * 105 + * @link: pointer to the dc_link struct instance 106 + * 107 + * return: DP overheads in DP tunneling 108 + */ 109 + int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link); 110 + 102 111 #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */

+7 -4

drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c

··· 930 930 bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream) 931 931 { 932 932 /* To-do: Setup Replay */ 933 - struct dc *dc = link->ctx->dc; 934 - struct dmub_replay *replay = dc->res_pool->replay; 933 + struct dc *dc; 934 + struct dmub_replay *replay; 935 935 int i; 936 936 unsigned int panel_inst; 937 937 struct replay_context replay_context = { 0 }; ··· 946 946 947 947 if (!link) 948 948 return false; 949 + 950 + dc = link->ctx->dc; 951 + 952 + replay = dc->res_pool->replay; 949 953 950 954 if (!replay) 951 955 return false; ··· 979 975 980 976 replay_context.line_time_in_ns = lineTimeInNs; 981 977 982 - if (replay) 983 - link->replay_settings.replay_feature_enabled = 978 + link->replay_settings.replay_feature_enabled = 984 979 replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst); 985 980 if (link->replay_settings.replay_feature_enabled) { 986 981

+16 -6

drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c

··· 166 166 { 167 167 struct optc *optc1 = DCN10TG_FROM_TG(optc); 168 168 169 - /* disable otg request until end of the first line 170 - * in the vertical blank region 171 - */ 172 - REG_UPDATE(OTG_CONTROL, 173 - OTG_MASTER_EN, 0); 174 - 175 169 REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, 176 170 OPTC_SEG0_SRC_SEL, 0xf, 177 171 OPTC_SEG1_SRC_SEL, 0xf, 178 172 OPTC_SEG2_SRC_SEL, 0xf, 179 173 OPTC_SEG3_SRC_SEL, 0xf, 180 174 OPTC_NUM_OF_INPUT_SEGMENT, 0); 175 + 176 + REG_UPDATE(OPTC_MEMORY_CONFIG, 177 + OPTC_MEM_SEL, 0); 178 + 179 + /* disable otg request until end of the first line 180 + * in the vertical blank region 181 + */ 182 + REG_UPDATE(OTG_CONTROL, 183 + OTG_MASTER_EN, 0); 181 184 182 185 REG_UPDATE(CONTROL, 183 186 VTG0_ENABLE, 0); ··· 207 204 static void optc32_disable_phantom_otg(struct timing_generator *optc) 208 205 { 209 206 struct optc *optc1 = DCN10TG_FROM_TG(optc); 207 + 208 + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, 209 + OPTC_SEG0_SRC_SEL, 0xf, 210 + OPTC_SEG1_SRC_SEL, 0xf, 211 + OPTC_SEG2_SRC_SEL, 0xf, 212 + OPTC_SEG3_SRC_SEL, 0xf, 213 + OPTC_NUM_OF_INPUT_SEGMENT, 0); 210 214 211 215 REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); 212 216 }

+9 -6

drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c

··· 138 138 { 139 139 struct optc *optc1 = DCN10TG_FROM_TG(optc); 140 140 141 - /* disable otg request until end of the first line 142 - * in the vertical blank region 143 - */ 144 - REG_UPDATE(OTG_CONTROL, 145 - OTG_MASTER_EN, 0); 146 - 147 141 REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, 148 142 OPTC_SEG0_SRC_SEL, 0xf, 149 143 OPTC_SEG1_SRC_SEL, 0xf, 150 144 OPTC_SEG2_SRC_SEL, 0xf, 151 145 OPTC_SEG3_SRC_SEL, 0xf, 152 146 OPTC_NUM_OF_INPUT_SEGMENT, 0); 147 + 148 + REG_UPDATE(OPTC_MEMORY_CONFIG, 149 + OPTC_MEM_SEL, 0); 150 + 151 + /* disable otg request until end of the first line 152 + * in the vertical blank region 153 + */ 154 + REG_UPDATE(OTG_CONTROL, 155 + OTG_MASTER_EN, 0); 153 156 154 157 REG_UPDATE(CONTROL, 155 158 VTG0_ENABLE, 0);

+1 -1

drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c

··· 1899 1899 1900 1900 static struct dc_cap_funcs cap_funcs = { 1901 1901 .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, 1902 - .get_subvp_en = resource_subvp_in_use, 1902 + .get_subvp_en = dcn32_subvp_in_use, 1903 1903 }; 1904 1904 1905 1905 void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,

+3

drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h

··· 131 131 bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, 132 132 struct dc_state *context); 133 133 134 + bool dcn32_subvp_in_use(struct dc *dc, 135 + struct dc_state *context); 136 + 134 137 bool dcn32_mpo_in_use(struct dc_state *context); 135 138 136 139 bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context);

+1 -1

drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c

··· 1574 1574 1575 1575 static struct dc_cap_funcs cap_funcs = { 1576 1576 .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, 1577 - .get_subvp_en = resource_subvp_in_use, 1577 + .get_subvp_en = dcn32_subvp_in_use, 1578 1578 }; 1579 1579 1580 1580 static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)

+1 -1

drivers/gpu/drm/amd/display/include/audio_types.h

··· 64 64 /* PLL information required for AZALIA DTO calculation */ 65 65 66 66 struct audio_pll_info { 67 - uint32_t dp_dto_source_clock_in_khz; 67 + uint32_t audio_dto_source_clock_in_khz; 68 68 uint32_t feed_back_divider; 69 69 enum audio_dto_source dto_source; 70 70 bool ss_enabled;

+4 -4

drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h

··· 8707 8707 #define regBIF_BX1_MM_CFGREGS_CNTL_BASE_IDX 2 8708 8708 #define regBIF_BX1_BX_RESET_CNTL 0x00f0 8709 8709 #define regBIF_BX1_BX_RESET_CNTL_BASE_IDX 2 8710 - #define regBIF_BX1_INTERRUPT_CNTL 0x8e11 8711 - #define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX 5 8712 - #define regBIF_BX1_INTERRUPT_CNTL2 0x8e12 8713 - #define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX 5 8710 + #define regBIF_BX1_INTERRUPT_CNTL 0x00f1 8711 + #define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX 2 8712 + #define regBIF_BX1_INTERRUPT_CNTL2 0x00f2 8713 + #define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX 2 8714 8714 #define regBIF_BX1_CLKREQB_PAD_CNTL 0x00f8 8715 8715 #define regBIF_BX1_CLKREQB_PAD_CNTL_BASE_IDX 2 8716 8716 #define regBIF_BX1_BIF_FEATURES_CONTROL_MISC 0x00fb

+18 -10

drivers/gpu/drm/amd/pm/amdgpu_pm.c

··· 4349 4349 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) 4350 4350 seq_printf(m, "\t%u mV (VDDNB)\n", value); 4351 4351 size = sizeof(uint32_t); 4352 - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) 4353 - seq_printf(m, "\t%u.%02u W (average GPU)\n", query >> 8, query & 0xff); 4352 + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) { 4353 + if (adev->flags & AMD_IS_APU) 4354 + seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", query >> 8, query & 0xff); 4355 + else 4356 + seq_printf(m, "\t%u.%02u W (average SoC)\n", query >> 8, query & 0xff); 4357 + } 4354 4358 size = sizeof(uint32_t); 4355 - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) 4356 - seq_printf(m, "\t%u.%02u W (current GPU)\n", query >> 8, query & 0xff); 4359 + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) { 4360 + if (adev->flags & AMD_IS_APU) 4361 + seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", query >> 8, query & 0xff); 4362 + else 4363 + seq_printf(m, "\t%u.%02u W (current SoC)\n", query >> 8, query & 0xff); 4364 + } 4357 4365 size = sizeof(value); 4358 4366 seq_printf(m, "\n"); 4359 4367 ··· 4387 4379 /* VCN clocks */ 4388 4380 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { 4389 4381 if (!value) { 4390 - seq_printf(m, "VCN: Disabled\n"); 4382 + seq_printf(m, "VCN: Powered down\n"); 4391 4383 } else { 4392 - seq_printf(m, "VCN: Enabled\n"); 4384 + seq_printf(m, "VCN: Powered up\n"); 4393 4385 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) 4394 4386 seq_printf(m, "\t%u MHz (DCLK)\n", value/100); 4395 4387 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) ··· 4401 4393 /* UVD clocks */ 4402 4394 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { 4403 4395 if (!value) { 4404 - seq_printf(m, "UVD: Disabled\n"); 4396 + seq_printf(m, "UVD: Powered down\n"); 4405 4397 } else { 4406 - seq_printf(m, "UVD: Enabled\n"); 4398 + seq_printf(m, "UVD: Powered up\n"); 4407 4399 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) 4408 4400 seq_printf(m, "\t%u MHz (DCLK)\n", value/100); 4409 4401 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) ··· 4415 4407 /* VCE clocks */ 4416 4408 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { 4417 4409 if (!value) { 4418 - seq_printf(m, "VCE: Disabled\n"); 4410 + seq_printf(m, "VCE: Powered down\n"); 4419 4411 } else { 4420 - seq_printf(m, "VCE: Enabled\n"); 4412 + seq_printf(m, "VCE: Powered up\n"); 4421 4413 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) 4422 4414 seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); 4423 4415 }

+1 -1

drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c

··· 200 200 struct pp_hwmgr *hwmgr, 201 201 ATOM_Tonga_PPM_Table *atom_ppm_table) 202 202 { 203 - struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL); 203 + struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 204 204 struct phm_ppt_v1_information *pp_table_information = 205 205 (struct phm_ppt_v1_information *)(hwmgr->pptable); 206 206

+16 -1

drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c

··· 3999 3999 uint32_t sclk, mclk, activity_percent; 4000 4000 uint32_t offset, val_vid; 4001 4001 struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); 4002 + struct amdgpu_device *adev = hwmgr->adev; 4002 4003 4003 4004 /* size must be at least 4 bytes for all sensors */ 4004 4005 if (*size < 4) ··· 4043 4042 *size = 4; 4044 4043 return 0; 4045 4044 case AMDGPU_PP_SENSOR_GPU_INPUT_POWER: 4046 - return smu7_get_gpu_power(hwmgr, (uint32_t *)value); 4045 + if ((adev->asic_type != CHIP_HAWAII) && 4046 + (adev->asic_type != CHIP_BONAIRE) && 4047 + (adev->asic_type != CHIP_FIJI) && 4048 + (adev->asic_type != CHIP_TONGA)) 4049 + return smu7_get_gpu_power(hwmgr, (uint32_t *)value); 4050 + else 4051 + return -EOPNOTSUPP; 4052 + case AMDGPU_PP_SENSOR_GPU_AVG_POWER: 4053 + if ((adev->asic_type != CHIP_HAWAII) && 4054 + (adev->asic_type != CHIP_BONAIRE) && 4055 + (adev->asic_type != CHIP_FIJI) && 4056 + (adev->asic_type != CHIP_TONGA)) 4057 + return -EOPNOTSUPP; 4058 + else 4059 + return smu7_get_gpu_power(hwmgr, (uint32_t *)value); 4047 4060 case AMDGPU_PP_SENSOR_VDDGFX: 4048 4061 if ((data->vr_config & VRCONF_VDDGFX_MASK) == 4049 4062 (VR_SVI2_PLANE_2 << VRCONF_VDDGFX_SHIFT))

+9 -6

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

··· 970 970 if (i < (clocks.num_levels - 1)) 971 971 clk2 = clocks.data[i + 1].clocks_in_khz / 1000; 972 972 973 - if (curr_clk >= clk1 && curr_clk < clk2) { 973 + if (curr_clk == clk1) { 974 + level = i; 975 + } else if (curr_clk >= clk1 && curr_clk < clk2) { 974 976 level = (curr_clk - clk1) <= (clk2 - curr_clk) ? 975 977 i : 976 978 i + 1; ··· 2236 2234 continue; 2237 2235 } 2238 2236 2239 - if (ret) { 2240 - dev_err(adev->dev, 2241 - "failed to send mode2 message \tparam: 0x%08x error code %d\n", 2242 - SMU_RESET_MODE_2, ret); 2237 + if (ret) 2243 2238 goto out; 2244 - } 2239 + 2245 2240 } while (ret == -ETIME && timeout); 2246 2241 2247 2242 out: 2248 2243 mutex_unlock(&smu->message_lock); 2244 + 2245 + if (ret) 2246 + dev_err(adev->dev, "failed to send mode2 reset, error code %d", 2247 + ret); 2249 2248 2250 2249 return ret; 2251 2250 }

+3

drivers/gpu/drm/nouveau/nouveau_vmm.c

··· 108 108 } else { 109 109 ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, 110 110 mem->mem.size, &tmp); 111 + if (ret) 112 + goto done; 113 + 111 114 vma->addr = tmp.addr; 112 115 } 113 116

+1 -1

drivers/gpu/drm/xe/Kconfig

··· 47 47 48 48 config DRM_XE_DISPLAY 49 49 bool "Enable display support" 50 - depends on DRM_XE && EXPERT && DRM_XE=m 50 + depends on DRM_XE && DRM_XE=m 51 51 select FB_IOMEM_HELPERS 52 52 select I2C 53 53 select I2C_ALGOBIT

-1

drivers/gpu/drm/xe/Makefile

··· 17 17 subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) 18 18 subdir-ccflags-y += $(call cc-option, -Wformat-overflow) 19 19 subdir-ccflags-y += $(call cc-option, -Wformat-truncation) 20 - subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) 21 20 subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) 22 21 # The following turn off the warnings enabled by -Wextra 23 22 ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)

+2 -3

drivers/gpu/drm/xe/tests/xe_bo.c

··· 125 125 126 126 bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, 127 127 ttm_bo_type_device, bo_flags); 128 - 129 - xe_bo_lock(bo, false); 130 - 131 128 if (IS_ERR(bo)) { 132 129 KUNIT_FAIL(test, "Failed to create bo.\n"); 133 130 return; 134 131 } 132 + 133 + xe_bo_lock(bo, false); 135 134 136 135 kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); 137 136 ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,

+1 -1

drivers/gpu/drm/xe/tests/xe_migrate.c

··· 331 331 xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); 332 332 333 333 emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, 334 - &src_it, XE_PAGE_SIZE, pt); 334 + &src_it, XE_PAGE_SIZE, pt->ttm.resource); 335 335 336 336 run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); 337 337

+8 -8

drivers/gpu/drm/xe/xe_bo.c

··· 125 125 static void try_add_system(struct xe_device *xe, struct xe_bo *bo, 126 126 u32 bo_flags, u32 *c) 127 127 { 128 - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); 129 - 130 128 if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { 129 + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); 130 + 131 131 bo->placements[*c] = (struct ttm_place) { 132 132 .mem_type = XE_PL_TT, 133 133 }; ··· 144 144 struct ttm_place place = { .mem_type = mem_type }; 145 145 struct xe_mem_region *vram; 146 146 u64 io_size; 147 + 148 + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); 147 149 148 150 vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram; 149 151 xe_assert(xe, vram && vram->usable_size); ··· 177 175 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, 178 176 u32 bo_flags, u32 *c) 179 177 { 180 - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); 181 - 182 178 if (bo->props.preferred_gt == XE_GT1) { 183 179 if (bo_flags & XE_BO_CREATE_VRAM1_BIT) 184 180 add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); ··· 193 193 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, 194 194 u32 bo_flags, u32 *c) 195 195 { 196 - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); 197 - 198 196 if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { 197 + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); 198 + 199 199 bo->placements[*c] = (struct ttm_place) { 200 200 .mem_type = XE_PL_STOLEN, 201 201 .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | ··· 442 442 443 443 if (vram->mapping && 444 444 mem->placement & TTM_PL_FLAG_CONTIGUOUS) 445 - mem->bus.addr = (u8 *)vram->mapping + 445 + mem->bus.addr = (u8 __force *)vram->mapping + 446 446 mem->bus.offset; 447 447 448 448 mem->bus.offset += vram->io_start; ··· 734 734 /* Create a new VMAP once kernel BO back in VRAM */ 735 735 if (!ret && resource_is_vram(new_mem)) { 736 736 struct xe_mem_region *vram = res_to_mem_region(new_mem); 737 - void *new_addr = vram->mapping + 737 + void __iomem *new_addr = vram->mapping + 738 738 (new_mem->start << PAGE_SHIFT); 739 739 740 740 if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {

+1 -1

drivers/gpu/drm/xe/xe_device.c

··· 484 484 485 485 err = xe_device_set_has_flat_ccs(xe); 486 486 if (err) 487 - return err; 487 + goto err_irq_shutdown; 488 488 489 489 err = xe_mmio_probe_vram(xe); 490 490 if (err)

+4 -4

drivers/gpu/drm/xe/xe_device_types.h

··· 97 97 */ 98 98 resource_size_t actual_physical_size; 99 99 /** @mapping: pointer to VRAM mappable space */ 100 - void *__iomem mapping; 100 + void __iomem *mapping; 101 101 }; 102 102 103 103 /** ··· 146 146 size_t size; 147 147 148 148 /** @regs: pointer to tile's MMIO space (starting with registers) */ 149 - void *regs; 149 + void __iomem *regs; 150 150 } mmio; 151 151 152 152 /** ··· 159 159 size_t size; 160 160 161 161 /** @regs: pointer to tile's additional MMIO-extension space */ 162 - void *regs; 162 + void __iomem *regs; 163 163 } mmio_ext; 164 164 165 165 /** @mem: memory management info for tile */ ··· 301 301 /** @size: size of MMIO space for device */ 302 302 size_t size; 303 303 /** @regs: pointer to MMIO space for device */ 304 - void *regs; 304 + void __iomem *regs; 305 305 } mmio; 306 306 307 307 /** @mem: memory info for device */

+4 -3

drivers/gpu/drm/xe/xe_exec.c

··· 115 115 struct xe_sched_job *job; 116 116 struct dma_fence *rebind_fence; 117 117 struct xe_vm *vm; 118 - bool write_locked; 118 + bool write_locked, skip_retry = false; 119 119 ktime_t end = 0; 120 120 int err = 0; 121 121 ··· 227 227 } 228 228 229 229 if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { 230 - err = -EWOULDBLOCK; 230 + err = -EWOULDBLOCK; /* Aliased to -EAGAIN */ 231 + skip_retry = true; 231 232 goto err_exec; 232 233 } 233 234 ··· 338 337 up_write(&vm->lock); 339 338 else 340 339 up_read(&vm->lock); 341 - if (err == -EAGAIN) 340 + if (err == -EAGAIN && !skip_retry) 342 341 goto retry; 343 342 err_syncs: 344 343 for (i = 0; i < num_syncs; i++)

+5

drivers/gpu/drm/xe/xe_exec_queue.c

··· 67 67 q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; 68 68 q->sched_props.preempt_timeout_us = 69 69 hwe->eclass->sched_props.preempt_timeout_us; 70 + if (q->flags & EXEC_QUEUE_FLAG_KERNEL && 71 + q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) 72 + q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; 73 + else 74 + q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; 70 75 71 76 if (xe_exec_queue_is_parallel(q)) { 72 77 q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);

+4 -2

drivers/gpu/drm/xe/xe_exec_queue_types.h

··· 52 52 struct xe_vm *vm; 53 53 /** @class: class of this exec queue */ 54 54 enum xe_engine_class class; 55 - /** @priority: priority of this exec queue */ 56 - enum xe_exec_queue_priority priority; 57 55 /** 58 56 * @logical_mask: logical mask of where job submitted to exec queue can run 59 57 */ ··· 82 84 #define EXEC_QUEUE_FLAG_VM BIT(4) 83 85 /* child of VM queue for multi-tile VM jobs */ 84 86 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) 87 + /* kernel exec_queue only, set priority to highest level */ 88 + #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(6) 85 89 86 90 /** 87 91 * @flags: flags for this exec queue, should statically setup aside from ban ··· 142 142 u32 timeslice_us; 143 143 /** @preempt_timeout_us: preemption timeout in micro-seconds */ 144 144 u32 preempt_timeout_us; 145 + /** @priority: priority of this exec queue */ 146 + enum xe_exec_queue_priority priority; 145 147 } sched_props; 146 148 147 149 /** @compute: compute exec queue state */

+3

drivers/gpu/drm/xe/xe_gt_freq.c

··· 196 196 struct xe_device *xe = gt_to_xe(gt); 197 197 int err; 198 198 199 + if (xe->info.skip_guc_pc) 200 + return; 201 + 199 202 gt->freq = kobject_create_and_add("freq0", gt->sysfs); 200 203 if (!gt->freq) { 201 204 drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",

+6 -1

drivers/gpu/drm/xe/xe_guc.c

··· 60 60 61 61 static u32 guc_ctl_feature_flags(struct xe_guc *guc) 62 62 { 63 - return GUC_CTL_ENABLE_SLPC; 63 + u32 flags = 0; 64 + 65 + if (!guc_to_xe(guc)->info.skip_guc_pc) 66 + flags |= GUC_CTL_ENABLE_SLPC; 67 + 68 + return flags; 64 69 } 65 70 66 71 static u32 guc_ctl_log_params_flags(struct xe_guc *guc)

+3 -4

drivers/gpu/drm/xe/xe_guc_submit.c

··· 421 421 { 422 422 struct exec_queue_policy policy; 423 423 struct xe_device *xe = guc_to_xe(guc); 424 - enum xe_exec_queue_priority prio = q->priority; 424 + enum xe_exec_queue_priority prio = q->sched_props.priority; 425 425 u32 timeslice_us = q->sched_props.timeslice_us; 426 426 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 427 427 ··· 1231 1231 err = xe_sched_entity_init(&ge->entity, sched); 1232 1232 if (err) 1233 1233 goto err_sched; 1234 - q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; 1235 1234 1236 1235 if (xe_exec_queue_is_lr(q)) 1237 1236 INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); ··· 1300 1301 { 1301 1302 struct xe_sched_msg *msg; 1302 1303 1303 - if (q->priority == priority || exec_queue_killed_or_banned(q)) 1304 + if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) 1304 1305 return 0; 1305 1306 1306 1307 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1307 1308 if (!msg) 1308 1309 return -ENOMEM; 1309 1310 1311 + q->sched_props.priority = priority; 1310 1312 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1311 - q->priority = priority; 1312 1313 1313 1314 return 0; 1314 1315 }

+81 -52

drivers/gpu/drm/xe/xe_migrate.c

··· 62 62 * out of the pt_bo. 63 63 */ 64 64 struct drm_suballoc_manager vm_update_sa; 65 + /** @min_chunk_size: For dgfx, Minimum chunk size */ 66 + u64 min_chunk_size; 65 67 }; 66 68 67 69 #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */ ··· 346 344 347 345 m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, 348 346 EXEC_QUEUE_FLAG_KERNEL | 349 - EXEC_QUEUE_FLAG_PERMANENT); 347 + EXEC_QUEUE_FLAG_PERMANENT | 348 + EXEC_QUEUE_FLAG_HIGH_PRIORITY); 350 349 } else { 351 350 m->q = xe_exec_queue_create_class(xe, primary_gt, vm, 352 351 XE_ENGINE_CLASS_COPY, ··· 358 355 xe_vm_close_and_put(vm); 359 356 return ERR_CAST(m->q); 360 357 } 361 - if (xe->info.has_usm) 362 - m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; 363 358 364 359 mutex_init(&m->job_mutex); 365 360 366 361 err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); 367 362 if (err) 368 363 return ERR_PTR(err); 364 + 365 + if (IS_DGFX(xe)) { 366 + if (xe_device_has_flat_ccs(xe)) 367 + /* min chunk size corresponds to 4K of CCS Metadata */ 368 + m->min_chunk_size = SZ_4K * SZ_64K / 369 + xe_device_ccs_bytes(xe, SZ_64K); 370 + else 371 + /* Somewhat arbitrary to avoid a huge amount of blits */ 372 + m->min_chunk_size = SZ_64K; 373 + m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size); 374 + drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n", 375 + (unsigned long long)m->min_chunk_size); 376 + } 369 377 370 378 return m; 371 379 } ··· 389 375 return MAX_PREEMPTDISABLE_TRANSFER; 390 376 } 391 377 392 - static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur) 378 + static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur) 393 379 { 394 - /* 395 - * For VRAM we use identity mapped pages so we are limited to current 396 - * cursor size. For system we program the pages ourselves so we have no 397 - * such limitation. 398 - */ 399 - return min_t(u64, max_mem_transfer_per_pass(xe), 400 - mem_type_is_vram(cur->mem_type) ? cur->size : 401 - cur->remaining); 380 + struct xe_device *xe = tile_to_xe(m->tile); 381 + u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining); 382 + 383 + if (mem_type_is_vram(cur->mem_type)) { 384 + /* 385 + * VRAM we want to blit in chunks with sizes aligned to 386 + * min_chunk_size in order for the offset to CCS metadata to be 387 + * page-aligned. If it's the last chunk it may be smaller. 388 + * 389 + * Another constraint is that we need to limit the blit to 390 + * the VRAM block size, unless size is smaller than 391 + * min_chunk_size. 392 + */ 393 + u64 chunk = max_t(u64, cur->size, m->min_chunk_size); 394 + 395 + size = min_t(u64, size, chunk); 396 + if (size > m->min_chunk_size) 397 + size = round_down(size, m->min_chunk_size); 398 + } 399 + 400 + return size; 401 + } 402 + 403 + static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur) 404 + { 405 + /* If the chunk is not fragmented, allow identity map. */ 406 + return cur->size >= size; 402 407 } 403 408 404 409 static u32 pte_update_size(struct xe_migrate *m, ··· 430 397 u32 cmds = 0; 431 398 432 399 *L0_pt = pt_ofs; 433 - if (!is_vram) { 400 + if (is_vram && xe_migrate_allow_identity(*L0, cur)) { 401 + /* Offset into identity map. */ 402 + *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), 403 + cur->start + vram_region_gpu_offset(res)); 404 + cmds += cmd_size; 405 + } else { 434 406 /* Clip L0 to available size */ 435 407 u64 size = min(*L0, (u64)avail_pts * SZ_2M); 436 408 u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); ··· 451 413 452 414 /* Each chunk has a single blit command */ 453 415 cmds += cmd_size; 454 - } else { 455 - /* Offset into identity map. */ 456 - *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), 457 - cur->start + vram_region_gpu_offset(res)); 458 - cmds += cmd_size; 459 416 } 460 417 461 418 return cmds; ··· 460 427 struct xe_bb *bb, u32 at_pt, 461 428 bool is_vram, bool is_comp_pte, 462 429 struct xe_res_cursor *cur, 463 - u32 size, struct xe_bo *bo) 430 + u32 size, struct ttm_resource *res) 464 431 { 465 432 struct xe_device *xe = tile_to_xe(m->tile); 466 - 433 + struct xe_vm *vm = m->q->vm; 467 434 u16 pat_index; 468 435 u32 ptes; 469 436 u64 ofs = at_pt * XE_PAGE_SIZE; ··· 475 442 xe->pat.idx[XE_CACHE_NONE]; 476 443 else 477 444 pat_index = xe->pat.idx[XE_CACHE_WB]; 478 - 479 - /* 480 - * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently 481 - * we're only emitting VRAM PTEs during sanity tests, so when 482 - * that's moved to a Kunit test, we should condition VRAM PTEs 483 - * on running tests. 484 - */ 485 445 486 446 ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); 487 447 ··· 495 469 496 470 addr = xe_res_dma(cur) & PAGE_MASK; 497 471 if (is_vram) { 498 - /* Is this a 64K PTE entry? */ 499 - if ((m->q->vm->flags & XE_VM_FLAG_64K) && 500 - !(cur_ofs & (16 * 8 - 1))) { 501 - xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K)); 472 + if (vm->flags & XE_VM_FLAG_64K) { 473 + u64 va = cur_ofs * XE_PAGE_SIZE / 8; 474 + 475 + xe_assert(xe, (va & (SZ_64K - 1)) == 476 + (addr & (SZ_64K - 1))); 477 + 502 478 flags |= XE_PTE_PS64; 503 479 } 504 480 505 - addr += vram_region_gpu_offset(bo->ttm.resource); 481 + addr += vram_region_gpu_offset(res); 506 482 devmem = true; 507 483 } 508 484 509 - addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, 510 - addr, pat_index, 511 - 0, devmem, flags); 485 + addr = vm->pt_ops->pte_encode_addr(m->tile->xe, 486 + addr, pat_index, 487 + 0, devmem, flags); 512 488 bb->cs[bb->len++] = lower_32_bits(addr); 513 489 bb->cs[bb->len++] = upper_32_bits(addr); 514 490 ··· 722 694 bool usm = xe->info.has_usm; 723 695 u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; 724 696 725 - src_L0 = xe_migrate_res_sizes(xe, &src_it); 726 - dst_L0 = xe_migrate_res_sizes(xe, &dst_it); 697 + src_L0 = xe_migrate_res_sizes(m, &src_it); 698 + dst_L0 = xe_migrate_res_sizes(m, &dst_it); 727 699 728 700 drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", 729 701 pass++, src_L0, dst_L0); ··· 744 716 &ccs_ofs, &ccs_pt, 0, 745 717 2 * avail_pts, 746 718 avail_pts); 719 + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); 747 720 } 748 721 749 722 /* Add copy commands size here */ ··· 757 728 goto err_sync; 758 729 } 759 730 760 - if (!src_is_vram) 761 - emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, 762 - src_bo); 763 - else 731 + if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) 764 732 xe_res_next(&src_it, src_L0); 765 - 766 - if (!dst_is_vram) 767 - emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, 768 - dst_bo); 769 733 else 734 + emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, 735 + src); 736 + 737 + if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) 770 738 xe_res_next(&dst_it, src_L0); 739 + else 740 + emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, 741 + dst); 771 742 772 743 if (copy_system_ccs) 773 - emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo); 744 + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); 774 745 775 746 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 776 747 update_idx = bb->len; ··· 979 950 bool usm = xe->info.has_usm; 980 951 u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; 981 952 982 - clear_L0 = xe_migrate_res_sizes(xe, &src_it); 953 + clear_L0 = xe_migrate_res_sizes(m, &src_it); 983 954 984 955 drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); 985 956 ··· 1006 977 1007 978 size -= clear_L0; 1008 979 /* Preemption is enabled again by the ring ops. */ 1009 - if (!clear_vram) { 1010 - emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, 1011 - bo); 1012 - } else { 980 + if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) 1013 981 xe_res_next(&src_it, clear_L0); 1014 - } 982 + else 983 + emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, 984 + dst); 985 + 1015 986 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 1016 987 update_idx = bb->len; 1017 988

+1 -1

drivers/gpu/drm/xe/xe_mmio.c

··· 303 303 u8 id, tile_count = xe->info.tile_count; 304 304 struct xe_gt *gt = xe_root_mmio_gt(xe); 305 305 struct xe_tile *tile; 306 - void *regs; 306 + void __iomem *regs; 307 307 u32 mtcfg; 308 308 309 309 if (tile_count == 1)

+2 -2

drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c

··· 31 31 /* GPU base offset */ 32 32 resource_size_t stolen_base; 33 33 34 - void *__iomem mapping; 34 + void __iomem *mapping; 35 35 }; 36 36 37 37 static inline struct xe_ttm_stolen_mgr * ··· 275 275 drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); 276 276 277 277 if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping) 278 - mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset; 278 + mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset; 279 279 280 280 mem->bus.offset += mgr->io_base; 281 281 mem->bus.is_iomem = true;

+9 -6

drivers/gpu/drm/xe/xe_vm.c

··· 335 335 down_write(&vm->lock); 336 336 err = drm_gpuvm_exec_lock(&vm_exec); 337 337 if (err) 338 - return err; 338 + goto out_up_write; 339 339 340 340 pfence = xe_preempt_fence_create(q, q->compute.context, 341 341 ++q->compute.seqno); 342 342 if (!pfence) { 343 343 err = -ENOMEM; 344 - goto out_unlock; 344 + goto out_fini; 345 345 } 346 346 347 347 list_add(&q->compute.link, &vm->preempt.exec_queues); ··· 364 364 365 365 up_read(&vm->userptr.notifier_lock); 366 366 367 - out_unlock: 367 + out_fini: 368 368 drm_exec_fini(exec); 369 + out_up_write: 369 370 up_write(&vm->lock); 370 371 371 372 return err; ··· 2064 2063 if (err) 2065 2064 return ERR_PTR(err); 2066 2065 2067 - vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj); 2068 - if (!vm_bo) 2069 - break; 2066 + vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); 2067 + if (IS_ERR(vm_bo)) { 2068 + xe_bo_unlock(bo); 2069 + return ERR_CAST(vm_bo); 2070 + } 2070 2071 2071 2072 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); 2072 2073 drm_gpuvm_bo_put(vm_bo);

Configure Feed

Configure Feed