Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-next-2024-09-28' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
"Regular fixes for the week to end the merge window, i915 and xe have a
few each, amdgpu makes up most of it with a bunch of SR-IOV related
fixes amongst others.

i915:
- Fix BMG support to UHBR13.5
- Two PSR fixes
- Fix colorimetry detection for DP

xe:
- Fix macro for checking minimum GuC version
- Fix CCS offset calculation for some BMG SKUs
- Fix locking on memory usage reporting via fdinfo and BO destroy
- Fix GPU page fault handler on a closed VM
- Fix overflow in oa batch buffer

amdgpu:
- MES 12 fix
- KFD fence sync fix
- SR-IOV fixes
- VCN 4.0.6 fix
- SDMA 7.x fix
- Bump driver version to note cleared VRAM support
- SWSMU fix
- CU occupancy logic fix
- SDMA queue fix"

* tag 'drm-next-2024-09-28' of https://gitlab.freedesktop.org/drm/kernel: (79 commits)
drm/amd/pm: update workload mask after the setting
drm/amdgpu: bump driver version for cleared VRAM
drm/amdgpu: fix vbios fetching for SR-IOV
drm/amdgpu: fix PTE copy corruption for sdma 7
drm/amdkfd: Add SDMA queue quantum support for GFX12
drm/amdgpu/vcn: enable AV1 on both instances
drm/amdkfd: Fix CU occupancy for GFX 9.4.3
drm/amdkfd: Update logic for CU occupancy calculations
drm/amdgpu: skip coredump after job timeout in SRIOV
drm/amdgpu: sync to KFD fences before clearing PTEs
drm/amdgpu/mes12: set enable_level_process_quantum_check
drm/i915/dp: Fix colorimetry detection
drm/amdgpu/mes12: reduce timeout
drm/amdgpu/mes11: reduce timeout
drm/amdgpu: use GEM references instead of TTMs v2
drm/amd/display: Allow backlight to go below `AMDGPU_DM_DEFAULT_MIN_BACKLIGHT`
drm/amd/display: Fix kdoc entry for 'tps' in 'dc_process_dmub_dpia_set_tps_notification'
drm/amdgpu: update golden regs for gfx12
drm/amdgpu: clean up vbios fetching code
drm/amd/display: handle nulled pipe context in DCE110's set_drr()
...

+1094 -859
-4
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 1083 1083 1084 1084 struct amdgpu_virt virt; 1085 1085 1086 - /* link all shadow bo */ 1087 - struct list_head shadow_list; 1088 - struct mutex shadow_list_lock; 1089 - 1090 1086 /* record hw reset is performed */ 1091 1087 bool has_hw_reset; 1092 1088 u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
··· 511 511 return -EINVAL; 512 512 } 513 513 514 - /* udpate aca bank to aca source error_cache first */ 514 + /* update aca bank to aca source error_cache first */ 515 515 ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); 516 516 if (ret) 517 517 return ret;
+43 -61
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
··· 950 950 * @inst: xcc's instance number on a multi-XCC setup 951 951 */ 952 952 static void get_wave_count(struct amdgpu_device *adev, int queue_idx, 953 - int *wave_cnt, int *vmid, uint32_t inst) 953 + struct kfd_cu_occupancy *queue_cnt, uint32_t inst) 954 954 { 955 955 int pipe_idx; 956 956 int queue_slot; 957 957 unsigned int reg_val; 958 - 958 + unsigned int wave_cnt; 959 959 /* 960 960 * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID 961 961 * parameters to read out waves in flight. Get VMID if there are 962 962 * non-zero waves in flight. 963 963 */ 964 - *vmid = 0xFF; 965 - *wave_cnt = 0; 966 964 pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; 967 965 queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; 968 - soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); 969 - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + 970 - queue_slot); 971 - *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; 972 - if (*wave_cnt != 0) 973 - *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) & 974 - CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; 966 + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst)); 967 + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 968 + mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); 969 + wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; 970 + if (wave_cnt != 0) { 971 + queue_cnt->wave_cnt += wave_cnt; 972 + queue_cnt->doorbell_off = 973 + (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) & 974 + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >> 975 + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; 976 + } 975 977 } 976 978 977 979 /** ··· 983 981 * or more queues running and submitting waves to compute units. 984 982 * 985 983 * @adev: Handle of device from which to get number of waves in flight 986 - * @pasid: Identifies the process for which this query call is invoked 987 - * @pasid_wave_cnt: Output parameter updated with number of waves in flight that 988 - * belong to process with given pasid 984 + * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset 985 + * for comparison later. 989 986 * @max_waves_per_cu: Output parameter updated with maximum number of waves 990 987 * possible per Compute Unit 991 988 * @inst: xcc's instance number on a multi-XCC setup ··· 1012 1011 * number of waves that are in flight for the queue at specified index. The 1013 1012 * index ranges from 0 to 7. 1014 1013 * 1015 - * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID 1016 - * of the wave(s). 1014 + * If non-zero waves are in flight, store the corresponding doorbell offset 1015 + * of the queue, along with the wave count. 1017 1016 * 1018 - * Determine if VMID from above step maps to pasid provided as parameter. If 1019 - * it matches agrregate the wave count. That the VMID will not match pasid is 1020 - * a normal condition i.e. a device is expected to support multiple queues 1021 - * from multiple proceses. 1017 + * Determine if the queue belongs to the process by comparing the doorbell 1018 + * offset against the process's queues. If it matches, aggregate the wave 1019 + * count for the process. 1022 1020 * 1023 1021 * Reading registers referenced above involves programming GRBM appropriately 1024 1022 */ 1025 - void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, 1026 - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst) 1023 + void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, 1024 + struct kfd_cu_occupancy *cu_occupancy, 1025 + int *max_waves_per_cu, uint32_t inst) 1027 1026 { 1028 1027 int qidx; 1029 - int vmid; 1030 1028 int se_idx; 1031 - int sh_idx; 1032 1029 int se_cnt; 1033 - int sh_cnt; 1034 - int wave_cnt; 1035 1030 int queue_map; 1036 - int pasid_tmp; 1037 1031 int max_queue_cnt; 1038 - int vmid_wave_cnt = 0; 1039 1032 DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); 1040 1033 1041 1034 lock_spi_csq_mutexes(adev); 1042 - soc15_grbm_select(adev, 1, 0, 0, 0, inst); 1035 + soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst)); 1043 1036 1044 1037 /* 1045 1038 * Iterate through the shader engines and arrays of the device ··· 1043 1048 AMDGPU_MAX_QUEUES); 1044 1049 max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * 1045 1050 adev->gfx.mec.num_queue_per_pipe; 1046 - sh_cnt = adev->gfx.config.max_sh_per_se; 1047 1051 se_cnt = adev->gfx.config.max_shader_engines; 1048 1052 for (se_idx = 0; se_idx < se_cnt; se_idx++) { 1049 - for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { 1053 + amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst); 1054 + queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS); 1050 1055 1051 - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst); 1052 - queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); 1053 - 1054 - /* 1055 - * Assumption: queue map encodes following schema: four 1056 - * pipes per each micro-engine, with each pipe mapping 1057 - * eight queues. This schema is true for GFX9 devices 1058 - * and must be verified for newer device families 1056 + /* 1057 + * Assumption: queue map encodes following schema: four 1058 + * pipes per each micro-engine, with each pipe mapping 1059 + * eight queues. This schema is true for GFX9 devices 1060 + * and must be verified for newer device families 1061 + */ 1062 + for (qidx = 0; qidx < max_queue_cnt; qidx++) { 1063 + /* Skip qeueus that are not associated with 1064 + * compute functions 1059 1065 */ 1060 - for (qidx = 0; qidx < max_queue_cnt; qidx++) { 1066 + if (!test_bit(qidx, cp_queue_bitmap)) 1067 + continue; 1061 1068 1062 - /* Skip qeueus that are not associated with 1063 - * compute functions 1064 - */ 1065 - if (!test_bit(qidx, cp_queue_bitmap)) 1066 - continue; 1069 + if (!(queue_map & (1 << qidx))) 1070 + continue; 1067 1071 1068 - if (!(queue_map & (1 << qidx))) 1069 - continue; 1070 - 1071 - /* Get number of waves in flight and aggregate them */ 1072 - get_wave_count(adev, qidx, &wave_cnt, &vmid, 1073 - inst); 1074 - if (wave_cnt != 0) { 1075 - pasid_tmp = 1076 - RREG32(SOC15_REG_OFFSET(OSSSYS, inst, 1077 - mmIH_VMID_0_LUT) + vmid); 1078 - if (pasid_tmp == pasid) 1079 - vmid_wave_cnt += wave_cnt; 1080 - } 1081 - } 1072 + /* Get number of waves in flight and aggregate them */ 1073 + get_wave_count(adev, qidx, &cu_occupancy[qidx], 1074 + inst); 1082 1075 } 1083 1076 } 1084 1077 1085 1078 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst); 1086 - soc15_grbm_select(adev, 0, 0, 0, 0, inst); 1079 + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst)); 1087 1080 unlock_spi_csq_mutexes(adev); 1088 1081 1089 1082 /* Update the output parameters and return */ 1090 - *pasid_wave_cnt = vmid_wave_cnt; 1091 1083 *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * 1092 1084 adev->gfx.cu_info.max_waves_per_simd; 1093 1085 }
+3 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
··· 52 52 uint8_t vmid, uint16_t *p_pasid); 53 53 void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, 54 54 uint32_t vmid, uint64_t page_table_base); 55 - void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, 56 - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst); 55 + void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, 56 + struct kfd_cu_occupancy *cu_occupancy, 57 + int *max_waves_per_cu, uint32_t inst); 57 58 void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, 58 59 uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, 59 60 uint32_t inst);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 1499 1499 } 1500 1500 } 1501 1501 1502 - ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); 1502 + ret = amdgpu_bo_pin(bo, domain); 1503 1503 if (ret) 1504 1504 pr_err("Error in Pinning BO to domain: %d\n", domain); 1505 1505
+51 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
··· 87 87 * part of the system bios. On boot, the system bios puts a 88 88 * copy of the igp rom at the start of vram if a discrete card is 89 89 * present. 90 + * For SR-IOV, the vbios image is also put in VRAM in the VF. 90 91 */ 91 - static bool igp_read_bios_from_vram(struct amdgpu_device *adev) 92 + static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) 92 93 { 93 94 uint8_t __iomem *bios; 94 95 resource_size_t vram_base; ··· 285 284 acpi_status status; 286 285 bool found = false; 287 286 288 - /* ATRM is for the discrete card only */ 289 - if (adev->flags & AMD_IS_APU) 290 - return false; 291 - 292 287 /* ATRM is for on-platform devices only */ 293 288 if (dev_is_removable(&adev->pdev->dev)) 294 289 return false; ··· 340 343 341 344 static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev) 342 345 { 343 - if (adev->flags & AMD_IS_APU) 344 - return igp_read_bios_from_vram(adev); 345 - else 346 - return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? 347 - false : amdgpu_asic_read_disabled_bios(adev); 346 + return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? 347 + false : amdgpu_asic_read_disabled_bios(adev); 348 348 } 349 349 350 350 #ifdef CONFIG_ACPI ··· 408 414 } 409 415 #endif 410 416 411 - bool amdgpu_get_bios(struct amdgpu_device *adev) 417 + static bool amdgpu_get_bios_apu(struct amdgpu_device *adev) 418 + { 419 + if (amdgpu_acpi_vfct_bios(adev)) { 420 + dev_info(adev->dev, "Fetched VBIOS from VFCT\n"); 421 + goto success; 422 + } 423 + 424 + if (amdgpu_read_bios_from_vram(adev)) { 425 + dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); 426 + goto success; 427 + } 428 + 429 + if (amdgpu_read_bios(adev)) { 430 + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); 431 + goto success; 432 + } 433 + 434 + if (amdgpu_read_platform_bios(adev)) { 435 + dev_info(adev->dev, "Fetched VBIOS from platform\n"); 436 + goto success; 437 + } 438 + 439 + dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); 440 + return false; 441 + 442 + success: 443 + return true; 444 + } 445 + 446 + static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) 412 447 { 413 448 if (amdgpu_atrm_get_bios(adev)) { 414 449 dev_info(adev->dev, "Fetched VBIOS from ATRM\n"); ··· 449 426 goto success; 450 427 } 451 428 452 - if (igp_read_bios_from_vram(adev)) { 429 + /* this is required for SR-IOV */ 430 + if (amdgpu_read_bios_from_vram(adev)) { 453 431 dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); 454 432 goto success; 455 433 } ··· 479 455 return false; 480 456 481 457 success: 482 - adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; 483 458 return true; 459 + } 460 + 461 + bool amdgpu_get_bios(struct amdgpu_device *adev) 462 + { 463 + bool found; 464 + 465 + if (adev->flags & AMD_IS_APU) 466 + found = amdgpu_get_bios_apu(adev); 467 + else 468 + found = amdgpu_get_bios_dgpu(adev); 469 + 470 + if (found) 471 + adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; 472 + 473 + return found; 484 474 } 485 475 486 476 /* helper function for soc15 and onwards to read bios from rom */
+3 -86
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 4107 4107 spin_lock_init(&adev->mm_stats.lock); 4108 4108 spin_lock_init(&adev->wb.lock); 4109 4109 4110 - INIT_LIST_HEAD(&adev->shadow_list); 4111 - mutex_init(&adev->shadow_list_lock); 4112 - 4113 4110 INIT_LIST_HEAD(&adev->reset_list); 4114 4111 4115 4112 INIT_LIST_HEAD(&adev->ras_list); ··· 5027 5030 } 5028 5031 5029 5032 /** 5030 - * amdgpu_device_recover_vram - Recover some VRAM contents 5031 - * 5032 - * @adev: amdgpu_device pointer 5033 - * 5034 - * Restores the contents of VRAM buffers from the shadows in GTT. Used to 5035 - * restore things like GPUVM page tables after a GPU reset where 5036 - * the contents of VRAM might be lost. 5037 - * 5038 - * Returns: 5039 - * 0 on success, negative error code on failure. 5040 - */ 5041 - static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 5042 - { 5043 - struct dma_fence *fence = NULL, *next = NULL; 5044 - struct amdgpu_bo *shadow; 5045 - struct amdgpu_bo_vm *vmbo; 5046 - long r = 1, tmo; 5047 - 5048 - if (amdgpu_sriov_runtime(adev)) 5049 - tmo = msecs_to_jiffies(8000); 5050 - else 5051 - tmo = msecs_to_jiffies(100); 5052 - 5053 - dev_info(adev->dev, "recover vram bo from shadow start\n"); 5054 - mutex_lock(&adev->shadow_list_lock); 5055 - list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { 5056 - /* If vm is compute context or adev is APU, shadow will be NULL */ 5057 - if (!vmbo->shadow) 5058 - continue; 5059 - shadow = vmbo->shadow; 5060 - 5061 - /* No need to recover an evicted BO */ 5062 - if (!shadow->tbo.resource || 5063 - shadow->tbo.resource->mem_type != TTM_PL_TT || 5064 - shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || 5065 - shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) 5066 - continue; 5067 - 5068 - r = amdgpu_bo_restore_shadow(shadow, &next); 5069 - if (r) 5070 - break; 5071 - 5072 - if (fence) { 5073 - tmo = dma_fence_wait_timeout(fence, false, tmo); 5074 - dma_fence_put(fence); 5075 - fence = next; 5076 - if (tmo == 0) { 5077 - r = -ETIMEDOUT; 5078 - break; 5079 - } else if (tmo < 0) { 5080 - r = tmo; 5081 - break; 5082 - } 5083 - } else { 5084 - fence = next; 5085 - } 5086 - } 5087 - mutex_unlock(&adev->shadow_list_lock); 5088 - 5089 - if (fence) 5090 - tmo = dma_fence_wait_timeout(fence, false, tmo); 5091 - dma_fence_put(fence); 5092 - 5093 - if (r < 0 || tmo <= 0) { 5094 - dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 5095 - return -EIO; 5096 - } 5097 - 5098 - dev_info(adev->dev, "recover vram bo from shadow done\n"); 5099 - return 0; 5100 - } 5101 - 5102 - 5103 - /** 5104 5033 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 5105 5034 * 5106 5035 * @adev: amdgpu_device pointer ··· 5088 5165 if (r) 5089 5166 return r; 5090 5167 5091 - if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 5168 + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) 5092 5169 amdgpu_inc_vram_lost(adev); 5093 - r = amdgpu_device_recover_vram(adev); 5094 - } 5095 - if (r) 5096 - return r; 5097 5170 5098 5171 /* need to be called during full access so we can't do it later like 5099 5172 * bare-metal does. ··· 5488 5569 } 5489 5570 } 5490 5571 5491 - if (!r) 5492 - r = amdgpu_device_recover_vram(tmp_adev); 5493 - else 5572 + if (r) 5494 5573 tmp_adev->asic_reset_res = r; 5495 5574 } 5496 5575 ··· 6106 6189 p2p_addressable = !(adev->gmc.aper_base & address_mask || 6107 6190 aper_limit & address_mask); 6108 6191 } 6109 - return is_large_bar && p2p_access && p2p_addressable; 6192 + return pcie_p2p && is_large_bar && p2p_access && p2p_addressable; 6110 6193 #else 6111 6194 return false; 6112 6195 #endif
+3 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
··· 233 233 } 234 234 235 235 if (!adev->enable_virtual_display) { 236 + new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 236 237 r = amdgpu_bo_pin(new_abo, 237 238 amdgpu_display_supported_domains(adev, new_abo->flags)); 238 239 if (unlikely(r != 0)) { ··· 1475 1474 if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) && 1476 1475 ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || 1477 1476 ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && 1478 - connector->display_info.is_hdmi && 1477 + connector && connector->display_info.is_hdmi && 1479 1478 amdgpu_display_is_hdtv_mode(mode)))) { 1480 1479 if (amdgpu_encoder->underscan_hborder != 0) 1481 1480 amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; ··· 1760 1759 1761 1760 r = amdgpu_bo_reserve(aobj, true); 1762 1761 if (r == 0) { 1762 + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1763 1763 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 1764 1764 if (r != 0) 1765 1765 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 117 117 * - 3.56.0 - Update IB start address and size alignment for decode and encode 118 118 * - 3.57.0 - Compute tunneling on GFX10+ 119 119 * - 3.58.0 - Add GFX12 DCC support 120 + * - 3.59.0 - Cleared VRAM 120 121 */ 121 122 #define KMS_DRIVER_MAJOR 3 122 - #define KMS_DRIVER_MINOR 58 123 + #define KMS_DRIVER_MINOR 59 123 124 #define KMS_DRIVER_PATCHLEVEL 0 124 125 125 126 /*
+5 -8
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
··· 43 43 #include "amdgpu_hmm.h" 44 44 #include "amdgpu_xgmi.h" 45 45 46 - static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; 47 - 48 46 static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) 49 47 { 50 48 struct ttm_buffer_object *bo = vmf->vma->vm_private_data; ··· 85 87 86 88 static void amdgpu_gem_object_free(struct drm_gem_object *gobj) 87 89 { 88 - struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); 90 + struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj); 89 91 90 - if (robj) { 91 - amdgpu_hmm_unregister(robj); 92 - amdgpu_bo_unref(&robj); 92 + if (aobj) { 93 + amdgpu_hmm_unregister(aobj); 94 + ttm_bo_put(&aobj->tbo); 93 95 } 94 96 } 95 97 ··· 124 126 125 127 bo = &ubo->bo; 126 128 *obj = &bo->tbo.base; 127 - (*obj)->funcs = &amdgpu_gem_object_funcs; 128 129 129 130 return 0; 130 131 } ··· 292 295 return drm_gem_ttm_mmap(obj, vma); 293 296 } 294 297 295 - static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { 298 + const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { 296 299 .free = amdgpu_gem_object_free, 297 300 .open = amdgpu_gem_object_open, 298 301 .close = amdgpu_gem_object_close,
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
··· 33 33 #define AMDGPU_GEM_DOMAIN_MAX 0x3 34 34 #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) 35 35 36 + extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs; 37 + 36 38 unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); 37 39 38 40 /*
+4 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 107 107 /* 108 108 * Do the coredump immediately after a job timeout to get a very 109 109 * close dump/snapshot/representation of GPU's current error status 110 + * Skip it for SRIOV, since VF FLR will be triggered by host driver 111 + * before job timeout 110 112 */ 111 - amdgpu_job_core_dump(adev, job); 113 + if (!amdgpu_sriov_vf(adev)) 114 + amdgpu_job_core_dump(adev, job); 112 115 113 116 if (amdgpu_gpu_recovery && 114 117 amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
+9 -123
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
··· 77 77 amdgpu_bo_destroy(tbo); 78 78 } 79 79 80 - static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) 81 - { 82 - struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); 83 - struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo; 84 - struct amdgpu_bo_vm *vmbo; 85 - 86 - bo = shadow_bo->parent; 87 - vmbo = to_amdgpu_bo_vm(bo); 88 - /* in case amdgpu_device_recover_vram got NULL of bo->parent */ 89 - if (!list_empty(&vmbo->shadow_list)) { 90 - mutex_lock(&adev->shadow_list_lock); 91 - list_del_init(&vmbo->shadow_list); 92 - mutex_unlock(&adev->shadow_list_lock); 93 - } 94 - 95 - amdgpu_bo_destroy(tbo); 96 - } 97 - 98 80 /** 99 81 * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo 100 82 * @bo: buffer object to be checked ··· 90 108 bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) 91 109 { 92 110 if (bo->destroy == &amdgpu_bo_destroy || 93 - bo->destroy == &amdgpu_bo_user_destroy || 94 - bo->destroy == &amdgpu_bo_vm_destroy) 111 + bo->destroy == &amdgpu_bo_user_destroy) 95 112 return true; 96 113 97 114 return false; ··· 564 583 if (bo == NULL) 565 584 return -ENOMEM; 566 585 drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size); 586 + bo->tbo.base.funcs = &amdgpu_gem_object_funcs; 567 587 bo->vm_bo = NULL; 568 588 bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : 569 589 bp->domain; ··· 705 723 } 706 724 707 725 /** 708 - * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list 709 - * 710 - * @vmbo: BO that will be inserted into the shadow list 711 - * 712 - * Insert a BO to the shadow list. 713 - */ 714 - void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) 715 - { 716 - struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev); 717 - 718 - mutex_lock(&adev->shadow_list_lock); 719 - list_add_tail(&vmbo->shadow_list, &adev->shadow_list); 720 - vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo); 721 - vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy; 722 - mutex_unlock(&adev->shadow_list_lock); 723 - } 724 - 725 - /** 726 - * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow 727 - * 728 - * @shadow: &amdgpu_bo shadow to be restored 729 - * @fence: dma_fence associated with the operation 730 - * 731 - * Copies a buffer object's shadow content back to the object. 732 - * This is used for recovering a buffer from its shadow in case of a gpu 733 - * reset where vram context may be lost. 734 - * 735 - * Returns: 736 - * 0 for success or a negative error code on failure. 737 - */ 738 - int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) 739 - 740 - { 741 - struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev); 742 - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; 743 - uint64_t shadow_addr, parent_addr; 744 - 745 - shadow_addr = amdgpu_bo_gpu_offset(shadow); 746 - parent_addr = amdgpu_bo_gpu_offset(shadow->parent); 747 - 748 - return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, 749 - amdgpu_bo_size(shadow), NULL, fence, 750 - true, false, 0); 751 - } 752 - 753 - /** 754 726 * amdgpu_bo_kmap - map an &amdgpu_bo buffer object 755 727 * @bo: &amdgpu_bo buffer object to be mapped 756 728 * @ptr: kernel virtual address to be returned ··· 787 851 if (bo == NULL) 788 852 return NULL; 789 853 790 - ttm_bo_get(&bo->tbo); 854 + drm_gem_object_get(&bo->tbo.base); 791 855 return bo; 792 856 } 793 857 ··· 799 863 */ 800 864 void amdgpu_bo_unref(struct amdgpu_bo **bo) 801 865 { 802 - struct ttm_buffer_object *tbo; 803 - 804 866 if ((*bo) == NULL) 805 867 return; 806 868 807 - tbo = &((*bo)->tbo); 808 - ttm_bo_put(tbo); 869 + drm_gem_object_put(&(*bo)->tbo.base); 809 870 *bo = NULL; 810 871 } 811 872 812 873 /** 813 - * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object 874 + * amdgpu_bo_pin - pin an &amdgpu_bo buffer object 814 875 * @bo: &amdgpu_bo buffer object to be pinned 815 876 * @domain: domain to be pinned to 816 - * @min_offset: the start of requested address range 817 - * @max_offset: the end of requested address range 818 877 * 819 - * Pins the buffer object according to requested domain and address range. If 820 - * the memory is unbound gart memory, binds the pages into gart table. Adjusts 821 - * pin_count and pin_size accordingly. 878 + * Pins the buffer object according to requested domain. If the memory is 879 + * unbound gart memory, binds the pages into gart table. Adjusts pin_count and 880 + * pin_size accordingly. 822 881 * 823 882 * Pinning means to lock pages in memory along with keeping them at a fixed 824 883 * offset. It is required when a buffer can not be moved, for example, when 825 884 * a display buffer is being scanned out. 826 885 * 827 - * Compared with amdgpu_bo_pin(), this function gives more flexibility on 828 - * where to pin a buffer if there are specific restrictions on where a buffer 829 - * must be located. 830 - * 831 886 * Returns: 832 887 * 0 for success or a negative error code on failure. 833 888 */ 834 - int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, 835 - u64 min_offset, u64 max_offset) 889 + int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) 836 890 { 837 891 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 838 892 struct ttm_operation_ctx ctx = { false, false }; ··· 830 904 831 905 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) 832 906 return -EPERM; 833 - 834 - if (WARN_ON_ONCE(min_offset > max_offset)) 835 - return -EINVAL; 836 907 837 908 /* Check domain to be pinned to against preferred domains */ 838 909 if (bo->preferred_domains & domain) ··· 856 933 return -EINVAL; 857 934 858 935 ttm_bo_pin(&bo->tbo); 859 - 860 - if (max_offset != 0) { 861 - u64 domain_start = amdgpu_ttm_domain_start(adev, 862 - mem_type); 863 - WARN_ON_ONCE(max_offset < 864 - (amdgpu_bo_gpu_offset(bo) - domain_start)); 865 - } 866 - 867 936 return 0; 868 937 } 869 938 ··· 872 957 bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 873 958 amdgpu_bo_placement_from_domain(bo, domain); 874 959 for (i = 0; i < bo->placement.num_placement; i++) { 875 - unsigned int fpfn, lpfn; 876 - 877 - fpfn = min_offset >> PAGE_SHIFT; 878 - lpfn = max_offset >> PAGE_SHIFT; 879 - 880 - if (fpfn > bo->placements[i].fpfn) 881 - bo->placements[i].fpfn = fpfn; 882 - if (!bo->placements[i].lpfn || 883 - (lpfn && lpfn < bo->placements[i].lpfn)) 884 - bo->placements[i].lpfn = lpfn; 885 - 886 960 if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && 887 961 bo->placements[i].mem_type == TTM_PL_VRAM) 888 962 bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; ··· 895 991 896 992 error: 897 993 return r; 898 - } 899 - 900 - /** 901 - * amdgpu_bo_pin - pin an &amdgpu_bo buffer object 902 - * @bo: &amdgpu_bo buffer object to be pinned 903 - * @domain: domain to be pinned to 904 - * 905 - * A simple wrapper to amdgpu_bo_pin_restricted(). 906 - * Provides a simpler API for buffers that do not have any strict restrictions 907 - * on where a buffer must be located. 908 - * 909 - * Returns: 910 - * 0 for success or a negative error code on failure. 911 - */ 912 - int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) 913 - { 914 - bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 915 - return amdgpu_bo_pin_restricted(bo, domain, 0, 0); 916 994 } 917 995 918 996 /**
-23
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
··· 136 136 137 137 struct amdgpu_bo_vm { 138 138 struct amdgpu_bo bo; 139 - struct amdgpu_bo *shadow; 140 - struct list_head shadow_list; 141 139 struct amdgpu_vm_bo_base entries[]; 142 140 }; 143 141 ··· 273 275 return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; 274 276 } 275 277 276 - /** 277 - * amdgpu_bo_shadowed - check if the BO is shadowed 278 - * 279 - * @bo: BO to be tested. 280 - * 281 - * Returns: 282 - * NULL if not shadowed or else return a BO pointer. 283 - */ 284 - static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo) 285 - { 286 - if (bo->tbo.type == ttm_bo_type_kernel) 287 - return to_amdgpu_bo_vm(bo)->shadow; 288 - 289 - return NULL; 290 - } 291 - 292 278 bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); 293 279 void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); 294 280 ··· 304 322 struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); 305 323 void amdgpu_bo_unref(struct amdgpu_bo **bo); 306 324 int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain); 307 - int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, 308 - u64 min_offset, u64 max_offset); 309 325 void amdgpu_bo_unpin(struct amdgpu_bo *bo); 310 326 int amdgpu_bo_init(struct amdgpu_device *adev); 311 327 void amdgpu_bo_fini(struct amdgpu_device *adev); ··· 329 349 u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); 330 350 void amdgpu_bo_get_memory(struct amdgpu_bo *bo, 331 351 struct amdgpu_mem_stats *stats); 332 - void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); 333 - int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, 334 - struct dma_fence **fence); 335 352 uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, 336 353 uint32_t domain); 337 354
+24 -7
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
··· 2853 2853 if (ret) 2854 2854 return ret; 2855 2855 2856 - /* Start rlc autoload after psp recieved all the gfx firmware */ 2856 + /* Start rlc autoload after psp received all the gfx firmware */ 2857 2857 if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? 2858 2858 adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { 2859 2859 ret = psp_rlc_autoload_start(psp); ··· 3425 3425 const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; 3426 3426 const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; 3427 3427 const struct psp_firmware_header_v2_0 *sos_hdr_v2_0; 3428 - int err = 0; 3428 + const struct psp_firmware_header_v2_1 *sos_hdr_v2_1; 3429 + int fw_index, fw_bin_count, start_index = 0; 3430 + const struct psp_fw_bin_desc *fw_bin; 3429 3431 uint8_t *ucode_array_start_addr; 3430 - int fw_index = 0; 3432 + int err = 0; 3431 3433 3432 3434 err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); 3433 3435 if (err) ··· 3480 3478 case 2: 3481 3479 sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data; 3482 3480 3483 - if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { 3481 + fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); 3482 + 3483 + if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) { 3484 3484 dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); 3485 3485 err = -EINVAL; 3486 3486 goto out; 3487 3487 } 3488 3488 3489 - for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) { 3490 - err = parse_sos_bin_descriptor(psp, 3491 - &sos_hdr_v2_0->psp_fw_bin[fw_index], 3489 + if (sos_hdr_v2_0->header.header_version_minor == 1) { 3490 + sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data; 3491 + 3492 + fw_bin = sos_hdr_v2_1->psp_fw_bin; 3493 + 3494 + if (psp_is_aux_sos_load_required(psp)) 3495 + start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); 3496 + else 3497 + fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); 3498 + 3499 + } else { 3500 + fw_bin = sos_hdr_v2_0->psp_fw_bin; 3501 + } 3502 + 3503 + for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) { 3504 + err = parse_sos_bin_descriptor(psp, fw_bin + fw_index, 3492 3505 sos_hdr_v2_0); 3493 3506 if (err) 3494 3507 goto out;
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
··· 138 138 int (*vbflash_stat)(struct psp_context *psp); 139 139 int (*fatal_error_recovery_quirk)(struct psp_context *psp); 140 140 bool (*get_ras_capability)(struct psp_context *psp); 141 + bool (*is_aux_sos_load_required)(struct psp_context *psp); 141 142 }; 142 143 143 144 struct ta_funcs { ··· 464 463 #define psp_fatal_error_recovery_quirk(psp) \ 465 464 ((psp)->funcs->fatal_error_recovery_quirk ? \ 466 465 (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0) 466 + 467 + #define psp_is_aux_sos_load_required(psp) \ 468 + ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) 467 469 468 470 extern const struct amd_ip_funcs psp_ip_funcs; 469 471
+6 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 882 882 if (ret) 883 883 return ret; 884 884 885 - /* gfx block ras dsiable cmd must send to ras-ta */ 885 + /* gfx block ras disable cmd must send to ras-ta */ 886 886 if (head->block == AMDGPU_RAS_BLOCK__GFX) 887 887 con->features |= BIT(head->block); 888 888 ··· 3468 3468 3469 3469 /* aca is disabled by default */ 3470 3470 adev->aca.is_enabled = false; 3471 + 3472 + /* bad page feature is not applicable to specific app platform */ 3473 + if (adev->gmc.is_app_apu && 3474 + amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0)) 3475 + amdgpu_bad_page_threshold = 0; 3471 3476 } 3472 3477 3473 3478 static void amdgpu_ras_counte_dw(struct work_struct *work)
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
··· 58 58 #define EEPROM_I2C_MADDR_4 0x40000 59 59 60 60 /* 61 - * The 2 macros bellow represent the actual size in bytes that 61 + * The 2 macros below represent the actual size in bytes that 62 62 * those entities occupy in the EEPROM memory. 63 63 * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which 64 64 * uses uint64 to store 6b fields such as retired_page.
+30
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
··· 260 260 return 0; 261 261 } 262 262 263 + /** 264 + * amdgpu_sync_kfd - sync to KFD fences 265 + * 266 + * @sync: sync object to add KFD fences to 267 + * @resv: reservation object with KFD fences 268 + * 269 + * Extract all KFD fences and add them to the sync object. 270 + */ 271 + int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv) 272 + { 273 + struct dma_resv_iter cursor; 274 + struct dma_fence *f; 275 + int r = 0; 276 + 277 + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP); 278 + dma_resv_for_each_fence_unlocked(&cursor, f) { 279 + void *fence_owner = amdgpu_sync_get_owner(f); 280 + 281 + if (fence_owner != AMDGPU_FENCE_OWNER_KFD) 282 + continue; 283 + 284 + r = amdgpu_sync_fence(sync, f); 285 + if (r) 286 + break; 287 + } 288 + dma_resv_iter_end(&cursor); 289 + 290 + return r; 291 + } 292 + 263 293 /* Free the entry back to the slab */ 264 294 static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) 265 295 {
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
··· 51 51 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, 52 52 struct dma_resv *resv, enum amdgpu_sync_mode mode, 53 53 void *owner); 54 + int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv); 54 55 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, 55 56 struct amdgpu_ring *ring); 56 57 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 1970 1970 DRM_INFO("amdgpu: %uM of GTT memory ready.\n", 1971 1971 (unsigned int)(gtt_size / (1024 * 1024))); 1972 1972 1973 - /* Initiailize doorbell pool on PCI BAR */ 1973 + /* Initialize doorbell pool on PCI BAR */ 1974 1974 r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); 1975 1975 if (r) { 1976 1976 DRM_ERROR("Failed initializing doorbell heap.\n");
+10 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
··· 136 136 struct psp_fw_bin_desc psp_fw_bin[]; 137 137 }; 138 138 139 + /* version_major=2, version_minor=1 */ 140 + struct psp_firmware_header_v2_1 { 141 + struct common_firmware_header header; 142 + uint32_t psp_fw_bin_count; 143 + uint32_t psp_aux_fw_bin_index; 144 + struct psp_fw_bin_desc psp_fw_bin[]; 145 + }; 146 + 139 147 /* version_major=1, version_minor=0 */ 140 148 struct ta_firmware_header_v1_0 { 141 149 struct common_firmware_header header; ··· 434 426 struct psp_firmware_header_v1_1 psp_v1_1; 435 427 struct psp_firmware_header_v1_3 psp_v1_3; 436 428 struct psp_firmware_header_v2_0 psp_v2_0; 429 + struct psp_firmware_header_v2_0 psp_v2_1; 437 430 struct ta_firmware_header_v1_0 ta; 438 431 struct ta_firmware_header_v2_0 ta_v2_0; 439 432 struct gfx_firmware_header_v1_0 gfx; ··· 456 447 uint8_t raw[0x100]; 457 448 }; 458 449 459 - #define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) 450 + #define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2) 460 451 461 452 /* 462 453 * fw loading support
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
··· 338 338 else 339 339 domain = AMDGPU_GEM_DOMAIN_VRAM; 340 340 341 + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 341 342 r = amdgpu_bo_pin(rbo, domain); 342 343 if (unlikely(r != 0)) { 343 344 if (r != -ERESTARTSYS)
+6 -17
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 465 465 { 466 466 uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); 467 467 struct amdgpu_vm_bo_base *bo_base; 468 - struct amdgpu_bo *shadow; 469 468 struct amdgpu_bo *bo; 470 469 int r; 471 470 ··· 485 486 spin_unlock(&vm->status_lock); 486 487 487 488 bo = bo_base->bo; 488 - shadow = amdgpu_bo_shadowed(bo); 489 489 490 490 r = validate(param, bo); 491 491 if (r) 492 492 return r; 493 - if (shadow) { 494 - r = validate(param, shadow); 495 - if (r) 496 - return r; 497 - } 498 493 499 494 if (bo->tbo.type != ttm_bo_type_kernel) { 500 495 amdgpu_vm_bo_moved(bo_base); ··· 1169 1176 AMDGPU_SYNC_EQ_OWNER, vm); 1170 1177 if (r) 1171 1178 goto error_free; 1179 + if (bo) { 1180 + r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv); 1181 + if (r) 1182 + goto error_free; 1183 + } 1184 + 1172 1185 } else { 1173 1186 struct drm_gem_object *obj = &bo->tbo.base; 1174 1187 ··· 2148 2149 { 2149 2150 struct amdgpu_vm_bo_base *bo_base; 2150 2151 2151 - /* shadow bo doesn't have bo base, its validation needs its parent */ 2152 - if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo)) 2153 - bo = bo->parent; 2154 - 2155 2152 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { 2156 2153 struct amdgpu_vm *vm = bo_base->vm; 2157 2154 ··· 2477 2482 root_bo = amdgpu_bo_ref(&root->bo); 2478 2483 r = amdgpu_bo_reserve(root_bo, true); 2479 2484 if (r) { 2480 - amdgpu_bo_unref(&root->shadow); 2481 2485 amdgpu_bo_unref(&root_bo); 2482 2486 goto error_free_delayed; 2483 2487 } ··· 2568 2574 dma_fence_put(vm->last_update); 2569 2575 vm->last_update = dma_fence_get_stub(); 2570 2576 vm->is_compute_context = true; 2571 - 2572 - /* Free the shadow bo for compute VM */ 2573 - amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); 2574 - 2575 - goto unreserve_bo; 2576 2577 2577 2578 unreserve_bo: 2578 2579 amdgpu_bo_unreserve(vm->root.bo);
+1 -55
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
··· 383 383 if (r) 384 384 return r; 385 385 386 - if (vmbo->shadow) { 387 - struct amdgpu_bo *shadow = vmbo->shadow; 388 - 389 - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); 390 - if (r) 391 - return r; 392 - } 393 - 394 386 if (!drm_dev_enter(adev_to_drm(adev), &idx)) 395 387 return -ENODEV; 396 388 ··· 440 448 int32_t xcp_id) 441 449 { 442 450 struct amdgpu_bo_param bp; 443 - struct amdgpu_bo *bo; 444 - struct dma_resv *resv; 445 451 unsigned int num_entries; 446 - int r; 447 452 448 453 memset(&bp, 0, sizeof(bp)); 449 454 ··· 473 484 if (vm->root.bo) 474 485 bp.resv = vm->root.bo->tbo.base.resv; 475 486 476 - r = amdgpu_bo_create_vm(adev, &bp, vmbo); 477 - if (r) 478 - return r; 479 - 480 - bo = &(*vmbo)->bo; 481 - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { 482 - (*vmbo)->shadow = NULL; 483 - return 0; 484 - } 485 - 486 - if (!bp.resv) 487 - WARN_ON(dma_resv_lock(bo->tbo.base.resv, 488 - NULL)); 489 - resv = bp.resv; 490 - memset(&bp, 0, sizeof(bp)); 491 - bp.size = amdgpu_vm_pt_size(adev, level); 492 - bp.domain = AMDGPU_GEM_DOMAIN_GTT; 493 - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; 494 - bp.type = ttm_bo_type_kernel; 495 - bp.resv = bo->tbo.base.resv; 496 - bp.bo_ptr_size = sizeof(struct amdgpu_bo); 497 - bp.xcp_id_plus1 = xcp_id + 1; 498 - 499 - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); 500 - 501 - if (!resv) 502 - dma_resv_unlock(bo->tbo.base.resv); 503 - 504 - if (r) { 505 - amdgpu_bo_unref(&bo); 506 - return r; 507 - } 508 - 509 - amdgpu_bo_add_to_shadow_list(*vmbo); 510 - 511 - return 0; 487 + return amdgpu_bo_create_vm(adev, &bp, vmbo); 512 488 } 513 489 514 490 /** ··· 523 569 return 0; 524 570 525 571 error_free_pt: 526 - amdgpu_bo_unref(&pt->shadow); 527 572 amdgpu_bo_unref(&pt_bo); 528 573 return r; 529 574 } ··· 534 581 */ 535 582 static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) 536 583 { 537 - struct amdgpu_bo *shadow; 538 - 539 584 if (!entry->bo) 540 585 return; 541 586 542 587 entry->bo->vm_bo = NULL; 543 - shadow = amdgpu_bo_shadowed(entry->bo); 544 - if (shadow) { 545 - ttm_bo_set_bulk_move(&shadow->tbo, NULL); 546 - amdgpu_bo_unref(&shadow); 547 - } 548 588 ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); 549 589 550 590 spin_lock(&entry->vm->status_lock);
+2 -17
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
··· 35 35 */ 36 36 static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) 37 37 { 38 - int r; 39 - 40 - r = amdgpu_ttm_alloc_gart(&table->bo.tbo); 41 - if (r) 42 - return r; 43 - 44 - if (table->shadow) 45 - r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); 46 - 47 - return r; 38 + return amdgpu_ttm_alloc_gart(&table->bo.tbo); 48 39 } 49 40 50 41 /* Allocate a new job for @count PTE updates */ ··· 256 265 257 266 if (!p->pages_addr) { 258 267 /* set page commands needed */ 259 - if (vmbo->shadow) 260 - amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr, 261 - count, incr, flags); 262 268 amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, 263 269 incr, flags); 264 270 return 0; 265 271 } 266 272 267 273 /* copy commands needed */ 268 - ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * 269 - (vmbo->shadow ? 2 : 1); 274 + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; 270 275 271 276 /* for padding */ 272 277 ndw -= 7; ··· 277 290 pte[i] |= flags; 278 291 } 279 292 280 - if (vmbo->shadow) 281 - amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes); 282 293 amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); 283 294 284 295 pe += nptes * 8;
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
··· 180 180 181 181 #define for_each_xcp(xcp_mgr, xcp, i) \ 182 182 for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \ 183 - xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) 183 + ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) 184 184 185 185 #endif
+13 -19
drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
··· 94 94 case AMDGPU_RING_TYPE_VCN_ENC: 95 95 case AMDGPU_RING_TYPE_VCN_JPEG: 96 96 ip_blk = AMDGPU_XCP_VCN; 97 - if (aqua_vanjaram_xcp_vcn_shared(adev)) 98 - inst_mask = 1 << (inst_idx * 2); 99 97 break; 100 98 default: 101 99 DRM_ERROR("Not support ring type %d!", ring->funcs->type); ··· 103 105 for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { 104 106 if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { 105 107 ring->xcp_id = xcp_id; 108 + dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, 109 + ring->xcp_id); 106 110 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) 107 111 adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; 108 112 break; ··· 394 394 struct amdgpu_xcp_ip *ip) 395 395 { 396 396 struct amdgpu_device *adev = xcp_mgr->adev; 397 + int num_sdma, num_vcn, num_shared_vcn, num_xcp; 397 398 int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp; 398 - int num_sdma, num_vcn; 399 399 400 400 num_sdma = adev->sdma.num_instances; 401 401 num_vcn = adev->vcn.num_vcn_inst; 402 + num_shared_vcn = 1; 403 + 404 + num_xcc_xcp = adev->gfx.num_xcc_per_xcp; 405 + num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp; 402 406 403 407 switch (xcp_mgr->mode) { 404 408 case AMDGPU_SPX_PARTITION_MODE: 405 - num_sdma_xcp = num_sdma; 406 - num_vcn_xcp = num_vcn; 407 - break; 408 409 case AMDGPU_DPX_PARTITION_MODE: 409 - num_sdma_xcp = num_sdma / 2; 410 - num_vcn_xcp = num_vcn / 2; 411 - break; 412 410 case AMDGPU_TPX_PARTITION_MODE: 413 - num_sdma_xcp = num_sdma / 3; 414 - num_vcn_xcp = num_vcn / 3; 415 - break; 416 411 case AMDGPU_QPX_PARTITION_MODE: 417 - num_sdma_xcp = num_sdma / 4; 418 - num_vcn_xcp = num_vcn / 4; 419 - break; 420 412 case AMDGPU_CPX_PARTITION_MODE: 421 - num_sdma_xcp = 2; 422 - num_vcn_xcp = num_vcn ? 1 : 0; 413 + num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp); 414 + num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp); 423 415 break; 424 416 default: 425 417 return -EINVAL; 426 418 } 427 419 428 - num_xcc_xcp = adev->gfx.num_xcc_per_xcp; 420 + if (num_vcn && num_xcp > num_vcn) 421 + num_shared_vcn = num_xcp / num_vcn; 429 422 430 423 switch (ip_id) { 431 424 case AMDGPU_XCP_GFXHUB: ··· 434 441 ip->ip_funcs = &sdma_v4_4_2_xcp_funcs; 435 442 break; 436 443 case AMDGPU_XCP_VCN: 437 - ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id); 444 + ip->inst_mask = 445 + XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn); 438 446 /* TODO : Assign IP funcs */ 439 447 break; 440 448 default:
+2
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
··· 1881 1881 return r; 1882 1882 1883 1883 if (!atomic) { 1884 + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1884 1885 r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); 1885 1886 if (unlikely(r != 0)) { 1886 1887 amdgpu_bo_unreserve(abo); ··· 2402 2401 return ret; 2403 2402 } 2404 2403 2404 + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 2405 2405 ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2406 2406 amdgpu_bo_unreserve(aobj); 2407 2407 if (ret) {
+2
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
··· 1931 1931 return r; 1932 1932 1933 1933 if (!atomic) { 1934 + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1934 1935 r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); 1935 1936 if (unlikely(r != 0)) { 1936 1937 amdgpu_bo_unreserve(abo); ··· 2486 2485 return ret; 2487 2486 } 2488 2487 2488 + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 2489 2489 ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2490 2490 amdgpu_bo_unreserve(aobj); 2491 2491 if (ret) {
+2
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
··· 1861 1861 return r; 1862 1862 1863 1863 if (!atomic) { 1864 + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1864 1865 r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); 1865 1866 if (unlikely(r != 0)) { 1866 1867 amdgpu_bo_unreserve(abo); ··· 2322 2321 return ret; 2323 2322 } 2324 2323 2324 + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 2325 2325 ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2326 2326 amdgpu_bo_unreserve(aobj); 2327 2327 if (ret) {
+2
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
··· 1828 1828 return r; 1829 1829 1830 1830 if (!atomic) { 1831 + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1831 1832 r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); 1832 1833 if (unlikely(r != 0)) { 1833 1834 amdgpu_bo_unreserve(abo); ··· 2321 2320 return ret; 2322 2321 } 2323 2322 2323 + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 2324 2324 ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2325 2325 amdgpu_bo_unreserve(aobj); 2326 2326 if (ret) {
+11 -3
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
··· 202 202 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) 203 203 }; 204 204 205 - static const struct soc15_reg_golden golden_settings_gc_12_0[] = { 205 + static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { 206 206 SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), 207 207 SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), 208 208 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) 209 + }; 210 + 211 + static const struct soc15_reg_golden golden_settings_gc_12_0[] = { 212 + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000), 209 213 }; 210 214 211 215 #define DEFAULT_SH_MEM_CONFIG \ ··· 3499 3495 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3500 3496 case IP_VERSION(12, 0, 0): 3501 3497 case IP_VERSION(12, 0, 1): 3498 + soc15_program_register_sequence(adev, 3499 + golden_settings_gc_12_0, 3500 + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); 3501 + 3502 3502 if (adev->rev_id == 0) 3503 3503 soc15_program_register_sequence(adev, 3504 - golden_settings_gc_12_0, 3505 - (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); 3504 + golden_settings_gc_12_0_rev0, 3505 + (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0)); 3506 3506 break; 3507 3507 default: 3508 3508 break;
+12 -8
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
··· 1701 1701 WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0); 1702 1702 } else { 1703 1703 WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 1704 - (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 1704 + (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | 1705 + CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | 1706 + CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | 1707 + CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | 1708 + CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | 1709 + CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | 1710 + CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | 1711 + CP_MEC_CNTL__MEC_ME1_HALT_MASK | 1712 + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 1705 1713 adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1706 1714 } 1707 1715 udelay(50); ··· 2248 2240 r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id); 2249 2241 if (r) 2250 2242 return r; 2243 + } else { 2244 + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); 2251 2245 } 2252 2246 2253 2247 r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id); ··· 2309 2299 return 0; 2310 2300 } 2311 2301 2312 - static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable, 2313 - int xcc_id) 2314 - { 2315 - gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id); 2316 - } 2317 - 2318 2302 static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) 2319 2303 { 2320 2304 if (amdgpu_gfx_disable_kcq(adev, xcc_id)) ··· 2340 2336 } 2341 2337 2342 2338 gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id); 2343 - gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); 2339 + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); 2344 2340 } 2345 2341 2346 2342 static int gfx_v9_4_3_hw_init(void *handle)
+1 -1
drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
··· 153 153 WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); 154 154 } 155 155 156 - //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB 156 + //disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB 157 157 imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10); 158 158 imu_reg_val |= 0x10007; 159 159 WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
+1 -1
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
··· 161 161 int api_status_off) 162 162 { 163 163 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 164 - signed long timeout = 3000000; /* 3000 ms */ 164 + signed long timeout = 2100000; /* 2100 ms */ 165 165 struct amdgpu_device *adev = mes->adev; 166 166 struct amdgpu_ring *ring = &mes->ring[0]; 167 167 struct MES_API_STATUS *api_status;
+8 -6
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
··· 146 146 int api_status_off) 147 147 { 148 148 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 149 - signed long timeout = 3000000; /* 3000 ms */ 149 + signed long timeout = 2100000; /* 2100 ms */ 150 150 struct amdgpu_device *adev = mes->adev; 151 151 struct amdgpu_ring *ring = &mes->ring[pipe]; 152 152 spinlock_t *ring_lock = &mes->ring_lock[pipe]; ··· 479 479 union MESAPI__MISC misc_pkt; 480 480 int pipe; 481 481 482 + if (mes->adev->enable_uni_mes) 483 + pipe = AMDGPU_MES_KIQ_PIPE; 484 + else 485 + pipe = AMDGPU_MES_SCHED_PIPE; 486 + 482 487 memset(&misc_pkt, 0, sizeof(misc_pkt)); 483 488 484 489 misc_pkt.header.type = MES_API_TYPE_SCHEDULER; ··· 518 513 misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; 519 514 break; 520 515 case MES_MISC_OP_SET_SHADER_DEBUGGER: 516 + pipe = AMDGPU_MES_SCHED_PIPE; 521 517 misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; 522 518 misc_pkt.set_shader_debugger.process_context_addr = 523 519 input->set_shader_debugger.process_context_addr; ··· 535 529 DRM_ERROR("unsupported misc op (%d) \n", input->op); 536 530 return -EINVAL; 537 531 } 538 - 539 - if (mes->adev->enable_uni_mes) 540 - pipe = AMDGPU_MES_KIQ_PIPE; 541 - else 542 - pipe = AMDGPU_MES_SCHED_PIPE; 543 532 544 533 return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, 545 534 &misc_pkt, sizeof(misc_pkt), ··· 609 608 mes_set_hw_res_pkt.disable_mes_log = 1; 610 609 mes_set_hw_res_pkt.use_different_vmid_compute = 1; 611 610 mes_set_hw_res_pkt.enable_reg_active_poll = 1; 611 + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; 612 612 613 613 /* 614 614 * Keep oversubscribe timer for sdma . When we have unmapped doorbell
+1 -1
drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
··· 365 365 366 366 data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; 367 367 } else { 368 - /* Disbale ASPM L1 */ 368 + /* Disable ASPM L1 */ 369 369 data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; 370 370 /* Disable ASPM TxL0s */ 371 371 data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+17
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
··· 81 81 /* memory training timeout define */ 82 82 #define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 83 83 84 + #define regMP1_PUB_SCRATCH0 0x3b10090 85 + 84 86 static int psp_v13_0_init_microcode(struct psp_context *psp) 85 87 { 86 88 struct amdgpu_device *adev = psp->adev; ··· 809 807 } 810 808 } 811 809 810 + static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp) 811 + { 812 + struct amdgpu_device *adev = psp->adev; 813 + u32 pmfw_ver; 814 + 815 + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) 816 + return false; 817 + 818 + /* load 4e version of sos if pmfw version less than 85.115.0 */ 819 + pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4); 820 + 821 + return (pmfw_ver < 0x557300); 822 + } 823 + 812 824 static const struct psp_funcs psp_v13_0_funcs = { 813 825 .init_microcode = psp_v13_0_init_microcode, 814 826 .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, ··· 846 830 .vbflash_stat = psp_v13_0_vbflash_status, 847 831 .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, 848 832 .get_ras_capability = psp_v13_0_get_ras_capability, 833 + .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, 849 834 }; 850 835 851 836 void psp_v13_0_set_psp_funcs(struct psp_context *psp)
+1 -1
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
··· 710 710 upper_32_bits(wptr_gpu_addr)); 711 711 wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); 712 712 if (ring->use_pollmem) { 713 - /*wptr polling is not enogh fast, directly clean the wptr register */ 713 + /*wptr polling is not enough fast, directly clean the wptr register */ 714 714 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); 715 715 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 716 716 SDMA0_GFX_RB_WPTR_POLL_CNTL,
+5 -2
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
··· 1080 1080 unsigned bytes = count * 8; 1081 1081 1082 1082 ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | 1083 - SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 1083 + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | 1084 + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); 1085 + 1084 1086 ib->ptr[ib->length_dw++] = bytes - 1; 1085 1087 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 1086 1088 ib->ptr[ib->length_dw++] = lower_32_bits(src); 1087 1089 ib->ptr[ib->length_dw++] = upper_32_bits(src); 1088 1090 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 1089 1091 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 1092 + ib->ptr[ib->length_dw++] = 0; 1090 1093 1091 1094 } 1092 1095 ··· 1747 1744 } 1748 1745 1749 1746 static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = { 1750 - .copy_pte_num_dw = 7, 1747 + .copy_pte_num_dw = 8, 1751 1748 .copy_pte = sdma_v7_0_vm_copy_pte, 1752 1749 .write_pte = sdma_v7_0_vm_write_pte, 1753 1750 .set_pte_pde = sdma_v7_0_vm_set_pte_pde,
+1 -1
drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
··· 60 60 { 61 61 u32 data; 62 62 63 - /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */ 63 + /* CGTT_ROM_CLK_CTRL0 is not available for APUs */ 64 64 if (adev->flags & AMD_IS_APU) 65 65 return; 66 66
+13 -10
drivers/gpu/drm/amd/amdgpu/soc24.c
··· 250 250 adev->nbio.funcs->program_aspm(adev); 251 251 } 252 252 253 - static void soc24_enable_doorbell_aperture(struct amdgpu_device *adev, 254 - bool enable) 255 - { 256 - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); 257 - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); 258 - } 259 - 260 253 const struct amdgpu_ip_block_version soc24_common_ip_block = { 261 254 .type = AMD_IP_BLOCK_TYPE_COMMON, 262 255 .major = 1, ··· 447 454 if (amdgpu_sriov_vf(adev)) 448 455 xgpu_nv_mailbox_get_irq(adev); 449 456 457 + /* Enable selfring doorbell aperture late because doorbell BAR 458 + * aperture will change if resize BAR successfully in gmc sw_init. 459 + */ 460 + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); 461 + 450 462 return 0; 451 463 } 452 464 ··· 489 491 adev->df.funcs->hw_init(adev); 490 492 491 493 /* enable the doorbell aperture */ 492 - soc24_enable_doorbell_aperture(adev, true); 494 + adev->nbio.funcs->enable_doorbell_aperture(adev, true); 493 495 494 496 return 0; 495 497 } ··· 498 500 { 499 501 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 500 502 501 - /* disable the doorbell aperture */ 502 - soc24_enable_doorbell_aperture(adev, false); 503 + /* Disable the doorbell aperture and selfring doorbell aperture 504 + * separately in hw_fini because soc21_enable_doorbell_aperture 505 + * has been removed and there is no need to delay disabling 506 + * selfring doorbell. 507 + */ 508 + adev->nbio.funcs->enable_doorbell_aperture(adev, false); 509 + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); 503 510 504 511 if (amdgpu_sriov_vf(adev)) 505 512 xgpu_nv_mailbox_put_irq(adev);
-165
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
··· 1395 1395 } 1396 1396 } 1397 1397 1398 - static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p, 1399 - struct amdgpu_job *job) 1400 - { 1401 - struct drm_gpu_scheduler **scheds; 1402 - 1403 - /* The create msg must be in the first IB submitted */ 1404 - if (atomic_read(&job->base.entity->fence_seq)) 1405 - return -EINVAL; 1406 - 1407 - /* if VCN0 is harvested, we can't support AV1 */ 1408 - if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) 1409 - return -EINVAL; 1410 - 1411 - scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] 1412 - [AMDGPU_RING_PRIO_0].sched; 1413 - drm_sched_entity_modify_sched(job->base.entity, scheds, 1); 1414 - return 0; 1415 - } 1416 - 1417 - static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, 1418 - uint64_t addr) 1419 - { 1420 - struct ttm_operation_ctx ctx = { false, false }; 1421 - struct amdgpu_bo_va_mapping *map; 1422 - uint32_t *msg, num_buffers; 1423 - struct amdgpu_bo *bo; 1424 - uint64_t start, end; 1425 - unsigned int i; 1426 - void *ptr; 1427 - int r; 1428 - 1429 - addr &= AMDGPU_GMC_HOLE_MASK; 1430 - r = amdgpu_cs_find_mapping(p, addr, &bo, &map); 1431 - if (r) { 1432 - DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); 1433 - return r; 1434 - } 1435 - 1436 - start = map->start * AMDGPU_GPU_PAGE_SIZE; 1437 - end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; 1438 - if (addr & 0x7) { 1439 - DRM_ERROR("VCN messages must be 8 byte aligned!\n"); 1440 - return -EINVAL; 1441 - } 1442 - 1443 - bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 1444 - amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); 1445 - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 1446 - if (r) { 1447 - DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); 1448 - return r; 1449 - } 1450 - 1451 - r = amdgpu_bo_kmap(bo, &ptr); 1452 - if (r) { 1453 - DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); 1454 - return r; 1455 - } 1456 - 1457 - msg = ptr + addr - start; 1458 - 1459 - /* Check length */ 1460 - if (msg[1] > end - addr) { 1461 - r = -EINVAL; 1462 - goto out; 1463 - } 1464 - 1465 - if (msg[3] != RDECODE_MSG_CREATE) 1466 - goto out; 1467 - 1468 - num_buffers = msg[2]; 1469 - for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { 1470 - uint32_t offset, size, *create; 1471 - 1472 - if (msg[0] != RDECODE_MESSAGE_CREATE) 1473 - continue; 1474 - 1475 - offset = msg[1]; 1476 - size = msg[2]; 1477 - 1478 - if (offset + size > end) { 1479 - r = -EINVAL; 1480 - goto out; 1481 - } 1482 - 1483 - create = ptr + addr + offset - start; 1484 - 1485 - /* H264, HEVC and VP9 can run on any instance */ 1486 - if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) 1487 - continue; 1488 - 1489 - r = vcn_v4_0_5_limit_sched(p, job); 1490 - if (r) 1491 - goto out; 1492 - } 1493 - 1494 - out: 1495 - amdgpu_bo_kunmap(bo); 1496 - return r; 1497 - } 1498 - 1499 - #define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) 1500 - #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) 1501 - 1502 - #define RADEON_VCN_ENGINE_INFO (0x30000001) 1503 - #define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 1504 - 1505 - #define RENCODE_ENCODE_STANDARD_AV1 2 1506 - #define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 1507 - #define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 1508 - 1509 - /* return the offset in ib if id is found, -1 otherwise 1510 - * to speed up the searching we only search upto max_offset 1511 - */ 1512 - static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) 1513 - { 1514 - int i; 1515 - 1516 - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { 1517 - if (ib->ptr[i + 1] == id) 1518 - return i; 1519 - } 1520 - return -1; 1521 - } 1522 - 1523 - static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, 1524 - struct amdgpu_job *job, 1525 - struct amdgpu_ib *ib) 1526 - { 1527 - struct amdgpu_ring *ring = amdgpu_job_ring(job); 1528 - struct amdgpu_vcn_decode_buffer *decode_buffer; 1529 - uint64_t addr; 1530 - uint32_t val; 1531 - int idx; 1532 - 1533 - /* The first instance can decode anything */ 1534 - if (!ring->me) 1535 - return 0; 1536 - 1537 - /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ 1538 - idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, 1539 - RADEON_VCN_ENGINE_INFO_MAX_OFFSET); 1540 - if (idx < 0) /* engine info is missing */ 1541 - return 0; 1542 - 1543 - val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ 1544 - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { 1545 - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; 1546 - 1547 - if (!(decode_buffer->valid_buf_flag & 0x1)) 1548 - return 0; 1549 - 1550 - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | 1551 - decode_buffer->msg_buffer_address_lo; 1552 - return vcn_v4_0_5_dec_msg(p, job, addr); 1553 - } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { 1554 - idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, 1555 - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); 1556 - if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) 1557 - return vcn_v4_0_5_limit_sched(p, job); 1558 - } 1559 - return 0; 1560 - } 1561 - 1562 1398 static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { 1563 1399 .type = AMDGPU_RING_TYPE_VCN_ENC, 1564 1400 .align_mask = 0x3f, ··· 1402 1566 .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, 1403 1567 .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, 1404 1568 .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, 1405 - .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place, 1406 1569 .emit_frame_size = 1407 1570 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 1408 1571 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+24
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 3540 3540 return debug_map_and_unlock(dqm); 3541 3541 } 3542 3542 3543 + bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 3544 + struct qcm_process_device *qpd, 3545 + int doorbell_off, u32 *queue_format) 3546 + { 3547 + struct queue *q; 3548 + bool r = false; 3549 + 3550 + if (!queue_format) 3551 + return r; 3552 + 3553 + dqm_lock(dqm); 3554 + 3555 + list_for_each_entry(q, &qpd->queues_list, list) { 3556 + if (q->properties.doorbell_off == doorbell_off) { 3557 + *queue_format = q->properties.format; 3558 + r = true; 3559 + goto out; 3560 + } 3561 + } 3562 + 3563 + out: 3564 + dqm_unlock(dqm); 3565 + return r; 3566 + } 3543 3567 #if defined(CONFIG_DEBUG_FS) 3544 3568 3545 3569 static void seq_reg_dump(struct seq_file *m,
+3
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
··· 324 324 int debug_lock_and_unmap(struct device_queue_manager *dqm); 325 325 int debug_map_and_unlock(struct device_queue_manager *dqm); 326 326 int debug_refresh_runlist(struct device_queue_manager *dqm); 327 + bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 328 + struct qcm_process_device *qpd, 329 + int doorbell_off, u32 *queue_format); 327 330 328 331 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) 329 332 {
-15
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
··· 306 306 client_id == SOC15_IH_CLIENTID_UTCL2) { 307 307 struct kfd_vm_fault_info info = {0}; 308 308 uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); 309 - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); 310 - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); 311 - int hub_inst = 0; 312 309 struct kfd_hsa_memory_exception_data exception_data; 313 - 314 - /* gfxhub */ 315 - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { 316 - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, 317 - node_id); 318 - if (hub_inst < 0) 319 - hub_inst = 0; 320 - } 321 - 322 - /* mmhub */ 323 - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) 324 - hub_inst = node_id / 4; 325 310 326 311 info.vmid = vmid; 327 312 info.mc_id = client_id;
+4
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
··· 341 341 m->sdmax_rlcx_doorbell_offset = 342 342 q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; 343 343 344 + m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum 345 + << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT) 346 + & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK; 347 + 344 348 m->sdma_engine_id = q->sdma_engine_id; 345 349 m->sdma_queue_id = q->sdma_queue_id; 346 350
+28 -2
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 270 270 struct kfd_node *dev = NULL; 271 271 struct kfd_process *proc = NULL; 272 272 struct kfd_process_device *pdd = NULL; 273 + int i; 274 + struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES]; 275 + u32 queue_format; 276 + 277 + memset(cu_occupancy, 0x0, sizeof(cu_occupancy)); 273 278 274 279 pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); 275 280 dev = pdd->dev; ··· 292 287 /* Collect wave count from device if it supports */ 293 288 wave_cnt = 0; 294 289 max_waves_per_cu = 0; 295 - dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, 296 - &max_waves_per_cu, 0); 290 + 291 + /* 292 + * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition. 293 + * For AQL queues, because of cooperative dispatch we multiply the wave count 294 + * by number of XCCs in the partition to get the total wave counts across all 295 + * XCCs in the partition. 296 + * For PM4 queues, there is no cooperative dispatch so wave_cnt stay as it is. 297 + */ 298 + dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, 299 + &max_waves_per_cu, ffs(dev->xcc_mask) - 1); 300 + 301 + for (i = 0; i < AMDGPU_MAX_QUEUES; i++) { 302 + if (cu_occupancy[i].wave_cnt != 0 && 303 + kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd, 304 + cu_occupancy[i].doorbell_off, 305 + &queue_format)) { 306 + if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4)) 307 + wave_cnt += cu_occupancy[i].wave_cnt; 308 + else 309 + wave_cnt += (NUM_XCC(dev->xcc_mask) * 310 + cu_occupancy[i].wave_cnt); 311 + } 312 + } 297 313 298 314 /* Translate wave count to number of compute units */ 299 315 cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 517 517 if (retval) 518 518 goto err_destroy_queue; 519 519 520 - kfd_procfs_del_queue(pqn->q); 521 520 dqm = pqn->q->device->dqm; 522 521 retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); 523 522 if (retval) { ··· 526 527 if (retval != -ETIME) 527 528 goto err_destroy_queue; 528 529 } 530 + kfd_procfs_del_queue(pqn->q); 529 531 kfd_queue_release_buffers(pdd, &pqn->q->properties); 530 532 pqm_clean_queue_resource(pqm, pqn); 531 533 uninit_queue(pqn->q);
+69 -17
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 808 808 } 809 809 810 810 /** 811 + * dmub_hpd_sense_callback - DMUB HPD sense processing callback. 812 + * @adev: amdgpu_device pointer 813 + * @notify: dmub notification structure 814 + * 815 + * HPD sense changes can occur during low power states and need to be 816 + * notified from firmware to driver. 817 + */ 818 + static void dmub_hpd_sense_callback(struct amdgpu_device *adev, 819 + struct dmub_notification *notify) 820 + { 821 + DRM_DEBUG_DRIVER("DMUB HPD SENSE callback.\n"); 822 + } 823 + 824 + /** 811 825 * register_dmub_notify_callback - Sets callback for DMUB notify 812 826 * @adev: amdgpu_device pointer 813 827 * @type: Type of dmub notification ··· 1771 1757 static enum dmub_ips_disable_type dm_get_default_ips_mode( 1772 1758 struct amdgpu_device *adev) 1773 1759 { 1774 - /* 1775 - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to 1776 - * cause a hard hang. A fix exists for newer PMFW. 1777 - * 1778 - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest 1779 - * IPS state in all cases, except for s0ix and all displays off (DPMS), 1780 - * where IPS2 is allowed. 1781 - * 1782 - * When checking pmfw version, use the major and minor only. 1783 - */ 1784 - if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) && 1785 - (adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) 1786 - return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; 1760 + enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE; 1787 1761 1788 - if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) 1789 - return DMUB_IPS_ENABLE; 1762 + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { 1763 + case IP_VERSION(3, 5, 0): 1764 + /* 1765 + * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to 1766 + * cause a hard hang. A fix exists for newer PMFW. 1767 + * 1768 + * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest 1769 + * IPS state in all cases, except for s0ix and all displays off (DPMS), 1770 + * where IPS2 is allowed. 1771 + * 1772 + * When checking pmfw version, use the major and minor only. 1773 + */ 1774 + if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) 1775 + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; 1776 + else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) 1777 + /* 1778 + * Other ASICs with DCN35 that have residency issues with 1779 + * IPS2 in idle. 1780 + * We want them to use IPS2 only in display off cases. 1781 + */ 1782 + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; 1783 + break; 1784 + case IP_VERSION(3, 5, 1): 1785 + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; 1786 + break; 1787 + default: 1788 + /* ASICs older than DCN35 do not have IPSs */ 1789 + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0)) 1790 + ret = DMUB_IPS_DISABLE_ALL; 1791 + break; 1792 + } 1790 1793 1791 - /* ASICs older than DCN35 do not have IPSs */ 1792 - return DMUB_IPS_DISABLE_ALL; 1794 + return ret; 1793 1795 } 1794 1796 1795 1797 static int amdgpu_dm_init(struct amdgpu_device *adev) ··· 3838 3808 DRM_ERROR("amdgpu: fail to register dmub hpd callback"); 3839 3809 return -EINVAL; 3840 3810 } 3811 + 3812 + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, 3813 + dmub_hpd_sense_callback, true)) { 3814 + DRM_ERROR("amdgpu: fail to register dmub hpd sense callback"); 3815 + return -EINVAL; 3816 + } 3841 3817 } 3842 3818 3843 3819 list_for_each_entry(connector, ··· 4485 4449 4486 4450 #define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 4487 4451 #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 4452 + #define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2) 4488 4453 #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 4489 4454 4490 4455 static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, ··· 4500 4463 return; 4501 4464 4502 4465 amdgpu_acpi_get_backlight_caps(&caps); 4466 + 4467 + /* validate the firmware value is sane */ 4468 + if (caps.caps_valid) { 4469 + int spread = caps.max_input_signal - caps.min_input_signal; 4470 + 4471 + if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || 4472 + caps.min_input_signal < 0 || 4473 + spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || 4474 + spread < AMDGPU_DM_MIN_SPREAD) { 4475 + DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n", 4476 + caps.min_input_signal, caps.max_input_signal); 4477 + caps.caps_valid = false; 4478 + } 4479 + } 4480 + 4503 4481 if (caps.caps_valid) { 4504 4482 dm->backlight_caps[bl_idx].caps_valid = true; 4505 4483 if (caps.aux_support)
+1 -1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
··· 50 50 51 51 #define AMDGPU_DM_MAX_NUM_EDP 2 52 52 53 - #define AMDGPU_DMUB_NOTIFICATION_MAX 6 53 + #define AMDGPU_DMUB_NOTIFICATION_MAX 7 54 54 55 55 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A 56 56 #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
+2 -2
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
··· 1147 1147 params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; 1148 1148 params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel; 1149 1149 params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported; 1150 - dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy); 1150 + dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); 1151 1151 if (!dc_dsc_compute_bandwidth_range( 1152 1152 stream->sink->ctx->dc->res_pool->dscs[0], 1153 1153 stream->sink->ctx->dc->debug.dsc_min_slice_height_override, ··· 1681 1681 { 1682 1682 struct dc_dsc_policy dsc_policy = {0}; 1683 1683 1684 - dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy); 1684 + dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); 1685 1685 dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], 1686 1686 stream->sink->ctx->dc->debug.dsc_min_slice_height_override, 1687 1687 dsc_policy.min_target_bpp * 16,
+1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
··· 961 961 else 962 962 domain = AMDGPU_GEM_DOMAIN_VRAM; 963 963 964 + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 964 965 r = amdgpu_bo_pin(rbo, domain); 965 966 if (unlikely(r != 0)) { 966 967 if (r != -ERESTARTSYS)
+1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
··· 114 114 115 115 domain = amdgpu_display_supported_domains(adev, rbo->flags); 116 116 117 + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 117 118 r = amdgpu_bo_pin(rbo, domain); 118 119 if (unlikely(r != 0)) { 119 120 if (r != -ERESTARTSYS)
+1 -1
drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
··· 569 569 break; 570 570 } 571 571 data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1)); 572 - /*clamp the partitions to the maxium number supported by the lb*/ 572 + /* clamp the partitions to the maximum number supported by the lb */ 573 573 if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) { 574 574 data->lb_partitions_max[i] = bw_int_to_fixed(10); 575 575 }
+2 -7
drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
··· 59 59 display_count = 0; 60 60 for (i = 0; i < context->stream_count; i++) { 61 61 const struct dc_stream_state *stream = context->streams[i]; 62 + const struct dc_stream_status *stream_status = &context->stream_status[i]; 62 63 63 64 /* Don't count SubVP phantom pipes as part of active 64 65 * display count ··· 67 66 if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) 68 67 continue; 69 68 70 - /* 71 - * Only notify active stream or virtual stream. 72 - * Need to notify virtual stream to work around 73 - * headless case. HPD does not fire when system is in 74 - * S0i2. 75 - */ 76 - if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL) 69 + if (!stream->dpms_off || (stream_status && stream_status->plane_count)) 77 70 display_count++; 78 71 } 79 72
+6
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
··· 1222 1222 ctx->dc->debug.disable_dpp_power_gate = false; 1223 1223 ctx->dc->debug.disable_hubp_power_gate = false; 1224 1224 ctx->dc->debug.disable_dsc_power_gate = false; 1225 + 1226 + /* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */ 1227 + if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE && 1228 + ctx->dce_version == DCN_VERSION_3_5 && 1229 + ((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00)) 1230 + ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; 1225 1231 } else { 1226 1232 /*let's reset the config control flag*/ 1227 1233 ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/
+37 -4
drivers/gpu/drm/amd/display/dc/core/dc.c
··· 1767 1767 if (crtc_timing->pix_clk_100hz != pix_clk_100hz) 1768 1768 return false; 1769 1769 1770 - if (!se->funcs->dp_get_pixel_format) 1770 + if (!se || !se->funcs->dp_get_pixel_format) 1771 1771 return false; 1772 1772 1773 1773 if (!se->funcs->dp_get_pixel_format( ··· 2376 2376 return false; 2377 2377 } 2378 2378 2379 - static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) 2379 + static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u) 2380 2380 { 2381 2381 union surface_update_flags *update_flags = &u->surface->update_flags; 2382 2382 enum surface_update_type update_type = UPDATE_TYPE_FAST; ··· 2455 2455 /* todo: below are HW dependent, we should add a hook to 2456 2456 * DCE/N resource and validated there. 2457 2457 */ 2458 - if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) { 2458 + if (!dc->debug.skip_full_updated_if_possible) { 2459 2459 /* swizzled mode requires RQ to be setup properly, 2460 2460 * thus need to run DML to calculate RQ settings 2461 2461 */ ··· 2547 2547 2548 2548 update_flags->raw = 0; // Reset all flags 2549 2549 2550 - type = get_plane_info_update_type(u); 2550 + type = get_plane_info_update_type(dc, u); 2551 2551 elevate_update_type(&overall_type, type); 2552 2552 2553 2553 type = get_scaling_info_update_type(dc, u); ··· 2594 2594 if (u->hdr_mult.value != u->surface->hdr_mult.value) { 2595 2595 update_flags->bits.hdr_mult = 1; 2596 2596 elevate_update_type(&overall_type, UPDATE_TYPE_MED); 2597 + } 2598 + 2599 + if (u->sdr_white_level_nits) 2600 + if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) { 2601 + update_flags->bits.sdr_white_level_nits = 1; 2602 + elevate_update_type(&overall_type, UPDATE_TYPE_FULL); 2597 2603 } 2598 2604 2599 2605 if (u->cm2_params) { ··· 2881 2875 if (srf_update->hdr_mult.value) 2882 2876 surface->hdr_mult = 2883 2877 srf_update->hdr_mult; 2878 + 2879 + if (srf_update->sdr_white_level_nits) 2880 + surface->sdr_white_level_nits = 2881 + srf_update->sdr_white_level_nits; 2884 2882 2885 2883 if (srf_update->blend_tf) 2886 2884 memcpy(&surface->blend_tf, srf_update->blend_tf, ··· 4689 4679 srf_updates[i].scaling_info || 4690 4680 (srf_updates[i].hdr_mult.value && 4691 4681 srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) || 4682 + (srf_updates[i].sdr_white_level_nits && 4683 + srf_updates[i].sdr_white_level_nits != srf_updates->surface->sdr_white_level_nits) || 4692 4684 srf_updates[i].in_transfer_func || 4693 4685 srf_updates[i].func_shaper || 4694 4686 srf_updates[i].lut3d_func || ··· 5753 5741 } 5754 5742 5755 5743 return DC_OK; 5744 + } 5745 + 5746 + /** 5747 + * dc_process_dmub_dpia_set_tps_notification - Submits tps notification 5748 + * 5749 + * @dc: [in] dc structure 5750 + * @link_index: [in] link index 5751 + * @tps: [in] request tps 5752 + * 5753 + * Submits set_tps_notification command to dmub via inbox message 5754 + */ 5755 + void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps) 5756 + { 5757 + union dmub_rb_cmd cmd = {0}; 5758 + 5759 + cmd.set_tps_notification.header.type = DMUB_CMD__DPIA; 5760 + cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION; 5761 + cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst; 5762 + cmd.set_tps_notification.tps_notification.tps = tps; 5763 + 5764 + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); 5756 5765 } 5757 5766 5758 5767 /**
+12 -2
drivers/gpu/drm/amd/display/dc/dc.h
··· 55 55 struct set_config_cmd_payload; 56 56 struct dmub_notification; 57 57 58 - #define DC_VER "3.2.299" 58 + #define DC_VER "3.2.301" 59 59 60 60 #define MAX_SURFACES 3 61 61 #define MAX_PLANES 6 ··· 462 462 bool support_edp0_on_dp1; 463 463 unsigned int enable_fpo_flicker_detection; 464 464 bool disable_hbr_audio_dp2; 465 + bool consolidated_dpia_dp_lt; 465 466 }; 466 467 467 468 enum visual_confirm { ··· 763 762 uint32_t disable_mst_dsc_work_around:1; /* bit 3 */ 764 763 uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ 765 764 uint32_t disable_usb4_pm_support:1; /* bit 5 */ 766 - uint32_t reserved:26; 765 + uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ 766 + uint32_t reserved:25; 767 767 } bits; 768 768 uint32_t raw; 769 769 }; ··· 1058 1056 unsigned int force_lls; 1059 1057 bool notify_dpia_hr_bw; 1060 1058 bool enable_ips_visual_confirm; 1059 + unsigned int sharpen_policy; 1060 + unsigned int scale_to_sharpness_policy; 1061 + bool skip_full_updated_if_possible; 1061 1062 }; 1062 1063 1063 1064 ··· 1274 1269 uint32_t tmz_changed:1; 1275 1270 uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */ 1276 1271 uint32_t full_update:1; 1272 + uint32_t sdr_white_level_nits:1; 1277 1273 } bits; 1278 1274 1279 1275 uint32_t raw; ··· 1357 1351 bool adaptive_sharpness_en; 1358 1352 int sharpness_level; 1359 1353 enum linear_light_scaling linear_light_scaling; 1354 + unsigned int sdr_white_level_nits; 1360 1355 }; 1361 1356 1362 1357 struct dc_plane_info { ··· 1515 1508 */ 1516 1509 struct dc_cm2_parameters *cm2_params; 1517 1510 const struct dc_csc_transform *cursor_csc_color_matrix; 1511 + unsigned int sdr_white_level_nits; 1518 1512 }; 1519 1513 1520 1514 /* ··· 2527 2519 uint32_t link_index, 2528 2520 uint8_t mst_alloc_slots, 2529 2521 uint8_t *mst_slots_in_use); 2522 + 2523 + void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps); 2530 2524 2531 2525 void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, 2532 2526 uint32_t hpd_int_enable);
+12
drivers/gpu/drm/amd/display/dc/dc_dp_types.h
··· 969 969 uint8_t raw; 970 970 }; 971 971 972 + union dpcd_max_uncompressed_pixel_rate_cap { 973 + struct { 974 + uint16_t max_uncompressed_pixel_rate_cap :15; 975 + uint16_t valid :1; 976 + } bits; 977 + uint8_t raw[2]; 978 + }; 979 + 972 980 union dp_fec_capability1 { 973 981 struct { 974 982 uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1; ··· 1178 1170 struct dc_lttpr_caps lttpr_caps; 1179 1171 struct adaptive_sync_caps adaptive_sync_caps; 1180 1172 struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info; 1173 + union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap; 1181 1174 1182 1175 union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates; 1183 1176 union dp_main_line_channel_coding_cap channel_coding_cap; ··· 1348 1339 #endif 1349 1340 #ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 1350 1341 #define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110 1342 + #endif 1343 + #ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 1344 + #define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c 1351 1345 #endif 1352 1346 #ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 1353 1347 #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50
+3 -1
drivers/gpu/drm/amd/display/dc/dc_dsc.h
··· 59 59 uint32_t max_target_bpp_limit_override_x16; 60 60 uint32_t slice_height_granularity; 61 61 uint32_t dsc_force_odm_hslice_override; 62 + bool force_dsc_when_not_needed; 62 63 }; 63 64 64 65 bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, ··· 101 100 */ 102 101 void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, 103 102 uint32_t max_target_bpp_limit_override_x16, 104 - struct dc_dsc_policy *policy); 103 + struct dc_dsc_policy *policy, 104 + const enum dc_link_encoding_format link_encoding); 105 105 106 106 void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit); 107 107
+6 -8
drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
··· 186 186 187 187 spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; 188 188 spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; 189 + 190 + spl_in->debug.sharpen_policy = (enum sharpen_policy)pipe_ctx->stream->ctx->dc->debug.sharpen_policy; 191 + spl_in->debug.scale_to_sharpness_policy = 192 + (enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy; 193 + 189 194 /* Check if it is stream is in fullscreen and if its HDR. 190 195 * Use this to determine sharpness levels 191 196 */ 192 197 spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); 193 198 spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); 194 - spl_in->hdr_multx100 = 0; 195 - if (spl_in->is_hdr_on) { 196 - spl_in->hdr_multx100 = (uint32_t)dc_fixpt_floor(dc_fixpt_mul(plane_state->hdr_mult, 197 - dc_fixpt_from_int(100))); 198 - /* Disable sharpness for HDR Mult > 6.0 */ 199 - if (spl_in->hdr_multx100 > 600) 200 - spl_in->adaptive_sharpness.enable = false; 201 - } 199 + spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits; 202 200 } 203 201 204 202 /// @brief Translate SPL output parameters to pipe context
-3
drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
··· 313 313 314 314 if (swath_height_c > 0) 315 315 log2_swath_height_c = dml_log2(swath_height_c); 316 - 317 - if (req128_c && log2_swath_height_c > 0) 318 - log2_swath_height_c -= 1; 319 316 } 320 317 321 318 rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
-3
drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
··· 313 313 314 314 if (swath_height_c > 0) 315 315 log2_swath_height_c = dml_log2(swath_height_c); 316 - 317 - if (req128_c && log2_swath_height_c > 0) 318 - log2_swath_height_c -= 1; 319 316 } 320 317 321 318 rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
-9
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
··· 1924 1924 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1925 1925 *PTERequestSize = 64; 1926 1926 FractionOfPTEReturnDrop = 0; 1927 - } else if (MacroTileSizeBytes == 4096) { 1928 - PixelPTEReqHeightPTEs = 1; 1929 - *PixelPTEReqHeight = MacroTileHeight; 1930 - *PixelPTEReqWidth = 8 * *MacroTileWidth; 1931 - *PTERequestSize = 64; 1932 - if (ScanDirection != dm_vert) 1933 - FractionOfPTEReturnDrop = 0; 1934 - else 1935 - FractionOfPTEReturnDrop = 7.0 / 8; 1936 1927 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1937 1928 PixelPTEReqHeightPTEs = 16; 1938 1929 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+3 -3
drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
··· 8926 8926 8927 8927 // The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement 8928 8928 // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature 8929 - // if possible, then will try to program for the best power saving features in order of diffculty (dram, fclk, stutter) 8929 + // if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter) 8930 8930 s->iteration = 0; 8931 8931 s->MaxTotalRDBandwidth = 0; 8932 8932 s->AllPrefetchModeTested = false; ··· 9977 9977 dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); 9978 9978 #endif 9979 9979 9980 - // just suppluy with enough parameters to calculate meta and dte 9980 + // just supply with enough parameters to calculate meta and dte 9981 9981 CalculateVMAndRowBytes( 9982 9982 0, // dml_bool_t ViewportStationary, 9983 9983 1, // dml_bool_t DCCEnable, ··· 10110 10110 /// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK 10111 10111 /// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values. 10112 10112 /// @param state_idx Power state idx chosen 10113 - /// @param display_cfg Display Congiuration 10113 + /// @param display_cfg Display Configuration 10114 10114 /// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming 10115 10115 /// TODO: Add clk_cfg input, could be useful for standalone mode 10116 10116 dml_bool_t dml_mode_programming(
+3 -1
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
··· 858 858 859 859 plane->immediate_flip = plane_state->flip_immediate; 860 860 861 - plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable; 861 + plane->composition.rect_out_height_spans_vactive = 862 + plane_state->dst_rect.height >= stream->timing.v_addressable && 863 + stream->dst.height >= stream->timing.v_addressable; 862 864 } 863 865 864 866 //TODO : Could be possibly moved to a common helper layer.
+5 -3
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
··· 940 940 /* find synchronizable timing groups */ 941 941 for (j = i + 1; j < display_config->display_config.num_streams; j++) { 942 942 if (memcmp(master_timing, 943 - &display_config->display_config.stream_descriptors[j].timing, 944 - sizeof(struct dml2_timing_cfg)) == 0 && 945 - display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder) { 943 + &display_config->display_config.stream_descriptors[j].timing, 944 + sizeof(struct dml2_timing_cfg)) == 0 && 945 + display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder && 946 + (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match 947 + display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) { 946 948 set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); 947 949 set_bit_in_bitfield(&stream_mapped_mask, j); 948 950 }
+8 -7
drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
··· 668 668 */ 669 669 static bool decide_dsc_target_bpp_x16( 670 670 const struct dc_dsc_policy *policy, 671 + const struct dc_dsc_config_options *options, 671 672 const struct dsc_enc_caps *dsc_common_caps, 672 673 const int target_bandwidth_kbps, 673 674 const struct dc_crtc_timing *timing, ··· 683 682 if (decide_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16, 684 683 num_slices_h, dsc_common_caps, timing, link_encoding, &range)) { 685 684 if (target_bandwidth_kbps >= range.stream_kbps) { 686 - if (policy->enable_dsc_when_not_needed) 685 + if (policy->enable_dsc_when_not_needed || options->force_dsc_when_not_needed) 687 686 /* enable max bpp even dsc is not needed */ 688 687 *target_bpp_x16 = range.max_target_bpp_x16; 689 688 } else if (target_bandwidth_kbps >= range.max_kbps) { ··· 883 882 884 883 memset(dsc_cfg, 0, sizeof(struct dc_dsc_config)); 885 884 886 - dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy); 885 + dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding); 887 886 pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right; 888 887 pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; 889 888 ··· 1081 1080 if (target_bandwidth_kbps > 0) { 1082 1081 is_dsc_possible = decide_dsc_target_bpp_x16( 1083 1082 &policy, 1083 + options, 1084 1084 &dsc_common_caps, 1085 1085 target_bandwidth_kbps, 1086 1086 timing, ··· 1173 1171 1174 1172 void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, 1175 1173 uint32_t max_target_bpp_limit_override_x16, 1176 - struct dc_dsc_policy *policy) 1174 + struct dc_dsc_policy *policy, 1175 + const enum dc_link_encoding_format link_encoding) 1177 1176 { 1178 1177 uint32_t bpc = 0; 1179 1178 ··· 1238 1235 policy->max_target_bpp = max_target_bpp_limit_override_x16 / 16; 1239 1236 1240 1237 /* enable DSC when not needed, default false */ 1241 - if (dsc_policy_enable_dsc_when_not_needed) 1242 - policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; 1243 - else 1244 - policy->enable_dsc_when_not_needed = false; 1238 + policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; 1245 1239 } 1246 1240 1247 1241 void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit) ··· 1267 1267 options->dsc_force_odm_hslice_override = dc->debug.force_odm_combine; 1268 1268 options->max_target_bpp_limit_override_x16 = 0; 1269 1269 options->slice_height_granularity = 1; 1270 + options->force_dsc_when_not_needed = false; 1270 1271 }
+1
drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
··· 545 545 DCHUBBUB_ARB_MAX_REQ_OUTSTAND, 256, 546 546 DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 256); 547 547 548 + memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate)); 548 549 } 549 550 550 551 /*static void hubbub35_set_request_limit(struct hubbub *hubbub,
+63 -6
drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
··· 57 57 #include "panel_cntl.h" 58 58 #include "dc_state_priv.h" 59 59 #include "dpcd_defs.h" 60 + #include "dsc.h" 60 61 /* include DCE11 register header files */ 61 62 #include "dce/dce_11_0_d.h" 62 63 #include "dce/dce_11_0_sh_mask.h" ··· 1824 1823 } 1825 1824 } 1826 1825 1826 + static void clean_up_dsc_blocks(struct dc *dc) 1827 + { 1828 + struct display_stream_compressor *dsc = NULL; 1829 + struct timing_generator *tg = NULL; 1830 + struct stream_encoder *se = NULL; 1831 + struct dccg *dccg = dc->res_pool->dccg; 1832 + struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; 1833 + int i; 1834 + 1835 + if (dc->ctx->dce_version != DCN_VERSION_3_5 && 1836 + dc->ctx->dce_version != DCN_VERSION_3_51) 1837 + return; 1838 + 1839 + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { 1840 + struct dcn_dsc_state s = {0}; 1841 + 1842 + dsc = dc->res_pool->dscs[i]; 1843 + dsc->funcs->dsc_read_state(dsc, &s); 1844 + if (s.dsc_fw_en) { 1845 + /* disable DSC in OPTC */ 1846 + if (i < dc->res_pool->timing_generator_count) { 1847 + tg = dc->res_pool->timing_generators[i]; 1848 + tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0); 1849 + } 1850 + /* disable DSC in stream encoder */ 1851 + if (i < dc->res_pool->stream_enc_count) { 1852 + se = dc->res_pool->stream_enc[i]; 1853 + se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0); 1854 + se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true); 1855 + } 1856 + /* disable DSC block */ 1857 + if (dccg->funcs->set_ref_dscclk) 1858 + dccg->funcs->set_ref_dscclk(dccg, dsc->inst); 1859 + dsc->funcs->dsc_disable(dsc); 1860 + 1861 + /* power down DSC */ 1862 + if (pg_cntl != NULL) 1863 + pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false); 1864 + } 1865 + } 1866 + } 1867 + 1827 1868 /* 1828 1869 * When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need: 1829 1870 * 1. Power down all DC HW blocks ··· 1970 1927 clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); 1971 1928 1972 1929 power_down_all_hw_blocks(dc); 1930 + 1931 + /* DSC could be enabled on eDP during VBIOS post. 1932 + * To clean up dsc blocks if eDP is in link but not active. 1933 + */ 1934 + if (edp_link_with_sink && (edp_stream_num == 0)) 1935 + clean_up_dsc_blocks(dc); 1936 + 1973 1937 disable_vga_and_power_gate_all_controllers(dc); 1974 1938 if (edp_link_with_sink && !keep_edp_vdd_on) 1975 1939 dc->hwss.edp_power_control(edp_link_with_sink, false); ··· 2096 2046 * as well. 2097 2047 */ 2098 2048 for (i = 0; i < num_pipes; i++) { 2099 - pipe_ctx[i]->stream_res.tg->funcs->set_drr( 2100 - pipe_ctx[i]->stream_res.tg, &params); 2049 + /* dc_state_destruct() might null the stream resources, so fetch tg 2050 + * here first to avoid a race condition. The lifetime of the pointee 2051 + * itself (the timing_generator object) is not a problem here. 2052 + */ 2053 + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; 2101 2054 2102 - if (adjust.v_total_max != 0 && adjust.v_total_min != 0) 2103 - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( 2104 - pipe_ctx[i]->stream_res.tg, 2105 - event_triggers, num_frames); 2055 + if ((tg != NULL) && tg->funcs) { 2056 + if (tg->funcs->set_drr) 2057 + tg->funcs->set_drr(tg, &params); 2058 + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) 2059 + if (tg->funcs->set_static_screen_control) 2060 + tg->funcs->set_static_screen_control( 2061 + tg, event_triggers, num_frames); 2062 + } 2106 2063 } 2107 2064 } 2108 2065
+1 -1
drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
··· 455 455 struct mcif_wb *mcif_wb; 456 456 struct mcif_warmup_params warmup_params = {0}; 457 457 unsigned int i, i_buf; 458 - /*make sure there is no active DWB eanbled */ 458 + /* make sure there is no active DWB enabled */ 459 459 for (i = 0; i < num_dwb; i++) { 460 460 dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst]; 461 461 if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) {
+14
drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
··· 1032 1032 struct dsc_config dsc_cfg; 1033 1033 struct dsc_optc_config dsc_optc_cfg = {0}; 1034 1034 enum optc_dsc_mode optc_dsc_mode; 1035 + struct dcn_dsc_state dsc_state = {0}; 1036 + 1037 + if (!dsc) { 1038 + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); 1039 + return; 1040 + } 1041 + 1042 + if (dsc->funcs->dsc_read_state) { 1043 + dsc->funcs->dsc_read_state(dsc, &dsc_state); 1044 + if (!dsc_state.dsc_fw_en) { 1045 + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); 1046 + return; 1047 + } 1048 + } 1035 1049 1036 1050 /* Enable DSC hw block */ 1037 1051 dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+13
drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
··· 334 334 struct dsc_config dsc_cfg; 335 335 struct dsc_optc_config dsc_optc_cfg = {0}; 336 336 enum optc_dsc_mode optc_dsc_mode; 337 + struct dcn_dsc_state dsc_state = {0}; 337 338 339 + if (!dsc) { 340 + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); 341 + return; 342 + } 343 + 344 + if (dsc->funcs->dsc_read_state) { 345 + dsc->funcs->dsc_read_state(dsc, &dsc_state); 346 + if (!dsc_state.dsc_fw_en) { 347 + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); 348 + return; 349 + } 350 + } 338 351 /* Enable DSC hw block */ 339 352 dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; 340 353 dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
+1
drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
··· 2155 2155 2156 2156 dc->dml2_options.max_segments_per_hubp = 24; 2157 2157 dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ 2158 + dc->dml2_options.override_det_buffer_size_kbytes = true; 2158 2159 2159 2160 if (dc->config.sdpif_request_limit_words_per_umc == 0) 2160 2161 dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
··· 736 736 .hdmichar = true, 737 737 .dpstream = true, 738 738 .symclk32_se = true, 739 - .symclk32_le = true, 739 + .symclk32_le = false, 740 740 .symclk_fe = true, 741 741 .physymclk = false, 742 742 .dpiasymclk = true, ··· 2133 2133 2134 2134 dc->dml2_options.max_segments_per_hubp = 24; 2135 2135 dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ 2136 + dc->dml2_options.override_det_buffer_size_kbytes = true; 2136 2137 2137 2138 if (dc->config.sdpif_request_limit_words_per_umc == 0) 2138 2139 dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/
+39 -15
drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
··· 813 813 return skip_easf; 814 814 } 815 815 816 + /* Check if video is in fullscreen mode */ 817 + static bool spl_is_video_fullscreen(struct spl_in *spl_in) 818 + { 819 + if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) 820 + return true; 821 + return false; 822 + } 823 + 816 824 static bool spl_get_isharp_en(struct spl_in *spl_in, 817 825 struct spl_scratch *spl_scratch) 818 826 { ··· 828 820 int vratio = 0; 829 821 int hratio = 0; 830 822 struct spl_taps taps = spl_scratch->scl_data.taps; 823 + bool fullscreen = spl_is_video_fullscreen(spl_in); 831 824 832 825 /* Return if adaptive sharpness is disabled */ 833 826 if (spl_in->adaptive_sharpness.enable == false) ··· 844 835 // Scaling is up to 1:1 (no scaling) or upscaling 845 836 846 837 /* 847 - * Apply sharpness to all RGB surfaces and to 848 - * NV12/P010 surfaces 838 + * Apply sharpness to RGB and YUV (NV12/P010) 839 + * surfaces based on policy setting 849 840 */ 841 + if (!spl_is_yuv420(spl_in->basic_in.format) && 842 + (spl_in->debug.sharpen_policy == SHARPEN_YUV)) 843 + return enable_isharp; 844 + else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) && 845 + (spl_in->debug.sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV)) 846 + return enable_isharp; 847 + else if (!spl_in->is_fullscreen && 848 + spl_in->debug.sharpen_policy == SHARPEN_FULLSCREEN_ALL) 849 + return enable_isharp; 850 850 851 851 /* 852 852 * Apply sharpness if supports horizontal taps 4,6 AND ··· 1173 1155 } 1174 1156 1175 1157 /* Calculate C0-C3 coefficients based on HDR_mult */ 1176 - static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t hdr_multx100) 1158 + static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t sdr_white_level_nits) 1177 1159 { 1178 1160 struct spl_fixed31_32 hdr_mult, c0_mult, c1_mult, c2_mult; 1179 1161 struct spl_fixed31_32 c0_calc, c1_calc, c2_calc; 1180 1162 struct spl_custom_float_format fmt; 1163 + uint32_t hdr_multx100_int; 1181 1164 1182 - SPL_ASSERT(hdr_multx100); 1183 - hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100, 100LL); 1165 + if ((sdr_white_level_nits >= 80) && (sdr_white_level_nits <= 480)) 1166 + hdr_multx100_int = sdr_white_level_nits * 100 / 80; 1167 + else 1168 + hdr_multx100_int = 100; /* default for 80 nits otherwise */ 1169 + 1170 + hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100_int, 100LL); 1184 1171 c0_mult = spl_fixpt_from_fraction(2126LL, 10000LL); 1185 1172 c1_mult = spl_fixpt_from_fraction(7152LL, 10000LL); 1186 1173 c2_mult = spl_fixpt_from_fraction(722LL, 10000LL); ··· 1214 1191 static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, 1215 1192 bool enable_easf_h, enum linear_light_scaling lls_pref, 1216 1193 enum spl_pixel_format format, enum system_setup setup, 1217 - uint32_t hdr_multx100) 1194 + uint32_t sdr_white_level_nits) 1218 1195 { 1219 1196 struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; 1220 1197 if (enable_easf_v) { ··· 1522 1499 dscl_prog_data->easf_ltonl_en = 1; // Linear input 1523 1500 if ((setup == HDR_L) && (spl_is_rgb8(format))) { 1524 1501 /* Calculate C0-C3 coefficients based on HDR multiplier */ 1525 - spl_calculate_c0_c3_hdr(dscl_prog_data, hdr_multx100); 1502 + spl_calculate_c0_c3_hdr(dscl_prog_data, sdr_white_level_nits); 1526 1503 } else { // HDR_L ( DWM ) and SDR_L 1527 1504 dscl_prog_data->easf_matrix_c0 = 1528 1505 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) ··· 1580 1557 struct adaptive_sharpness adp_sharpness, bool enable_isharp, 1581 1558 enum linear_light_scaling lls_pref, enum spl_pixel_format format, 1582 1559 const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, 1583 - enum system_setup setup) 1560 + enum system_setup setup, enum scale_to_sharpness_policy scale_to_sharpness_policy) 1584 1561 { 1585 1562 /* Turn off sharpener if not required */ 1586 1563 if (!enable_isharp) { 1587 1564 dscl_prog_data->isharp_en = 0; 1588 1565 return; 1589 1566 } 1567 + 1568 + spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness, 1569 + scale_to_sharpness_policy); 1570 + dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); 1571 + dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; 1590 1572 1591 1573 dscl_prog_data->isharp_en = 1; // ISHARP_EN 1592 1574 // Set ISHARP_NOISEDET_MODE if htaps = 6-tap ··· 1690 1662 dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format 1691 1663 } 1692 1664 1693 - 1694 - spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness); 1695 - dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); 1696 - dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; 1697 - 1698 1665 // Program the nldelta soft clip values 1699 1666 if (lls_pref == LLS_PREF_YES) { 1700 1667 dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ ··· 1773 1750 1774 1751 // Set EASF 1775 1752 spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, 1776 - spl_in->basic_in.format, setup, spl_in->hdr_multx100); 1753 + spl_in->basic_in.format, setup, spl_in->sdr_white_level_nits); 1777 1754 1778 1755 // Set iSHARP 1779 1756 vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); ··· 1784 1761 isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; 1785 1762 1786 1763 spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, 1787 - spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); 1764 + spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup, 1765 + spl_in->debug.scale_to_sharpness_policy); 1788 1766 1789 1767 return res; 1790 1768 }
+79 -6
drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c
··· 500 500 }, 501 501 }; 502 502 503 + struct scale_ratio_to_sharpness_level_adj sharpness_level_adj[NUM_SHARPNESS_ADJ_LEVELS] = { 504 + {1125, 1000, 0}, 505 + {11, 10, 1}, 506 + {1075, 1000, 2}, 507 + {105, 100, 3}, 508 + {1025, 1000, 4}, 509 + {1, 1, 5}, 510 + }; 511 + 503 512 const uint32_t *spl_get_filter_isharp_1D_lut_0(void) 504 513 { 505 514 return filter_isharp_1D_lut_0; ··· 550 541 return filter_isharp_bs_3tap_64p_s1_12; 551 542 } 552 543 553 - static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, enum system_setup setup, 554 - struct spl_sharpness_range sharpness_range) 544 + static unsigned int spl_calculate_sharpness_level_adj(struct spl_fixed31_32 ratio) 545 + { 546 + int j; 547 + struct spl_fixed31_32 ratio_level; 548 + struct scale_ratio_to_sharpness_level_adj *lookup_ptr; 549 + unsigned int sharpness_level_down_adj; 550 + 551 + /* 552 + * Adjust sharpness level based on current scaling ratio 553 + * 554 + * We have 5 discrete scaling ratios which we will use to adjust the 555 + * sharpness level down by 1 as we pass each ratio. The ratios 556 + * are 557 + * 558 + * 1.125 upscale and higher - no adj 559 + * 1.100 - under 1.125 - adj level down 1 560 + * 1.075 - under 1.100 - adj level down 2 561 + * 1.050 - under 1.075 - adj level down 3 562 + * 1.025 - under 1.050 - adj level down 4 563 + * 1.000 - under 1.025 - adj level down 5 564 + * 565 + */ 566 + j = 0; 567 + sharpness_level_down_adj = 0; 568 + lookup_ptr = sharpness_level_adj; 569 + while (j < NUM_SHARPNESS_ADJ_LEVELS) { 570 + ratio_level = spl_fixpt_from_fraction(lookup_ptr->ratio_numer, 571 + lookup_ptr->ratio_denom); 572 + if (ratio.value >= ratio_level.value) { 573 + sharpness_level_down_adj = lookup_ptr->level_down_adj; 574 + break; 575 + } 576 + lookup_ptr++; 577 + j++; 578 + } 579 + return sharpness_level_down_adj; 580 + } 581 + 582 + static unsigned int spl_calculate_sharpness_level(struct spl_fixed31_32 ratio, 583 + int discrete_sharpness_level, enum system_setup setup, 584 + struct spl_sharpness_range sharpness_range, 585 + enum scale_to_sharpness_policy scale_to_sharpness_policy) 555 586 { 556 587 unsigned int sharpness_level = 0; 588 + unsigned int sharpness_level_down_adj = 0; 557 589 558 590 int min_sharpness, max_sharpness, mid_sharpness; 559 591 592 + /* 593 + * Adjust sharpness level if policy requires we adjust it based on 594 + * scale ratio. Based on scale ratio, we may adjust the sharpness 595 + * level down by a certain number of steps. We will not select 596 + * a sharpness value of 0 so the lowest sharpness level will be 597 + * 0 or 1 depending on what the min_sharpness is 598 + * 599 + * If the policy is no required, this code maybe removed at a later 600 + * date 601 + */ 560 602 switch (setup) { 561 603 562 604 case HDR_L: 563 605 min_sharpness = sharpness_range.hdr_rgb_min; 564 606 max_sharpness = sharpness_range.hdr_rgb_max; 565 607 mid_sharpness = sharpness_range.hdr_rgb_mid; 608 + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) 609 + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); 566 610 break; 567 611 case HDR_NL: 568 612 /* currently no use case, use Non-linear SDR values for now */ ··· 623 561 min_sharpness = sharpness_range.sdr_yuv_min; 624 562 max_sharpness = sharpness_range.sdr_yuv_max; 625 563 mid_sharpness = sharpness_range.sdr_yuv_mid; 564 + if (scale_to_sharpness_policy >= SCALE_TO_SHARPNESS_ADJ_YUV) 565 + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); 626 566 break; 627 567 case SDR_L: 628 568 default: 629 569 min_sharpness = sharpness_range.sdr_rgb_min; 630 570 max_sharpness = sharpness_range.sdr_rgb_max; 631 571 mid_sharpness = sharpness_range.sdr_rgb_mid; 572 + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) 573 + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); 632 574 break; 633 575 } 576 + 577 + if ((min_sharpness == 0) && (sharpness_level_down_adj >= discrete_sharpness_level)) 578 + discrete_sharpness_level = 1; 579 + else if (sharpness_level_down_adj >= discrete_sharpness_level) 580 + discrete_sharpness_level = 0; 581 + else 582 + discrete_sharpness_level -= sharpness_level_down_adj; 634 583 635 584 int lower_half_step_size = (mid_sharpness - min_sharpness) / 5; 636 585 int upper_half_step_size = (max_sharpness - mid_sharpness) / 5; ··· 657 584 } 658 585 659 586 void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, 660 - struct adaptive_sharpness sharpness) 587 + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy) 661 588 { 662 589 uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; 663 590 struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level; ··· 667 594 uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE]; 668 595 669 596 /* Custom sharpnessX1000 value */ 670 - unsigned int sharpnessX1000 = spl_calculate_sharpness_level(sharpness.sharpness_level, 671 - setup, sharpness.sharpness_range); 597 + unsigned int sharpnessX1000 = spl_calculate_sharpness_level(ratio, 598 + sharpness.sharpness_level, setup, 599 + sharpness.sharpness_range, scale_to_sharpness_policy); 672 600 sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000); 673 601 674 602 /* ··· 679 605 if ((filter_isharp_1D_lut_pregen[setup].sharpness_numer == sharpnessX1000) && 680 606 (filter_isharp_1D_lut_pregen[setup].sharpness_denom == 1000)) 681 607 return; 682 - 683 608 684 609 /* 685 610 * Calculate LUT_128_gained with this equation:
+5 -4
drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h
··· 20 20 const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); 21 21 uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); 22 22 23 - struct scale_ratio_to_sharpness_level_lookup { 23 + #define NUM_SHARPNESS_ADJ_LEVELS 6 24 + struct scale_ratio_to_sharpness_level_adj { 24 25 unsigned int ratio_numer; 25 26 unsigned int ratio_denom; 26 - unsigned int sharpness_numer; 27 - unsigned int sharpness_denom; 27 + unsigned int level_down_adj; /* adjust sharpness level down */ 28 28 }; 29 29 30 30 struct isharp_1D_lut_pregen { ··· 45 45 void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, 46 46 const struct spl_scaler_data *data); 47 47 48 - void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, struct adaptive_sharpness sharpness); 48 + void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, 49 + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy); 49 50 uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup); 50 51 #endif /* __DC_SPL_ISHARP_FILTERS_H__ */
+14 -1
drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h
··· 487 487 LLS_PREF_YES, 488 488 LLS_PREF_NO 489 489 }; 490 + enum sharpen_policy { 491 + SHARPEN_ALWAYS = 0, 492 + SHARPEN_YUV = 1, 493 + SHARPEN_RGB_FULLSCREEN_YUV = 2, 494 + SHARPEN_FULLSCREEN_ALL = 3 495 + }; 496 + enum scale_to_sharpness_policy { 497 + NO_SCALE_TO_SHARPNESS_ADJ = 0, 498 + SCALE_TO_SHARPNESS_ADJ_YUV = 1, 499 + SCALE_TO_SHARPNESS_ADJ_ALL = 2 500 + }; 490 501 struct spl_funcs { 491 502 void (*spl_calc_lb_num_partitions) 492 503 (bool alpha_en, ··· 510 499 struct spl_debug { 511 500 int visual_confirm_base_offset; 512 501 int visual_confirm_dpp_offset; 502 + enum sharpen_policy sharpen_policy; 503 + enum scale_to_sharpness_policy scale_to_sharpness_policy; 513 504 }; 514 505 515 506 struct spl_in { ··· 531 518 bool is_hdr_on; 532 519 int h_active; 533 520 int v_active; 534 - int hdr_multx100; 521 + int sdr_white_level_nits; 535 522 }; 536 523 // end of SPL inputs 537 524
+1
drivers/gpu/drm/amd/display/dmub/dmub_srv.h
··· 300 300 enum dmub_ips_disable_type disable_ips; 301 301 bool disallow_phy_access; 302 302 bool disable_sldo_opt; 303 + bool enable_non_transparent_setconfig; 303 304 }; 304 305 305 306 /**
+24 -1
drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
··· 682 682 uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/ 683 683 uint32_t usb4_cm_version: 1; /**< 1 CM support */ 684 684 uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */ 685 - uint32_t reserved0: 1; 685 + uint32_t enable_non_transparent_setconfig: 1; /* 1 if dpia use conventional dp lt flow*/ 686 686 uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/ 687 687 uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ 688 688 uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ ··· 1308 1308 DMUB_CMD__DPIA_DIG1_DPIA_CONTROL = 0, 1309 1309 DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1, 1310 1310 DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2, 1311 + DMUB_CMD__DPIA_SET_TPS_NOTIFICATION = 3, 1311 1312 }; 1312 1313 1313 1314 /* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */ ··· 2137 2136 struct dmub_rb_cmd_set_mst_alloc_slots { 2138 2137 struct dmub_cmd_header header; /* header */ 2139 2138 struct dmub_cmd_mst_alloc_slots_control_data mst_slots_control; /* mst slots control */ 2139 + }; 2140 + 2141 + /** 2142 + * Data passed from driver to FW in a DMUB_CMD__SET_TPS_NOTIFICATION command. 2143 + */ 2144 + struct dmub_cmd_tps_notification_data { 2145 + uint8_t instance; /* DPIA instance */ 2146 + uint8_t tps; /* requested training pattern */ 2147 + uint8_t reserved1; 2148 + uint8_t reserved2; 2149 + }; 2150 + 2151 + /** 2152 + * DMUB command structure for SET_TPS_NOTIFICATION command. 2153 + */ 2154 + struct dmub_rb_cmd_set_tps_notification { 2155 + struct dmub_cmd_header header; /* header */ 2156 + struct dmub_cmd_tps_notification_data tps_notification; /* set tps_notification data */ 2140 2157 }; 2141 2158 2142 2159 /** ··· 5323 5304 * Definition of a DMUB_CMD__DPIA_MST_ALLOC_SLOTS command. 5324 5305 */ 5325 5306 struct dmub_rb_cmd_set_mst_alloc_slots set_mst_alloc_slots; 5307 + /** 5308 + * Definition of a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command. 5309 + */ 5310 + struct dmub_rb_cmd_set_tps_notification set_tps_notification; 5326 5311 /** 5327 5312 * Definition of a DMUB_CMD__EDID_CEA command. 5328 5313 */
+1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
··· 425 425 boot_options.bits.ips_disable = params->disable_ips; 426 426 boot_options.bits.ips_sequential_ono = params->ips_sequential_ono; 427 427 boot_options.bits.disable_sldo_opt = params->disable_sldo_opt; 428 + boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig; 428 429 429 430 REG_WRITE(DMCUB_SCRATCH14, boot_options.all); 430 431 }
+1 -1
drivers/gpu/drm/amd/display/modules/freesync/freesync.c
··· 134 134 135 135 v_total = div64_u64(div64_u64(((unsigned long long)( 136 136 frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), 137 - stream->timing.h_total), 1000000); 137 + stream->timing.h_total) + 500000, 1000000); 138 138 139 139 /* v_total cannot be less than nominal */ 140 140 if (v_total < stream->timing.v_total) {
+1 -1
drivers/gpu/drm/amd/include/amd_shared.h
··· 85 85 * @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler 86 86 * @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine 87 87 * @AMD_IP_BLOCK_TYPE_VPE: Video Processing Engine 88 - * @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Schduler for Multimedia 88 + * @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Scheduler for Multimedia 89 89 * @AMD_IP_BLOCK_TYPE_ISP: Image Signal Processor 90 90 * @AMD_IP_BLOCK_TYPE_NUM: Total number of IP block types 91 91 */
+8 -2
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 71 71 KGD_POOL_FRAMEBUFFER = 3, 72 72 }; 73 73 74 + struct kfd_cu_occupancy { 75 + u32 wave_cnt; 76 + u32 doorbell_off; 77 + }; 78 + 74 79 /** 75 80 * enum kfd_sched_policy 76 81 * ··· 318 313 uint32_t grace_period, 319 314 uint32_t *reg_offset, 320 315 uint32_t *reg_data); 321 - void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, 322 - int *wave_cnt, int *max_waves_per_cu, uint32_t inst); 316 + void (*get_cu_occupancy)(struct amdgpu_device *adev, 317 + struct kfd_cu_occupancy *cu_occupancy, 318 + int *max_waves_per_cu, uint32_t inst); 323 319 void (*program_trap_handler_settings)(struct amdgpu_device *adev, 324 320 uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, 325 321 uint32_t inst);
+5 -1
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
··· 123 123 VOLTAGE_GUARDBAND_COUNT 124 124 } GFX_GUARDBAND_e; 125 125 126 - #define SMU_METRICS_TABLE_VERSION 0xC 126 + #define SMU_METRICS_TABLE_VERSION 0xD 127 127 128 128 typedef struct __attribute__((packed, aligned(4))) { 129 129 uint32_t AccumulationCounter; ··· 227 227 // PCIE LINK Speed and width 228 228 uint32_t PCIeLinkSpeed; 229 229 uint32_t PCIeLinkWidth; 230 + 231 + // PER XCD ACTIVITY 232 + uint32_t GfxBusy[8]; 233 + uint64_t GfxBusyAcc[8]; 230 234 } MetricsTableX_t; 231 235 232 236 typedef struct __attribute__((packed, aligned(4))) {
+5 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
··· 2569 2569 } 2570 2570 } 2571 2571 2572 - return smu_cmn_send_smc_msg_with_param(smu, 2572 + ret = smu_cmn_send_smc_msg_with_param(smu, 2573 2573 SMU_MSG_SetWorkloadMask, 2574 2574 workload_mask, 2575 2575 NULL); 2576 + if (!ret) 2577 + smu->workload_mask = workload_mask; 2578 + 2579 + return ret; 2576 2580 } 2577 2581 2578 2582 static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
+6 -2
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
··· 2107 2107 } 2108 2108 mutex_lock(&adev->pm.mutex); 2109 2109 r = smu_v13_0_6_request_i2c_xfer(smu, req); 2110 - if (r) 2111 - goto fail; 2110 + if (r) { 2111 + /* Retry once, in case of an i2c collision */ 2112 + r = smu_v13_0_6_request_i2c_xfer(smu, req); 2113 + if (r) 2114 + goto fail; 2115 + } 2112 2116 2113 2117 for (c = i = 0; i < num_msgs; i++) { 2114 2118 if (!(msg[i].flags & I2C_M_RD)) {
+3
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
··· 2501 2501 return -EINVAL; 2502 2502 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, 2503 2503 1 << workload_type, NULL); 2504 + 2504 2505 if (ret) 2505 2506 dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); 2507 + else 2508 + smu->workload_mask = (1 << workload_type); 2506 2509 2507 2510 return ret; 2508 2511 }
+5 -1
drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
··· 1861 1861 if (workload_type < 0) 1862 1862 return -EINVAL; 1863 1863 1864 - return smu_cmn_send_smc_msg_with_param(smu, 1864 + ret = smu_cmn_send_smc_msg_with_param(smu, 1865 1865 SMU_MSG_SetWorkloadMask, 1866 1866 1 << workload_type, 1867 1867 NULL); 1868 + if (!ret) 1869 + smu->workload_mask = 1 << workload_type; 1870 + 1871 + return ret; 1868 1872 } 1869 1873 1870 1874 static int smu_v14_0_2_baco_enter(struct smu_context *smu)
+1 -1
drivers/gpu/drm/i915/display/intel_ddi.c
··· 916 916 * instead of a specific AUX_IO_<port> reference without powering up any 917 917 * extra wells. 918 918 */ 919 - if (intel_encoder_can_psr(&dig_port->base)) 919 + if (intel_psr_needs_aux_io_power(&dig_port->base, crtc_state)) 920 920 return intel_display_power_aux_io_domain(i915, dig_port->aux_ch); 921 921 else if (DISPLAY_VER(i915) < 14 && 922 922 (intel_crtc_has_dp_encoder(crtc_state) ||
+17 -5
drivers/gpu/drm/i915/display/intel_dp.c
··· 531 531 intel_dp_set_source_rates(struct intel_dp *intel_dp) 532 532 { 533 533 /* The values must be in increasing order */ 534 + static const int bmg_rates[] = { 535 + 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, 536 + 810000, 1000000, 1350000, 537 + }; 534 538 static const int mtl_rates[] = { 535 539 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, 536 540 810000, 1000000, 2000000, ··· 565 561 intel_dp->source_rates || intel_dp->num_source_rates); 566 562 567 563 if (DISPLAY_VER(dev_priv) >= 14) { 568 - source_rates = mtl_rates; 569 - size = ARRAY_SIZE(mtl_rates); 564 + if (IS_BATTLEMAGE(dev_priv)) { 565 + source_rates = bmg_rates; 566 + size = ARRAY_SIZE(bmg_rates); 567 + } else { 568 + source_rates = mtl_rates; 569 + size = ARRAY_SIZE(mtl_rates); 570 + } 570 571 max_rate = mtl_max_source_rate(intel_dp); 571 572 } else if (DISPLAY_VER(dev_priv) >= 11) { 572 573 source_rates = icl_rates; ··· 4067 4058 drm_dp_is_branch(intel_dp->dpcd)); 4068 4059 intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); 4069 4060 4061 + intel_dp->colorimetry_support = 4062 + intel_dp_get_colorimetry_status(intel_dp); 4063 + 4070 4064 /* 4071 4065 * Read the eDP display control registers. 4072 4066 * ··· 4182 4170 drm_dp_is_branch(intel_dp->dpcd)); 4183 4171 4184 4172 intel_init_dpcd_quirks(intel_dp, &intel_dp->desc.ident); 4173 + 4174 + intel_dp->colorimetry_support = 4175 + intel_dp_get_colorimetry_status(intel_dp); 4185 4176 4186 4177 intel_dp_update_sink_caps(intel_dp); 4187 4178 } ··· 6936 6921 drm_dbg_kms(&dev_priv->drm, 6937 6922 "HDCP init failed, skipping.\n"); 6938 6923 } 6939 - 6940 - intel_dp->colorimetry_support = 6941 - intel_dp_get_colorimetry_status(intel_dp); 6942 6924 6943 6925 intel_dp->frl.is_trained = false; 6944 6926 intel_dp->frl.trained_rate_gbps = 0;
+21 -11
drivers/gpu/drm/i915/display/intel_psr.c
··· 203 203 return false; 204 204 } 205 205 206 + bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, 207 + const struct intel_crtc_state *crtc_state) 208 + { 209 + /* 210 + * For PSR/PR modes only eDP requires the AUX IO power to be enabled whenever 211 + * the output is enabled. For non-eDP outputs the main link is always 212 + * on, hence it doesn't require the HW initiated AUX wake-up signaling used 213 + * for eDP. 214 + * 215 + * TODO: 216 + * - Consider leaving AUX IO disabled for eDP / PR as well, in case 217 + * the ALPM with main-link off mode is not enabled. 218 + * - Leave AUX IO enabled for DP / PR, once support for ALPM with 219 + * main-link off mode is added for it and this mode gets enabled. 220 + */ 221 + return intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && 222 + intel_encoder_can_psr(encoder); 223 + } 224 + 206 225 static bool psr_global_enabled(struct intel_dp *intel_dp) 207 226 { 208 227 struct intel_display *display = to_intel_display(intel_dp); ··· 2803 2784 EDP_PSR_STATUS_STATE_MASK, 50); 2804 2785 } 2805 2786 2806 - static int _panel_replay_ready_for_pipe_update_locked(struct intel_dp *intel_dp) 2807 - { 2808 - return intel_dp_is_edp(intel_dp) ? 2809 - _psr2_ready_for_pipe_update_locked(intel_dp) : 2810 - _psr1_ready_for_pipe_update_locked(intel_dp); 2811 - } 2812 - 2813 2787 /** 2814 2788 * intel_psr_wait_for_idle_locked - wait for PSR be ready for a pipe update 2815 2789 * @new_crtc_state: new CRTC state ··· 2825 2813 2826 2814 lockdep_assert_held(&intel_dp->psr.lock); 2827 2815 2828 - if (!intel_dp->psr.enabled) 2816 + if (!intel_dp->psr.enabled || intel_dp->psr.panel_replay_enabled) 2829 2817 continue; 2830 2818 2831 - if (intel_dp->psr.panel_replay_enabled) 2832 - ret = _panel_replay_ready_for_pipe_update_locked(intel_dp); 2833 - else if (intel_dp->psr.sel_update_enabled) 2819 + if (intel_dp->psr.sel_update_enabled) 2834 2820 ret = _psr2_ready_for_pipe_update_locked(intel_dp); 2835 2821 else 2836 2822 ret = _psr1_ready_for_pipe_update_locked(intel_dp);
+2
drivers/gpu/drm/i915/display/intel_psr.h
··· 25 25 (intel_dp)->psr.source_panel_replay_support) 26 26 27 27 bool intel_encoder_can_psr(struct intel_encoder *encoder); 28 + bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, 29 + const struct intel_crtc_state *crtc_state); 28 30 void intel_psr_init_dpcd(struct intel_dp *intel_dp); 29 31 void intel_psr_enable_sink(struct intel_dp *intel_dp, 30 32 const struct intel_crtc_state *crtc_state);
+2 -1
drivers/gpu/drm/xe/xe_bb.c
··· 65 65 { 66 66 u32 size = drm_suballoc_size(bb->bo); 67 67 68 - bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 68 + if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END) 69 + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 69 70 70 71 xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); 71 72
+14
drivers/gpu/drm/xe/xe_bo.c
··· 2320 2320 drm_gem_object_free(&bo->ttm.base.refcount); 2321 2321 } 2322 2322 2323 + void xe_bo_put(struct xe_bo *bo) 2324 + { 2325 + might_sleep(); 2326 + if (bo) { 2327 + #ifdef CONFIG_PROC_FS 2328 + if (bo->client) 2329 + might_lock(&bo->client->bos_lock); 2330 + #endif 2331 + if (bo->ggtt_node && bo->ggtt_node->ggtt) 2332 + might_lock(&bo->ggtt_node->ggtt->lock); 2333 + drm_gem_object_put(&bo->ttm.base); 2334 + } 2335 + } 2336 + 2323 2337 /** 2324 2338 * xe_bo_dumb_create - Create a dumb bo as backing for a fb 2325 2339 * @file_priv: ...
+1 -5
drivers/gpu/drm/xe/xe_bo.h
··· 126 126 return bo; 127 127 } 128 128 129 - static inline void xe_bo_put(struct xe_bo *bo) 130 - { 131 - if (bo) 132 - drm_gem_object_put(&bo->ttm.base); 133 - } 129 + void xe_bo_put(struct xe_bo *bo); 134 130 135 131 static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo) 136 132 {
+1 -6
drivers/gpu/drm/xe/xe_drm_client.c
··· 168 168 struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) 169 169 { 170 170 u64 sz = bo->size; 171 - u32 mem_type; 171 + u32 mem_type = bo->ttm.resource->mem_type; 172 172 173 173 xe_bo_assert_held(bo); 174 - 175 - if (bo->placement.placement) 176 - mem_type = bo->placement.placement->mem_type; 177 - else 178 - mem_type = XE_PL_TT; 179 174 180 175 if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) 181 176 stats[mem_type].shared += sz;
+6
drivers/gpu/drm/xe/xe_gt_pagefault.c
··· 212 212 * TODO: Change to read lock? Using write lock for simplicity. 213 213 */ 214 214 down_write(&vm->lock); 215 + 216 + if (xe_vm_is_closed(vm)) { 217 + err = -ENOENT; 218 + goto unlock_vm; 219 + } 220 + 215 221 vma = lookup_vma(vm, pf->page_addr); 216 222 if (!vma) { 217 223 err = -EINVAL;
+4 -2
drivers/gpu/drm/xe/xe_guc.h
··· 18 18 */ 19 19 #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) 20 20 #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) 21 - #define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) 22 - #define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) 21 + #define GUC_SUBMIT_VER(guc) \ 22 + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) 23 + #define GUC_FIRMWARE_VER(guc) \ 24 + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) 23 25 24 26 struct drm_printer; 25 27
+1
drivers/gpu/drm/xe/xe_vram.c
··· 182 182 offset = offset_hi << 32; /* HW view bits 39:32 */ 183 183 offset |= offset_lo << 6; /* HW view bits 31:6 */ 184 184 offset *= num_enabled; /* convert to SW view */ 185 + offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */ 185 186 186 187 /* We don't expect any holes */ 187 188 xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size),