Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'amd-drm-next-6.14-2025-01-10' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.14-2025-01-10:

amdgpu:
- Fix max surface handling in DC
- clang fixes
- DCN 3.5 fixes
- DCN 4.0.1 fixes
- DC CRC fixes
- DML updates
- DSC fixes
- PSR fixes
- DC add some divide by 0 checks
- SMU13 updates
- SR-IOV fixes
- RAS fixes
- Cleaner shader support for gfx10.3 dGPUs
- fix drm buddy trim handling
- SDMA engine reset updates
- Fix RB bitmap setup
- Fix doorbell ttm cleanup
- Add CEC notifier support
- DPIA updates
- MST fixes

amdkfd:
- Shader debugger fixes
- Trap handler cleanup
- Cleanup includes
- Eviction fence wq fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250110172731.2960668-1-alexander.deucher@amd.com

+4852 -2026
+6
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
··· 1131 1131 uint32_t low, high; 1132 1132 uint64_t queue_addr = 0; 1133 1133 1134 + if (!amdgpu_gpu_recovery) 1135 + return 0; 1136 + 1134 1137 kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); 1135 1138 amdgpu_gfx_rlc_enter_safe_mode(adev, inst); 1136 1139 ··· 1181 1178 { 1182 1179 uint32_t low, high, pipe_reset_data = 0; 1183 1180 uint64_t queue_addr = 0; 1181 + 1182 + if (!amdgpu_gpu_recovery) 1183 + return 0; 1184 1184 1185 1185 kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); 1186 1186 amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+2 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
··· 88 88 { 89 89 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj); 90 90 91 - if (aobj) { 92 - amdgpu_hmm_unregister(aobj); 93 - ttm_bo_put(&aobj->tbo); 94 - } 91 + amdgpu_hmm_unregister(aobj); 92 + ttm_bo_put(&aobj->tbo); 95 93 } 96 94 97 95 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+11 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
··· 3020 3020 struct amdgpu_device *adev = ip_block->adev; 3021 3021 3022 3022 mutex_lock(&adev->firmware.mutex); 3023 - /* 3024 - * This sequence is just used on hw_init only once, no need on 3025 - * resume. 3026 - */ 3023 + 3027 3024 ret = amdgpu_ucode_init_bo(adev); 3028 3025 if (ret) 3029 3026 goto failed; ··· 3144 3147 } 3145 3148 3146 3149 mutex_lock(&adev->firmware.mutex); 3150 + 3151 + ret = amdgpu_ucode_init_bo(adev); 3152 + if (ret) 3153 + goto failed; 3147 3154 3148 3155 ret = psp_hw_start(psp); 3149 3156 if (ret) ··· 3892 3891 { 3893 3892 struct drm_device *ddev = dev_get_drvdata(dev); 3894 3893 struct amdgpu_device *adev = drm_to_adev(ddev); 3894 + struct amdgpu_ip_block *ip_block; 3895 3895 uint32_t fw_ver; 3896 3896 int ret; 3897 3897 3898 - if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { 3898 + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP); 3899 + if (!ip_block || !ip_block->status.late_initialized) { 3899 3900 dev_info(adev->dev, "PSP block is not ready yet\n."); 3900 3901 return -EBUSY; 3901 3902 } ··· 3926 3923 struct amdgpu_bo *fw_buf_bo = NULL; 3927 3924 uint64_t fw_pri_mc_addr; 3928 3925 void *fw_pri_cpu_addr; 3926 + struct amdgpu_ip_block *ip_block; 3929 3927 3930 - if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { 3928 + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP); 3929 + if (!ip_block || !ip_block->status.late_initialized) { 3931 3930 dev_err(adev->dev, "PSP block is not ready yet."); 3932 3931 return -EBUSY; 3933 3932 }
+16 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 2832 2832 2833 2833 mutex_lock(&con->recovery_lock); 2834 2834 data = con->eh_data; 2835 - if (!data) 2835 + if (!data) { 2836 + /* Returning 0 as the absence of eh_data is acceptable */ 2836 2837 goto free; 2838 + } 2837 2839 2838 2840 for (i = 0; i < pages; i++) { 2839 2841 if (from_rom && ··· 2847 2845 * one row 2848 2846 */ 2849 2847 if (amdgpu_umc_pages_in_a_row(adev, &err_data, 2850 - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) 2848 + bps[i].retired_page << 2849 + AMDGPU_GPU_PAGE_SHIFT)) { 2850 + ret = -EINVAL; 2851 2851 goto free; 2852 - else 2852 + } else { 2853 2853 find_pages_per_pa = true; 2854 + } 2854 2855 } else { 2855 2856 /* unsupported cases */ 2857 + ret = -EOPNOTSUPP; 2856 2858 goto free; 2857 2859 } 2858 2860 } 2859 2861 } else { 2860 2862 if (amdgpu_umc_pages_in_a_row(adev, &err_data, 2861 - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) 2863 + bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) { 2864 + ret = -EINVAL; 2862 2865 goto free; 2866 + } 2863 2867 } 2864 2868 } else { 2865 2869 if (from_rom && !find_pages_per_pa) { 2866 2870 if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) { 2867 2871 /* bad page in any NPS mode in eeprom */ 2868 - if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) 2872 + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) { 2873 + ret = -EINVAL; 2869 2874 goto free; 2875 + } 2870 2876 } else { 2871 2877 /* legacy bad page in eeprom, generated only in 2872 2878 * NPS1 mode ··· 2891 2881 /* non-nps1 mode, old RAS TA 2892 2882 * can't support it 2893 2883 */ 2884 + ret = -EOPNOTSUPP; 2894 2885 goto free; 2895 2886 } 2896 2887 }
+2 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
··· 362 362 if (!adev) 363 363 return -ENODEV; 364 364 365 - mask = (1 << adev->sdma.num_instances) - 1; 365 + mask = BIT_ULL(adev->sdma.num_instances) - 1; 366 366 if ((val & mask) == 0) 367 367 return -EINVAL; 368 368 369 369 for (i = 0; i < adev->sdma.num_instances; ++i) { 370 370 ring = &adev->sdma.instance[i].ring; 371 - if (val & (1 << i)) 371 + if (val & BIT_ULL(i)) 372 372 ring->sched.ready = true; 373 373 else 374 374 ring->sched.ready = false;
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 2066 2066 ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); 2067 2067 ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); 2068 2068 ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); 2069 + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); 2069 2070 ttm_device_fini(&adev->mman.bdev); 2070 2071 adev->mman.initialized = false; 2071 2072 DRM_INFO("amdgpu: ttm finalized\n");
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
··· 567 567 else 568 568 remaining_size -= size; 569 569 } 570 - mutex_unlock(&mgr->lock); 571 570 572 571 if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { 573 572 struct drm_buddy_block *dcc_block; ··· 583 584 (u64)vres->base.size, 584 585 &vres->blocks); 585 586 } 587 + mutex_unlock(&mgr->lock); 586 588 587 589 vres->base.start = 0; 588 590 size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
+18
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 45 45 #include "clearstate_gfx10.h" 46 46 #include "v10_structs.h" 47 47 #include "gfx_v10_0.h" 48 + #include "gfx_v10_0_cleaner_shader.h" 48 49 #include "nbio_v2_3.h" 49 50 50 51 /* ··· 4739 4738 break; 4740 4739 } 4741 4740 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 4741 + case IP_VERSION(10, 3, 0): 4742 + case IP_VERSION(10, 3, 2): 4743 + case IP_VERSION(10, 3, 4): 4744 + case IP_VERSION(10, 3, 5): 4745 + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; 4746 + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); 4747 + if (adev->gfx.me_fw_version >= 64 && 4748 + adev->gfx.pfp_fw_version >= 100 && 4749 + adev->gfx.mec_fw_version >= 122) { 4750 + adev->gfx.enable_cleaner_shader = true; 4751 + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 4752 + if (r) { 4753 + adev->gfx.enable_cleaner_shader = false; 4754 + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 4755 + } 4756 + } 4757 + break; 4742 4758 default: 4743 4759 adev->gfx.enable_cleaner_shader = false; 4744 4760 break;
+56
drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright 2025 Advanced Micro Devices, Inc. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the "Software"), 7 + * to deal in the Software without restriction, including without limitation 8 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 + * and/or sell copies of the Software, and to permit persons to whom the 10 + * Software is furnished to do so, subject to the following conditions: 11 + * 12 + * The above copyright notice and this permission notice shall be included in 13 + * all copies or substantial portions of the Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 + * OTHER DEALINGS IN THE SOFTWARE. 
22 + */ 23 + 24 + /* Define the cleaner shader gfx_10_3_0 */ 25 + static const u32 gfx_10_3_0_cleaner_shader_hex[] = { 26 + 0xb0804004, 0xbf8a0000, 27 + 0xbe8203b8, 0xbefc0380, 28 + 0x7e008480, 0x7e028480, 29 + 0x7e048480, 0x7e068480, 30 + 0x7e088480, 0x7e0a8480, 31 + 0x7e0c8480, 0x7e0e8480, 32 + 0xbefc0302, 0x80828802, 33 + 0xbf84fff5, 0xbe8203ff, 34 + 0x80000000, 0x87020002, 35 + 0xbf840012, 0xbefe03c1, 36 + 0xbeff03c1, 0xd7650001, 37 + 0x0001007f, 0xd7660001, 38 + 0x0002027e, 0x16020288, 39 + 0xbe8203bf, 0xbefc03c1, 40 + 0xd9382000, 0x00020201, 41 + 0xd9386040, 0x00040401, 42 + 0xd70f6a01, 0x000202ff, 43 + 0x00000400, 0x80828102, 44 + 0xbf84fff7, 0xbefc03ff, 45 + 0x00000068, 0xbe803080, 46 + 0xbe813080, 0xbe823080, 47 + 0xbe833080, 0x80fc847c, 48 + 0xbf84fffa, 0xbeea0480, 49 + 0xbeec0480, 0xbeee0480, 50 + 0xbef00480, 0xbef20480, 51 + 0xbef40480, 0xbef60480, 52 + 0xbef80480, 0xbefa0480, 53 + 0xbf810000, 0xbf9f0000, 54 + 0xbf9f0000, 0xbf9f0000, 55 + 0xbf9f0000, 0xbf9f0000, 56 + };
+124
drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright 2025 Advanced Micro Devices, Inc. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the "Software"), 7 + * to deal in the Software without restriction, including without limitation 8 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 + * and/or sell copies of the Software, and to permit persons to whom the 10 + * Software is furnished to do so, subject to the following conditions: 11 + * 12 + * The above copyright notice and this permission notice shall be included in 13 + * all copies or substantial portions of the Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 + * OTHER DEALINGS IN THE SOFTWARE. 22 + */ 23 + 24 + // This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. 
25 + //To turn this shader program on for compilation change this to main and lower shader main to main_1 26 + 27 + // GFX10.3 : Clear SGPRs, VGPRs and LDS 28 + // Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot 29 + // Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD 30 + // Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) 31 + // It takes 2 workgroups to use all of LDS: one on each CU of the WGP 32 + // Each wave clears SGPRs 0 - 107 33 + // Each wave clears VGPRs 0 - 63 34 + // The first wave of the workgroup clears its 64KB of LDS 35 + // The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup 36 + // before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. 37 + 38 + 39 + shader main 40 + asic(GFX10) 41 + type(CS) 42 + wave_size(32) 43 + // Note: original source code from SQ team 44 + 45 + // 46 + // Create 32 waves in a threadgroup (CS waves) 47 + // Each allocates 64 VGPRs 48 + // The workgroup allocates all of LDS (64kbytes) 49 + // 50 + // Takes about 2500 clocks to run. 
51 + // (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks) 52 + // 53 + S_BARRIER 54 + s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance) 55 + s_mov_b32 m0, 0 56 + // 57 + // CLEAR VGPRs 58 + // 59 + label_0005: 60 + v_movreld_b32 v0, 0 61 + v_movreld_b32 v1, 0 62 + v_movreld_b32 v2, 0 63 + v_movreld_b32 v3, 0 64 + v_movreld_b32 v4, 0 65 + v_movreld_b32 v5, 0 66 + v_movreld_b32 v6, 0 67 + v_movreld_b32 v7, 0 68 + s_mov_b32 m0, s2 69 + s_sub_u32 s2, s2, 8 70 + s_cbranch_scc0 label_0005 71 + // 72 + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave 73 + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set 74 + s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup 75 + // CLEAR LDS 76 + // 77 + s_mov_b32 exec_lo, 0xffffffff 78 + s_mov_b32 exec_hi, 0xffffffff 79 + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) 80 + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) 81 + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) 82 + s_mov_b32 s2, 0x00000003f // 64 loop iterations 83 + s_mov_b32 m0, 0xffffffff 84 + // Clear all of LDS space 85 + // Each FirstWave of WorkGroup clears 64kbyte block 86 + 87 + label_001F: 88 + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 89 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 90 + v_add_co_u32 v1, vcc, 0x00000400, v1 91 + s_sub_u32 s2, s2, 1 92 + s_cbranch_scc0 label_001F 93 + 94 + // 95 + // CLEAR SGPRs 96 + // 97 + label_0023: 98 + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) 99 + label_sgpr_loop: 100 + s_movreld_b32 s0, 0 101 + s_movreld_b32 s1, 0 102 + s_movreld_b32 s2, 0 103 + s_movreld_b32 s3, 0 104 + s_sub_u32 m0, m0, 4 105 + s_cbranch_scc0 label_sgpr_loop 106 + 107 + //clear vcc 108 + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR 109 + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch 
hi SGPR 110 + s_mov_b64 vcc, 0 //clear vcc 111 + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 112 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 113 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 114 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 115 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 116 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 117 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 118 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 119 + 120 + s_endpgm 121 + 122 + end 123 + 124 +
+4 -1
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
··· 1891 1891 1892 1892 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 1893 1893 { 1894 + u32 rb_bitmap_per_sa; 1894 1895 u32 rb_bitmap_width_per_sa; 1895 1896 u32 max_sa; 1896 1897 u32 active_sa_bitmap; ··· 1909 1908 adev->gfx.config.max_sh_per_se; 1910 1909 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 1911 1910 adev->gfx.config.max_sh_per_se; 1911 + rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 1912 + 1912 1913 for (i = 0; i < max_sa; i++) { 1913 1914 if (active_sa_bitmap & (1 << i)) 1914 - active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); 1915 + active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 1915 1916 } 1916 1917 1917 1918 active_rb_bitmap &= global_active_rb_bitmap;
+14 -3
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
··· 1442 1442 } 1443 1443 } 1444 1444 1445 - /* TODO: Add queue reset mask when FW fully supports it */ 1446 1445 adev->gfx.gfx_supported_reset = 1447 1446 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1448 1447 adev->gfx.compute_supported_reset = 1449 1448 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1449 + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1450 + case IP_VERSION(12, 0, 0): 1451 + case IP_VERSION(12, 0, 1): 1452 + if ((adev->gfx.me_fw_version >= 2660) && 1453 + (adev->gfx.mec_fw_version >= 2920)) { 1454 + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1455 + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1456 + } 1457 + } 1450 1458 1451 1459 if (!adev->enable_mes_kiq) { 1452 1460 r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0); ··· 1623 1615 1624 1616 static void gfx_v12_0_setup_rb(struct amdgpu_device *adev) 1625 1617 { 1618 + u32 rb_bitmap_per_sa; 1626 1619 u32 rb_bitmap_width_per_sa; 1627 1620 u32 max_sa; 1628 1621 u32 active_sa_bitmap; ··· 1641 1632 adev->gfx.config.max_sh_per_se; 1642 1633 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 1643 1634 adev->gfx.config.max_sh_per_se; 1635 + rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 1636 + 1644 1637 for (i = 0; i < max_sa; i++) { 1645 1638 if (active_sa_bitmap & (1 << i)) 1646 - active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); 1639 + active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 1647 1640 } 1648 1641 1649 - active_rb_bitmap |= global_active_rb_bitmap; 1642 + active_rb_bitmap &= global_active_rb_bitmap; 1650 1643 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 1651 1644 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 1652 1645 }
+4 -8
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
··· 5639 5639 { 5640 5640 uint32_t temp, data; 5641 5641 5642 - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5643 - 5644 5642 /* It is disabled by HW by default */ 5645 5643 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5646 5644 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { ··· 5732 5734 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5733 5735 gfx_v8_0_wait_for_rlc_serdes(adev); 5734 5736 } 5735 - 5736 - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5737 5737 } 5738 5738 5739 5739 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, ··· 5740 5744 uint32_t temp, temp1, data, data1; 5741 5745 5742 5746 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5743 - 5744 - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5745 5747 5746 5748 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5747 5749 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); ··· 5821 5827 } 5822 5828 5823 5829 gfx_v8_0_wait_for_rlc_serdes(adev); 5824 - 5825 - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5826 5830 } 5827 5831 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5828 5832 bool enable) 5829 5833 { 5834 + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5835 + 5830 5836 if (enable) { 5831 5837 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5832 5838 * === MGCG + MGLS + TS(CG/LS) === ··· 5840 5846 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5841 5847 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5842 5848 } 5849 + 5850 + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5843 5851 return 0; 5844 5852 } 5845 5853
+2 -12
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 4964 4964 { 4965 4965 uint32_t data, def; 4966 4966 4967 - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4968 - 4969 4967 /* It is disabled by HW by default */ 4970 4968 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4971 4969 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ ··· 5028 5030 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 5029 5031 } 5030 5032 } 5031 - 5032 - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5033 5033 } 5034 5034 5035 5035 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, ··· 5037 5041 5038 5042 if (!adev->gfx.num_gfx_rings) 5039 5043 return; 5040 - 5041 - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5042 5044 5043 5045 /* Enable 3D CGCG/CGLS */ 5044 5046 if (enable) { ··· 5079 5085 if (def != data) 5080 5086 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5081 5087 } 5082 - 5083 - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5084 5088 } 5085 5089 5086 5090 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5087 5091 bool enable) 5088 5092 { 5089 5093 uint32_t def, data; 5090 - 5091 - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5092 5094 5093 5095 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5094 5096 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); ··· 5127 5137 if (def != data) 5128 5138 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5129 5139 } 5130 - 5131 - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5132 5140 } 5133 5141 5134 5142 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5135 5143 bool enable) 5136 5144 { 5145 + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5137 5146 if (enable) { 5138 5147 /* CGCG/CGLS should be enabled after MGCG/MGLS 5139 5148 * === MGCG + MGLS === ··· 5152 5163 /* === MGCG + MGLS === */ 5153 5164 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5154 5165 } 5166 + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5155 5167 return 0; 5156 5168 } 5157 5169
+7 -2
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
··· 579 579 { 580 580 int err; 581 581 582 - if (amdgpu_sriov_vf(adev)) 582 + if (amdgpu_sriov_vf(adev)) { 583 583 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 584 584 AMDGPU_UCODE_REQUIRED, 585 585 "amdgpu/%s_sjt_mec.bin", chip_name); 586 - else 586 + 587 + if (err) 588 + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 589 + AMDGPU_UCODE_REQUIRED, 590 + "amdgpu/%s_mec.bin", chip_name); 591 + } else 587 592 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 588 593 AMDGPU_UCODE_REQUIRED, 589 594 "amdgpu/%s_mec.bin", chip_name);
+1 -1
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
··· 1602 1602 int i, r; 1603 1603 u32 inst_mask; 1604 1604 1605 - if ((adev->flags & AMD_IS_APU) || amdgpu_sriov_vf(adev)) 1605 + if (amdgpu_sriov_vf(adev)) 1606 1606 return -EINVAL; 1607 1607 1608 1608 /* stop queue */
+1 -201
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
··· 34 34 * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3 35 35 * sp3 gfx11.sp3 -hex gfx11.hex 36 36 * 37 - * gfx12: 38 - * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3 39 - * sp3 gfx12.sp3 -hex gfx12.hex 40 37 */ 41 38 42 39 #define CHIP_NAVI10 26 43 40 #define CHIP_SIENNA_CICHLID 30 44 41 #define CHIP_PLUM_BONITO 36 45 - #define CHIP_GFX12 37 46 42 47 43 #define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID) 48 44 #define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID) 49 45 #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) 50 46 #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) 51 - #define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12) 47 + #define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO) 52 48 #define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger 53 49 #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised 54 50 55 - #if ASIC_FAMILY < CHIP_GFX12 56 51 #define S_COHERENCE glc:1 57 52 #define V_COHERENCE slc:1 glc:1 58 53 #define S_WAITCNT_0 s_waitcnt 0 59 - #else 60 - #define S_COHERENCE scope:SCOPE_SYS 61 - #define V_COHERENCE scope:SCOPE_SYS 62 - #define S_WAITCNT_0 s_wait_idle 63 54 64 - #define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO 65 - #define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI 66 - #define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC 67 - #define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC 68 - #define HW_REG_MODE HW_REG_WAVE_MODE 69 - #endif 70 - 71 - #if ASIC_FAMILY < CHIP_GFX12 72 55 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 73 56 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 74 57 var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 ··· 64 81 var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK 65 82 var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 66 83 var S_SAVE_PC_HI_HT_MASK = 
0x01000000 67 - #else 68 - var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 69 - var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 70 - var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 71 - var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 72 - var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 73 - var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 74 - var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 75 - var SQ_WAVE_STATUS_WAVE64_SIZE = 1 76 - var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 77 - var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV 78 - var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK 79 - var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK 80 - var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 81 - #endif 82 84 83 85 var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 84 86 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 ··· 78 110 var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 79 111 #endif 80 112 81 - #if ASIC_FAMILY < CHIP_GFX12 82 113 var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 83 114 var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF 84 115 var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 ··· 128 161 var S_TRAPSTS_HWREG = HW_REG_TRAPSTS 129 162 var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK 130 163 var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT 131 - #else 132 - var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF 133 - var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 134 - var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 135 - var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 136 - var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 137 - var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 138 - var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 139 - var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 140 - var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 141 - var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 142 - var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 143 - var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 144 - var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 145 
- var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 146 - 147 - var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV 148 - var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK 149 - var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT 150 - var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ 151 - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ 152 - SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ 153 - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ 154 - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ 155 - SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK 156 - var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT 157 - var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT 158 - var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT 159 - var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT 160 - var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT 161 - var BARRIER_STATE_SIGNAL_OFFSET = 16 162 - var BARRIER_STATE_VALID_OFFSET = 0 163 - #endif 164 164 165 165 // bits [31:24] unused by SPI debug data 166 166 var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 ··· 239 305 240 306 L_HALTED: 241 307 // Host trap may occur while wave is halted. 242 - #if ASIC_FAMILY < CHIP_GFX12 243 308 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK 244 - #else 245 - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK 246 - #endif 247 309 s_cbranch_scc1 L_FETCH_2ND_TRAP 248 310 249 311 L_CHECK_SAVE: ··· 266 336 // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. 267 337 // Maskable exceptions only cause the wave to enter the trap handler if 268 338 // their respective bit in mode.excp_en is set. 
269 - #if ASIC_FAMILY < CHIP_GFX12 270 339 s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK 271 340 s_cbranch_scc0 L_CHECK_TRAP_ID 272 341 ··· 278 349 s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT 279 350 s_and_b32 ttmp2, ttmp2, ttmp3 280 351 s_cbranch_scc1 L_FETCH_2ND_TRAP 281 - #else 282 - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) 283 - s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK 284 - s_cbranch_scc0 L_NOT_ADDR_WATCH 285 - s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK 286 - 287 - L_NOT_ADDR_WATCH: 288 - s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) 289 - s_and_b32 ttmp2, ttmp3, ttmp2 290 - s_cbranch_scc1 L_FETCH_2ND_TRAP 291 - #endif 292 352 293 353 L_CHECK_TRAP_ID: 294 354 // Check trap_id != 0 ··· 287 369 #if SINGLE_STEP_MISSED_WORKAROUND 288 370 // Prioritize single step exception over context save. 289 371 // Second-level trap will halt wave and RFE, re-entering for SAVECTX. 290 - #if ASIC_FAMILY < CHIP_GFX12 291 372 s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) 292 373 s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK 293 - #else 294 - // WAVE_TRAP_CTRL is already in ttmp3. 295 - s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK 296 - #endif 297 374 s_cbranch_scc1 L_FETCH_2ND_TRAP 298 375 #endif 299 376 ··· 338 425 s_cbranch_scc1 L_TRAP_CASE 339 426 340 427 // Host trap will not cause trap re-entry. 
341 - #if ASIC_FAMILY < CHIP_GFX12 342 428 s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK 343 - #else 344 - s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) 345 - s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK 346 - #endif 347 429 s_cbranch_scc1 L_EXIT_TRAP 348 430 s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK 349 431 ··· 365 457 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 366 458 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 367 459 368 - #if ASIC_FAMILY < CHIP_GFX12 369 460 s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status 370 - #else 371 - // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. 372 - // Only restore fields which the trap handler changes. 373 - s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT 374 - s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ 375 - SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status 376 - #endif 377 - 378 461 s_rfe_b64 [ttmp0, ttmp1] 379 462 380 463 L_SAVE: ··· 377 478 s_endpgm 378 479 L_HAVE_VGPRS: 379 480 #endif 380 - #if ASIC_FAMILY >= CHIP_GFX12 381 - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) 382 - s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT 383 - s_cbranch_scc0 L_HAVE_VGPRS 384 - s_endpgm 385 - L_HAVE_VGPRS: 386 - #endif 387 - 388 481 s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] 389 482 s_mov_b32 s_save_tmp, 0 390 483 s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit ··· 562 671 s_mov_b32 m0, 0x0 //Next lane of v2 to write to 563 672 #endif 564 673 565 - #if ASIC_FAMILY >= CHIP_GFX12 566 - // Ensure no further changes to barrier or LDS state. 567 - // STATE_PRIV.BARRIER_COMPLETE may change up to this point. 568 - s_barrier_signal -2 569 - s_barrier_wait -2 570 - 571 - // Re-read final state of BARRIER_COMPLETE field for save. 
572 - s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG) 573 - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK 574 - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK 575 - s_or_b32 s_save_status, s_save_status, s_save_tmp 576 - #endif 577 - 578 674 write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) 579 675 write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) 580 676 s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK ··· 584 706 585 707 s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI) 586 708 write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) 587 - 588 - #if ASIC_FAMILY >= CHIP_GFX12 589 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) 590 - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) 591 - 592 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) 593 - write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) 594 - 595 - s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) 596 - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) 597 - 598 - s_get_barrier_state s_save_tmp, -1 599 - s_wait_kmcnt (0) 600 - write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) 601 - #endif 602 709 603 710 #if NO_SQC_STORE 604 711 // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. ··· 677 814 s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? 678 815 s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE 679 816 680 - #if ASIC_FAMILY < CHIP_GFX12 681 817 s_barrier //LDS is used? 
wait for other waves in the same TG 682 - #endif 683 818 s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK 684 819 s_cbranch_scc0 L_SAVE_LDS_DONE 685 820 ··· 942 1081 s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) 943 1082 s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC 944 1083 945 - #if ASIC_FAMILY >= CHIP_GFX12 946 - // Save s_restore_spi_init_hi for later use. 947 - s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi 948 - #endif 949 - 950 1084 //determine it is wave32 or wave64 951 1085 get_wave_size2(s_restore_size) 952 1086 ··· 1176 1320 // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception. 1177 1321 // Clear DEBUG_EN before and restore MODE after the barrier. 1178 1322 s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0 1179 - #if ASIC_FAMILY < CHIP_GFX12 1180 1323 s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG 1181 - #endif 1182 1324 1183 1325 /* restore HW registers */ 1184 1326 L_RESTORE_HWREG: ··· 1187 1333 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() 1188 1334 1189 1335 s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 1190 - 1191 - #if ASIC_FAMILY >= CHIP_GFX12 1192 - // Restore s_restore_spi_init_hi before the saved value gets clobbered. 
1193 - s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save 1194 - #endif 1195 1336 1196 1337 read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) 1197 1338 read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) ··· 1206 1357 S_WAITCNT_0 1207 1358 1208 1359 s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch 1209 - 1210 - #if ASIC_FAMILY >= CHIP_GFX12 1211 - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 1212 - S_WAITCNT_0 1213 - s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp 1214 - 1215 - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 1216 - S_WAITCNT_0 1217 - s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp 1218 - 1219 - // Only the first wave needs to restore the workgroup barrier. 1220 - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK 1221 - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE 1222 - 1223 - // Skip over WAVE_STATUS, since there is no state to restore from it 1224 - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 1225 - 1226 - read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 1227 - S_WAITCNT_0 1228 - 1229 - s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET 1230 - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE 1231 - 1232 - // extract the saved signal count from s_restore_tmp 1233 - s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET 1234 - 1235 - // We need to call s_barrier_signal repeatedly to restore the signal 1236 - // count of the work group barrier. The member count is already 1237 - // initialized with the number of waves in the work group. 
1238 - L_BARRIER_RESTORE_LOOP: 1239 - s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp 1240 - s_cbranch_scc0 L_SKIP_BARRIER_RESTORE 1241 - s_barrier_signal -1 1242 - s_add_i32 s_restore_tmp, s_restore_tmp, -1 1243 - s_branch L_BARRIER_RESTORE_LOOP 1244 - 1245 - L_SKIP_BARRIER_RESTORE: 1246 - #endif 1247 1360 1248 1361 s_mov_b32 m0, s_restore_m0 1249 1362 s_mov_b32 exec_lo, s_restore_exec_lo ··· 1263 1452 #endif 1264 1453 1265 1454 s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu 1266 - 1267 - #if ASIC_FAMILY >= CHIP_GFX12 1268 - // Make barrier and LDS state visible to all waves in the group. 1269 - // STATE_PRIV.BARRIER_COMPLETE may change after this point. 1270 - s_barrier_signal -2 1271 - s_barrier_wait -2 1272 - #endif 1273 1455 1274 1456 s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution 1275 1457 ··· 1402 1598 end 1403 1599 1404 1600 function get_wave_size2(s_reg) 1405 - #if ASIC_FAMILY < CHIP_GFX12 1406 1601 s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) 1407 - #else 1408 - s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) 1409 - #endif 1410 1602 s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE 1411 1603 end 1412 1604
+1126
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
··· 1 + /* 2 + * Copyright 2018 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 
21 + */ 22 + 23 + /* To compile this assembly code: 24 + * 25 + * gfx12: 26 + * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3 27 + * sp3 gfx12.sp3 -hex gfx12.hex 28 + */ 29 + 30 + #define CHIP_GFX12 37 31 + 32 + #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised 33 + 34 + var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 35 + var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 36 + var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 37 + var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 38 + var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 39 + var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 40 + var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 41 + var SQ_WAVE_STATUS_WAVE64_SIZE = 1 42 + var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 43 + var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK 44 + var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 45 + 46 + var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 47 + var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 48 + var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 49 + var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 50 + var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 51 + var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 52 + var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 53 + 54 + var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF 55 + var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 56 + var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 57 + var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 58 + var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 59 + var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 60 + var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 61 + var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 62 + var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 63 + var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 64 + var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 65 + var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 66 + var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 
67 + var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 68 + 69 + var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ 70 + SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ 71 + SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ 72 + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ 73 + SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ 74 + SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK 75 + var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT 76 + var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT 77 + var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT 78 + var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT 79 + var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT 80 + var BARRIER_STATE_SIGNAL_OFFSET = 16 81 + var BARRIER_STATE_VALID_OFFSET = 0 82 + 83 + var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 84 + var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 85 + 86 + // SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] 87 + // when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE 88 + var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 89 + var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC 90 + var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 91 + var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 92 + 93 + var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 94 + var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31 95 + 96 + var s_sgpr_save_num = 108 97 + 98 + var s_save_spi_init_lo = exec_lo 99 + var s_save_spi_init_hi = exec_hi 100 + var s_save_pc_lo = ttmp0 101 + var s_save_pc_hi = ttmp1 102 + var s_save_exec_lo = ttmp2 103 + var s_save_exec_hi = ttmp3 104 + var s_save_state_priv = ttmp12 105 + var s_save_excp_flag_priv = ttmp15 106 + var s_save_xnack_mask = s_save_excp_flag_priv 107 + var s_wave_size = ttmp7 108 + var s_save_buf_rsrc0 = 
ttmp8 109 + var s_save_buf_rsrc1 = ttmp9 110 + var s_save_buf_rsrc2 = ttmp10 111 + var s_save_buf_rsrc3 = ttmp11 112 + var s_save_mem_offset = ttmp4 113 + var s_save_alloc_size = s_save_excp_flag_priv 114 + var s_save_tmp = ttmp14 115 + var s_save_m0 = ttmp5 116 + var s_save_ttmps_lo = s_save_tmp 117 + var s_save_ttmps_hi = s_save_excp_flag_priv 118 + 119 + var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE 120 + var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC 121 + 122 + var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 123 + var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 124 + var S_WAVE_SIZE = 25 125 + 126 + var s_restore_spi_init_lo = exec_lo 127 + var s_restore_spi_init_hi = exec_hi 128 + var s_restore_mem_offset = ttmp12 129 + var s_restore_alloc_size = ttmp3 130 + var s_restore_tmp = ttmp2 131 + var s_restore_mem_offset_save = s_restore_tmp 132 + var s_restore_m0 = s_restore_alloc_size 133 + var s_restore_mode = ttmp7 134 + var s_restore_flat_scratch = s_restore_tmp 135 + var s_restore_pc_lo = ttmp0 136 + var s_restore_pc_hi = ttmp1 137 + var s_restore_exec_lo = ttmp4 138 + var s_restore_exec_hi = ttmp5 139 + var s_restore_state_priv = ttmp14 140 + var s_restore_excp_flag_priv = ttmp15 141 + var s_restore_xnack_mask = ttmp13 142 + var s_restore_buf_rsrc0 = ttmp8 143 + var s_restore_buf_rsrc1 = ttmp9 144 + var s_restore_buf_rsrc2 = ttmp10 145 + var s_restore_buf_rsrc3 = ttmp11 146 + var s_restore_size = ttmp6 147 + var s_restore_ttmps_lo = s_restore_tmp 148 + var s_restore_ttmps_hi = s_restore_alloc_size 149 + var s_restore_spi_init_hi_save = s_restore_exec_hi 150 + 151 + shader main 152 + asic(DEFAULT) 153 + type(CS) 154 + wave_size(32) 155 + 156 + s_branch L_SKIP_RESTORE //NOT restore. 
might be a regular trap or save 157 + 158 + L_JUMP_TO_RESTORE: 159 + s_branch L_RESTORE 160 + 161 + L_SKIP_RESTORE: 162 + s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATE_PRIV since we will change SCC 163 + 164 + // Clear SYS_PRIO: do not save with elevated priority. 165 + // Clear POISON_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. 166 + s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK 167 + 168 + s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) 169 + 170 + s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK 171 + s_cbranch_scc0 L_NOT_HALTED 172 + 173 + L_HALTED: 174 + // Host trap may occur while wave is halted. 175 + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK 176 + s_cbranch_scc1 L_FETCH_2ND_TRAP 177 + 178 + L_CHECK_SAVE: 179 + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK 180 + s_cbranch_scc1 L_SAVE 181 + 182 + // Wave is halted but neither host trap nor SAVECTX is raised. 183 + // Caused by instruction fetch memory violation. 184 + // Spin wait until context saved to prevent interrupt storm. 185 + s_sleep 0x10 186 + s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) 187 + s_branch L_CHECK_SAVE 188 + 189 + L_NOT_HALTED: 190 + // Let second-level handle non-SAVECTX exception or trap. 191 + // Any concurrent SAVECTX will be handled upon re-entry once halted. 192 + 193 + // Check non-maskable exceptions. memory_violation, illegal_instruction 194 + // and xnack_error exceptions always cause the wave to enter the trap 195 + // handler. 196 + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK 197 + s_cbranch_scc1 L_FETCH_2ND_TRAP 198 + 199 + // Check for maskable exceptions in EXCP_FLAG_USER and EXCP_FLAG_PRIV.ADDR_WATCH. 200 + // Maskable exceptions only cause the wave to enter the trap handler if 201 + // their respective bit in TRAP_CTRL is set. 
202 + s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) 203 + s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK 204 + s_cbranch_scc0 L_NOT_ADDR_WATCH 205 + s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK 206 + 207 + L_NOT_ADDR_WATCH: 208 + s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) 209 + s_and_b32 ttmp2, ttmp3, ttmp2 210 + s_cbranch_scc1 L_FETCH_2ND_TRAP 211 + 212 + L_CHECK_TRAP_ID: 213 + // Check trap_id != 0 214 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK 215 + s_cbranch_scc1 L_FETCH_2ND_TRAP 216 + 217 + #if SINGLE_STEP_MISSED_WORKAROUND 218 + // Prioritize single step exception over context save. 219 + // Second-level trap will halt wave and RFE, re-entering for SAVECTX. 220 + // WAVE_TRAP_CTRL is already in ttmp3. 221 + s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK 222 + s_cbranch_scc1 L_FETCH_2ND_TRAP 223 + #endif 224 + 225 + s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK 226 + s_cbranch_scc1 L_SAVE 227 + 228 + L_FETCH_2ND_TRAP: 229 + // Read second-level TBA/TMA from first-level TMA and jump if available. 
230 + // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) 231 + // ttmp12 holds SQ_WAVE_STATE_PRIV 232 + s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA) 233 + s_wait_idle 234 + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 235 + 236 + s_bitcmp1_b32 ttmp15, 0xF 237 + s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA 238 + s_or_b32 ttmp15, ttmp15, 0xFFFF0000 239 + L_NO_SIGN_EXTEND_TMA: 240 + 241 + s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag 242 + s_wait_idle 243 + s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT 244 + s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK 245 + s_or_b32 ttmp11, ttmp11, ttmp2 246 + 247 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA 248 + s_wait_idle 249 + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA 250 + s_wait_idle 251 + 252 + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] 253 + s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set 254 + s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler 255 + 256 + L_NO_NEXT_TRAP: 257 + // If not caused by trap then halt wave to prevent re-entry. 258 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK 259 + s_cbranch_scc1 L_TRAP_CASE 260 + 261 + // Host trap will not cause trap re-entry. 262 + s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) 263 + s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK 264 + s_cbranch_scc1 L_EXIT_TRAP 265 + s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK 266 + 267 + // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set. 268 + // Rewind the PC to prevent this from occurring. 269 + s_sub_u32 ttmp0, ttmp0, 0x8 270 + s_subb_u32 ttmp1, ttmp1, 0x0 271 + 272 + s_branch L_EXIT_TRAP 273 + 274 + L_TRAP_CASE: 275 + // Advance past trap instruction to prevent re-entry. 
276 + s_add_u32 ttmp0, ttmp0, 0x4 277 + s_addc_u32 ttmp1, ttmp1, 0x0 278 + 279 + L_EXIT_TRAP: 280 + s_and_b32 ttmp1, ttmp1, 0xFFFF 281 + 282 + // Restore SQ_WAVE_STATUS. 283 + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 284 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 285 + 286 + // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. 287 + // Only restore fields which the trap handler changes. 288 + s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT 289 + s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ 290 + SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv 291 + 292 + s_rfe_b64 [ttmp0, ttmp1] 293 + 294 + L_SAVE: 295 + // If VGPRs have been deallocated then terminate the wavefront. 296 + // It has no remaining program to run and cannot save without VGPRs. 297 + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) 298 + s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT 299 + s_cbranch_scc0 L_HAVE_VGPRS 300 + s_endpgm 301 + L_HAVE_VGPRS: 302 + 303 + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] 304 + s_mov_b32 s_save_tmp, 0 305 + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit 306 + 307 + /* inform SPI the readiness and wait for SPI's go signal */ 308 + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI 309 + s_mov_b32 s_save_exec_hi, exec_hi 310 + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive 311 + 312 + s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE) 313 + s_wait_idle 314 + 315 + // Save first_wave flag so we can clear high bits of save address. 
316 + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK 317 + s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT) 318 + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp 319 + 320 + // Trap temporaries must be saved via VGPR but all VGPRs are in use. 321 + // There is no ttmp space to hold the resource constant for VGPR save. 322 + // Save v0 by itself since it requires only two SGPRs. 323 + s_mov_b32 s_save_ttmps_lo, exec_lo 324 + s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF 325 + s_mov_b32 exec_lo, 0xFFFFFFFF 326 + s_mov_b32 exec_hi, 0xFFFFFFFF 327 + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS 328 + v_mov_b32 v0, 0x0 329 + s_mov_b32 exec_lo, s_save_ttmps_lo 330 + s_mov_b32 exec_hi, s_save_ttmps_hi 331 + 332 + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic 333 + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 334 + get_wave_size2(s_save_ttmps_hi) 335 + get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi) 336 + get_svgpr_size_bytes(s_save_ttmps_hi) 337 + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi 338 + s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF 339 + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes() 340 + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo 341 + s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0 342 + 343 + v_writelane_b32 v0, ttmp4, 0x4 344 + v_writelane_b32 v0, ttmp5, 0x5 345 + v_writelane_b32 v0, ttmp6, 0x6 346 + v_writelane_b32 v0, ttmp7, 0x7 347 + v_writelane_b32 v0, ttmp8, 0x8 348 + v_writelane_b32 v0, ttmp9, 0x9 349 + v_writelane_b32 v0, ttmp10, 0xA 350 + v_writelane_b32 v0, ttmp11, 0xB 351 + v_writelane_b32 v0, ttmp13, 0xD 352 + v_writelane_b32 v0, exec_lo, 0xE 353 + v_writelane_b32 v0, exec_hi, 0xF 354 + 355 + s_mov_b32 exec_lo, 0x3FFF 356 + s_mov_b32 exec_hi, 0x0 357 + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] 
offset:0x40 scope:SCOPE_SYS 358 + v_readlane_b32 ttmp14, v0, 0xE 359 + v_readlane_b32 ttmp15, v0, 0xF 360 + s_mov_b32 exec_lo, ttmp14 361 + s_mov_b32 exec_hi, ttmp15 362 + 363 + /* setup Resource Constants */ 364 + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo 365 + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi 366 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE 367 + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized 368 + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC 369 + 370 + s_mov_b32 s_save_m0, m0 371 + 372 + /* global mem offset */ 373 + s_mov_b32 s_save_mem_offset, 0x0 374 + get_wave_size2(s_wave_size) 375 + 376 + /* save first 4 VGPRs, needed for SGPR save */ 377 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on 378 + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE 379 + s_and_b32 m0, m0, 1 380 + s_cmp_eq_u32 m0, 1 381 + s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI 382 + s_mov_b32 exec_hi, 0x00000000 383 + s_branch L_SAVE_4VGPR_WAVE32 384 + L_ENABLE_SAVE_4VGPR_EXEC_HI: 385 + s_mov_b32 exec_hi, 0xFFFFFFFF 386 + s_branch L_SAVE_4VGPR_WAVE64 387 + L_SAVE_4VGPR_WAVE32: 388 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 389 + 390 + // VGPR Allocated in 4-GPR granularity 391 + 392 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 393 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 394 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 395 + s_branch L_SAVE_HWREG 396 + 397 + L_SAVE_4VGPR_WAVE64: 398 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 399 + 400 + // VGPR Allocated in 4-GPR granularity 401 + 402 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 403 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset 
scope:SCOPE_SYS offset:256*2 404 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 405 + 406 + /* save HW registers */ 407 + 408 + L_SAVE_HWREG: 409 + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) 410 + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) 411 + get_svgpr_size_bytes(s_save_tmp) 412 + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp 413 + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() 414 + 415 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 416 + 417 + v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource 418 + v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource 419 + v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store 420 + s_mov_b32 m0, 0x0 //Next lane of v2 to write to 421 + 422 + // Ensure no further changes to barrier or LDS state. 423 + // STATE_PRIV.BARRIER_COMPLETE may change up to this point. 424 + s_barrier_signal -2 425 + s_barrier_wait -2 426 + 427 + // Re-read final state of BARRIER_COMPLETE field for save. 
428 + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV) 429 + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK 430 + s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK 431 + s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp 432 + 433 + write_hwreg_to_v2(s_save_m0) 434 + write_hwreg_to_v2(s_save_pc_lo) 435 + s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK 436 + write_hwreg_to_v2(s_save_tmp) 437 + write_hwreg_to_v2(s_save_exec_lo) 438 + write_hwreg_to_v2(s_save_exec_hi) 439 + write_hwreg_to_v2(s_save_state_priv) 440 + 441 + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) 442 + write_hwreg_to_v2(s_save_tmp) 443 + 444 + write_hwreg_to_v2(s_save_xnack_mask) 445 + 446 + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE) 447 + write_hwreg_to_v2(s_save_m0) 448 + 449 + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) 450 + write_hwreg_to_v2(s_save_m0) 451 + 452 + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) 453 + write_hwreg_to_v2(s_save_m0) 454 + 455 + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) 456 + write_hwreg_to_v2(s_save_m0) 457 + 458 + s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) 459 + write_hwreg_to_v2(s_save_m0) 460 + 461 + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) 462 + write_hwreg_to_v2(s_save_tmp) 463 + 464 + s_get_barrier_state s_save_tmp, -1 465 + s_wait_kmcnt (0) 466 + write_hwreg_to_v2(s_save_tmp) 467 + 468 + // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. 469 + s_mov_b32 exec_lo, 0xFFFF 470 + s_mov_b32 exec_hi, 0x0 471 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 472 + 473 + // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode. 474 + s_mov_b32 exec_lo, 0xFFFFFFFF 475 + 476 + /* save SGPRs */ 477 + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... 
478 + 479 + // SGPR SR memory offset : size(VGPR)+size(SVGPR) 480 + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) 481 + get_svgpr_size_bytes(s_save_tmp) 482 + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp 483 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 484 + 485 + s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into 486 + 487 + s_mov_b32 m0, 0x0 //SGPR initial index value =0 488 + s_nop 0x0 //Manually inserted wait states 489 + L_SAVE_SGPR_LOOP: 490 + // SGPR is allocated in 16 SGPR granularity 491 + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] 492 + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] 493 + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] 494 + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] 495 + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] 496 + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] 497 + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] 498 + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] 499 + 500 + write_16sgpr_to_v2(s0) 501 + 502 + s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled? 503 + s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE 504 + 505 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 506 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80 507 + s_mov_b32 ttmp13, 0x0 508 + v_mov_b32 v2, 0x0 509 + L_SAVE_SGPR_SKIP_TCP_STORE: 510 + 511 + s_add_u32 m0, m0, 16 //next sgpr index 512 + s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0 513 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete? 
514 + 515 + //save the rest 12 SGPR 516 + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] 517 + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] 518 + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] 519 + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] 520 + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] 521 + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] 522 + write_12sgpr_to_v2(s0) 523 + 524 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 525 + 526 + /* save LDS */ 527 + 528 + L_SAVE_LDS: 529 + // Change EXEC to all threads... 530 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on 531 + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE 532 + s_and_b32 m0, m0, 1 533 + s_cmp_eq_u32 m0, 1 534 + s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI 535 + s_mov_b32 exec_hi, 0x00000000 536 + s_branch L_SAVE_LDS_NORMAL 537 + L_ENABLE_SAVE_LDS_EXEC_HI: 538 + s_mov_b32 exec_hi, 0xFFFFFFFF 539 + L_SAVE_LDS_NORMAL: 540 + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) 541 + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? 542 + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? 
jump to L_SAVE_LDS_DONE 543 + 544 + s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK 545 + s_cbranch_scc0 L_SAVE_LDS_DONE 546 + 547 + // only the first wave does the LDS save 548 + 549 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY 550 + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes 551 + 552 + // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) 553 + // 554 + get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) 555 + get_svgpr_size_bytes(s_save_tmp) 556 + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp 557 + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() 558 + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() 559 + 560 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 561 + 562 + //load 0~63*4(byte address) to vgpr v0 563 + v_mbcnt_lo_u32_b32 v0, -1, 0 564 + v_mbcnt_hi_u32_b32 v0, -1, v0 565 + v_mul_u32_u24 v0, 4, v0 566 + 567 + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE 568 + s_and_b32 m0, m0, 1 569 + s_cmp_eq_u32 m0, 1 570 + s_mov_b32 m0, 0x0 571 + s_cbranch_scc1 L_SAVE_LDS_W64 572 + 573 + L_SAVE_LDS_W32: 574 + s_mov_b32 s3, 128 575 + s_nop 0 576 + s_nop 0 577 + s_nop 0 578 + L_SAVE_LDS_LOOP_W32: 579 + ds_read_b32 v1, v0 580 + s_wait_idle 581 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 582 + 583 + s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes 584 + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 585 + v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes 586 + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 587 + s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete? 
588 + 589 + s_branch L_SAVE_LDS_DONE 590 + 591 + L_SAVE_LDS_W64: 592 + s_mov_b32 s3, 256 593 + s_nop 0 594 + s_nop 0 595 + s_nop 0 596 + L_SAVE_LDS_LOOP_W64: 597 + ds_read_b32 v1, v0 598 + s_wait_idle 599 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 600 + 601 + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes 602 + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 603 + v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes 604 + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 605 + s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete? 606 + 607 + L_SAVE_LDS_DONE: 608 + /* save VGPRs - set the Rest VGPRs */ 609 + L_SAVE_VGPR: 610 + // VGPR SR memory offset: 0 611 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on 612 + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE 613 + s_and_b32 m0, m0, 1 614 + s_cmp_eq_u32 m0, 1 615 + s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI 616 + s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs 617 + s_mov_b32 exec_hi, 0x00000000 618 + s_branch L_SAVE_VGPR_NORMAL 619 + L_ENABLE_SAVE_VGPR_EXEC_HI: 620 + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs 621 + s_mov_b32 exec_hi, 0xFFFFFFFF 622 + L_SAVE_VGPR_NORMAL: 623 + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) 624 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 625 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) 626 + //determine it is wave32 or wave64 627 + s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE 628 + s_and_b32 m0, m0, 1 629 + s_cmp_eq_u32 m0, 1 630 + s_cbranch_scc1 L_SAVE_VGPR_WAVE64 631 + 632 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 633 + 634 + // VGPR Allocated in 4-GPR granularity 635 + 636 + // VGPR store using dw burst 637 + s_mov_b32 m0, 0x4 //VGPR initial index value =4 638 + s_cmp_lt_u32 m0, 
s_save_alloc_size 639 + s_cbranch_scc0 L_SAVE_VGPR_END 640 + 641 + L_SAVE_VGPR_W32_LOOP: 642 + v_movrels_b32 v0, v0 //v0 = v[0+m0] 643 + v_movrels_b32 v1, v1 //v1 = v[1+m0] 644 + v_movrels_b32 v2, v2 //v2 = v[2+m0] 645 + v_movrels_b32 v3, v3 //v3 = v[3+m0] 646 + 647 + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 648 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 649 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 650 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 651 + 652 + s_add_u32 m0, m0, 4 //next vgpr index 653 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes 654 + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 655 + s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete? 656 + 657 + s_branch L_SAVE_VGPR_END 658 + 659 + L_SAVE_VGPR_WAVE64: 660 + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 661 + 662 + // VGPR store using dw burst 663 + s_mov_b32 m0, 0x4 //VGPR initial index value =4 664 + s_cmp_lt_u32 m0, s_save_alloc_size 665 + s_cbranch_scc0 L_SAVE_SHARED_VGPR 666 + 667 + L_SAVE_VGPR_W64_LOOP: 668 + v_movrels_b32 v0, v0 //v0 = v[0+m0] 669 + v_movrels_b32 v1, v1 //v1 = v[1+m0] 670 + v_movrels_b32 v2, v2 //v2 = v[2+m0] 671 + v_movrels_b32 v3, v3 //v3 = v[3+m0] 672 + 673 + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 674 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 675 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 676 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 677 + 678 + s_add_u32 m0, m0, 4 //next vgpr index 679 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes 680 + 
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 681 + s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete? 682 + 683 + L_SAVE_SHARED_VGPR: 684 + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) 685 + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? 686 + s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS 687 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) 688 + //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. 689 + //save shared_vgpr will start from the index of m0 690 + s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 691 + s_mov_b32 exec_lo, 0xFFFFFFFF 692 + s_mov_b32 exec_hi, 0x00000000 693 + 694 + L_SAVE_SHARED_VGPR_WAVE64_LOOP: 695 + v_movrels_b32 v0, v0 //v0 = v[0+m0] 696 + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS 697 + s_add_u32 m0, m0, 1 //next vgpr index 698 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 699 + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 700 + s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete? 701 + 702 + L_SAVE_VGPR_END: 703 + s_branch L_END_PGM 704 + 705 + L_RESTORE: 706 + /* Setup Resource Contants */ 707 + s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo 708 + s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi 709 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE 710 + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) 711 + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC 712 + 713 + // Save s_restore_spi_init_hi for later use. 
714 + s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi 715 + 716 + //determine it is wave32 or wave64 717 + get_wave_size2(s_restore_size) 718 + 719 + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK 720 + s_cbranch_scc0 L_RESTORE_VGPR 721 + 722 + /* restore LDS */ 723 + L_RESTORE_LDS: 724 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on 725 + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE 726 + s_and_b32 m0, m0, 1 727 + s_cmp_eq_u32 m0, 1 728 + s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI 729 + s_mov_b32 exec_hi, 0x00000000 730 + s_branch L_RESTORE_LDS_NORMAL 731 + L_ENABLE_RESTORE_LDS_EXEC_HI: 732 + s_mov_b32 exec_hi, 0xFFFFFFFF 733 + L_RESTORE_LDS_NORMAL: 734 + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) 735 + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? 736 + s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR 737 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY 738 + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes 739 + 740 + // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) 741 + // 742 + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) 743 + get_svgpr_size_bytes(s_restore_tmp) 744 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp 745 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() 746 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() 747 + 748 + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 749 + 750 + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE 751 + s_and_b32 m0, m0, 1 752 + s_cmp_eq_u32 m0, 1 753 + s_mov_b32 m0, 0x0 754 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 755 + 756 + L_RESTORE_LDS_LOOP_W32: 757 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset 758 + 
s_wait_idle 759 + ds_store_addtid_b32 v0 760 + s_add_u32 m0, m0, 128 // 128 DW 761 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW 762 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 763 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete? 764 + s_branch L_RESTORE_VGPR 765 + 766 + L_RESTORE_LDS_LOOP_W64: 767 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset 768 + s_wait_idle 769 + ds_store_addtid_b32 v0 770 + s_add_u32 m0, m0, 256 // 256 DW 771 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW 772 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 773 + s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete? 774 + 775 + /* restore VGPRs */ 776 + L_RESTORE_VGPR: 777 + // VGPR SR memory offset : 0 778 + s_mov_b32 s_restore_mem_offset, 0x0 779 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on 780 + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE 781 + s_and_b32 m0, m0, 1 782 + s_cmp_eq_u32 m0, 1 783 + s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI 784 + s_mov_b32 exec_hi, 0x00000000 785 + s_branch L_RESTORE_VGPR_NORMAL 786 + L_ENABLE_RESTORE_VGPR_EXEC_HI: 787 + s_mov_b32 exec_hi, 0xFFFFFFFF 788 + L_RESTORE_VGPR_NORMAL: 789 + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) 790 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 791 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) 792 + //determine it is wave32 or wave64 793 + s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE 794 + s_and_b32 m0, m0, 1 795 + s_cmp_eq_u32 m0, 1 796 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE64 797 + 798 + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 799 + 800 + // VGPR load using dw burst 801 + s_mov_b32 s_restore_mem_offset_save, 
s_restore_mem_offset // restore start with v1, v0 will be the last 802 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 803 + s_mov_b32 m0, 4 //VGPR initial index value = 4 804 + s_cmp_lt_u32 m0, s_restore_alloc_size 805 + s_cbranch_scc0 L_RESTORE_SGPR 806 + 807 + L_RESTORE_VGPR_WAVE32_LOOP: 808 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS 809 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128 810 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2 811 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3 812 + s_wait_idle 813 + v_movreld_b32 v0, v0 //v[0+m0] = v0 814 + v_movreld_b32 v1, v1 815 + v_movreld_b32 v2, v2 816 + v_movreld_b32 v3, v3 817 + s_add_u32 m0, m0, 4 //next vgpr index 818 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes 819 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 820 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete? 
821 + 822 + /* VGPR restore on v0 */ 823 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS 824 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128 825 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2 826 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3 827 + s_wait_idle 828 + 829 + s_branch L_RESTORE_SGPR 830 + 831 + L_RESTORE_VGPR_WAVE64: 832 + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 833 + 834 + // VGPR load using dw burst 835 + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last 836 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 837 + s_mov_b32 m0, 4 //VGPR initial index value = 4 838 + s_cmp_lt_u32 m0, s_restore_alloc_size 839 + s_cbranch_scc0 L_RESTORE_SHARED_VGPR 840 + 841 + L_RESTORE_VGPR_WAVE64_LOOP: 842 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS 843 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256 844 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2 845 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3 846 + s_wait_idle 847 + v_movreld_b32 v0, v0 //v[0+m0] = v0 848 + v_movreld_b32 v1, v1 849 + v_movreld_b32 v2, v2 850 + v_movreld_b32 v3, v3 851 + s_add_u32 m0, m0, 4 //next vgpr index 852 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes 853 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 854 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? 
855 + 856 + L_RESTORE_SHARED_VGPR: 857 + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size 858 + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? 859 + s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used? 860 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) 861 + //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. 862 + //restore shared_vgpr will start from the index of m0 863 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0 864 + s_mov_b32 exec_lo, 0xFFFFFFFF 865 + s_mov_b32 exec_hi, 0x00000000 866 + L_RESTORE_SHARED_VGPR_WAVE64_LOOP: 867 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS 868 + s_wait_idle 869 + v_movreld_b32 v0, v0 //v[0+m0] = v0 870 + s_add_u32 m0, m0, 1 //next vgpr index 871 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 872 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 873 + s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? 874 + 875 + s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!! 
876 + 877 + /* VGPR restore on v0 */ 878 + L_RESTORE_V0: 879 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS 880 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256 881 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2 882 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3 883 + s_wait_idle 884 + 885 + /* restore SGPRs */ 886 + //will be 2+8+16*6 887 + // SGPR SR memory offset : size(VGPR)+size(SVGPR) 888 + L_RESTORE_SGPR: 889 + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) 890 + get_svgpr_size_bytes(s_restore_tmp) 891 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp 892 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() 893 + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved 894 + 895 + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 896 + 897 + s_mov_b32 m0, s_sgpr_save_num 898 + 899 + read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) 900 + s_wait_idle 901 + 902 + s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104] 903 + s_nop 0 // hazard SALU M0=> S_MOVREL 904 + 905 + s_movreld_b64 s0, s0 //s[0+m0] = s0 906 + s_movreld_b64 s2, s2 907 + 908 + read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) 909 + s_wait_idle 910 + 911 + s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96] 912 + s_nop 0 // hazard SALU M0=> S_MOVREL 913 + 914 + s_movreld_b64 s0, s0 //s[0+m0] = s0 915 + s_movreld_b64 s2, s2 916 + s_movreld_b64 s4, s4 917 + s_movreld_b64 s6, s6 918 + 919 + L_RESTORE_SGPR_LOOP: 920 + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) 921 + s_wait_idle 922 + 923 + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] 924 + s_nop 0 // hazard SALU M0=> S_MOVREL 925 + 926 + s_movreld_b64 s0, s0 //s[0+m0] = s0 927 + 
s_movreld_b64 s2, s2 928 + s_movreld_b64 s4, s4 929 + s_movreld_b64 s6, s6 930 + s_movreld_b64 s8, s8 931 + s_movreld_b64 s10, s10 932 + s_movreld_b64 s12, s12 933 + s_movreld_b64 s14, s14 934 + 935 + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0 936 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP 937 + 938 + // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception. 939 + // Clear DEBUG_EN before and restore MODE after the barrier. 940 + s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0 941 + 942 + /* restore HW registers */ 943 + L_RESTORE_HWREG: 944 + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) 945 + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) 946 + get_svgpr_size_bytes(s_restore_tmp) 947 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp 948 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() 949 + 950 + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes 951 + 952 + // Restore s_restore_spi_init_hi before the saved value gets clobbered. 
953 + s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save 954 + 955 + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) 956 + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) 957 + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) 958 + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) 959 + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) 960 + read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset) 961 + read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset) 962 + read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset) 963 + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) 964 + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) 965 + s_wait_idle 966 + 967 + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch 968 + 969 + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) 970 + s_wait_idle 971 + 972 + s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch 973 + 974 + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 975 + s_wait_idle 976 + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp 977 + 978 + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 979 + s_wait_idle 980 + s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp 981 + 982 + // Only the first wave needs to restore the workgroup barrier. 
983 + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK 984 + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE 985 + 986 + // Skip over WAVE_STATUS, since there is no state to restore from it 987 + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 988 + 989 + read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 990 + s_wait_idle 991 + 992 + s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET 993 + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE 994 + 995 + // extract the saved signal count from s_restore_tmp 996 + s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET 997 + 998 + // We need to call s_barrier_signal repeatedly to restore the signal 999 + // count of the work group barrier. The member count is already 1000 + // initialized with the number of waves in the work group. 1001 + L_BARRIER_RESTORE_LOOP: 1002 + s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp 1003 + s_cbranch_scc0 L_SKIP_BARRIER_RESTORE 1004 + s_barrier_signal -1 1005 + s_add_i32 s_restore_tmp, s_restore_tmp, -1 1006 + s_branch L_BARRIER_RESTORE_LOOP 1007 + 1008 + L_SKIP_BARRIER_RESTORE: 1009 + 1010 + s_mov_b32 m0, s_restore_m0 1011 + s_mov_b32 exec_lo, s_restore_exec_lo 1012 + s_mov_b32 exec_hi, s_restore_exec_hi 1013 + 1014 + // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed. 1015 + // Only restore the other fields to avoid clobbering them. 
1016 + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv 1017 + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT 1018 + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv 1019 + s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT 1020 + s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv 1021 + 1022 + s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode 1023 + 1024 + // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic 1025 + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 1026 + get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size) 1027 + get_svgpr_size_bytes(s_restore_ttmps_hi) 1028 + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi 1029 + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes() 1030 + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 1031 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 1032 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF 1033 + s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS 1034 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS 1035 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS 1036 + s_wait_idle 1037 + 1038 + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS 1039 + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 1040 
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
1041 +
1042 + s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
1043 +
1044 + // Make barrier and LDS state visible to all waves in the group.
1045 + // STATE_PRIV.BARRIER_COMPLETE may change after this point.
1046 + s_barrier_signal -2
1047 + s_barrier_wait -2
1048 +
1049 + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
1050 +
1051 + L_END_PGM:
1052 + s_endpgm_saved
1053 + end
1054 +
// write_hwreg_to_v2(s): write scalar s into lane m0 of v2, then increment m0
// by one so that successive calls fill successive lanes.
1055 + function write_hwreg_to_v2(s)
1056 +     // Copy into VGPR for later TCP store.
1057 +     v_writelane_b32 v2, s, m0
1058 +     s_add_u32 m0, m0, 0x1
1059 + end
1060 +
1061 +
// write_16sgpr_to_v2(s): write s[0]..s[15] into v2, one lane per register.
// The lane index is taken from ttmp13, which is incremented after each write.
1062 + function write_16sgpr_to_v2(s)
1063 +     // Copy into VGPR for later TCP store.
1064 +     for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
1065 +         v_writelane_b32 v2, s[sgpr_idx], ttmp13
1066 +         s_add_u32 ttmp13, ttmp13, 0x1
1067 +     end
1068 + end
1069 +
// write_12sgpr_to_v2(s): same as write_16sgpr_to_v2 but for s[0]..s[11] only.
1070 + function write_12sgpr_to_v2(s)
1071 +     // Copy into VGPR for later TCP store.
1072 +     for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
1073 +         v_writelane_b32 v2, s[sgpr_idx], ttmp13
1074 +         s_add_u32 ttmp13, ttmp13, 0x1
1075 +     end
1076 + end
1077 +
// read_hwreg_from_mem(s, s_rsrc, s_mem_offset): issue a one-dword scalar buffer
// load from s_rsrc at s_mem_offset into s, then advance s_mem_offset by 4 bytes
// (post-increment). No wait is issued here; the callers in this file follow the
// loads with s_wait_idle before using the result.
1078 + function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
1079 +     s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS
1080 +     s_add_u32 s_mem_offset, s_mem_offset, 4
1081 + end
1082 +
// read_16sgpr_from_mem(s, s_rsrc, s_mem_offset): move s_mem_offset back by
// 16 dwords (4*16 bytes) first, then load 16 dwords from the new offset
// (pre-decrement, i.e. the save area is walked from high to low addresses).
1083 + function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
1084 +     s_sub_u32 s_mem_offset, s_mem_offset, 4*16
1085 +     s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
1086 + end
1087 +
// read_8sgpr_from_mem(s, s_rsrc, s_mem_offset): pre-decrement s_mem_offset by
// 8 dwords (4*8 bytes), then load 8 dwords from the new offset.
1088 + function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
1089 +     s_sub_u32 s_mem_offset, s_mem_offset, 4*8
1090 +     s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
1091 + end
1092 +
// read_4sgpr_from_mem(s, s_rsrc, s_mem_offset): pre-decrement s_mem_offset by
// 4 dwords (4*4 bytes), then load 4 dwords from the new offset.
1093 + function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
1094 +     s_sub_u32 s_mem_offset, s_mem_offset, 4*4
1095 +     s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
1096 + end
1097 +
// get_vgpr_size_bytes(s_vgpr_size_byte, s_size): read the VGPR_SIZE field of
// WAVE_GPR_ALLOC, add 1, and shift left by 9 when bit S_WAVE_SIZE of s_size is
// clear or by 10 when it is set. The result is left in s_vgpr_size_byte.
// The bit test goes through SCC (s_bitcmp1_b32 followed by s_cbranch_scc1).
1098 + function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
1099 +     s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
1100 +     s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
1101 +     s_bitcmp1_b32 s_size, S_WAVE_SIZE
1102 +     s_cbranch_scc1 L_ENABLE_SHIFT_W64
1103 +     s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //VGPR save size in bytes = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
1104 +     s_branch L_SHIFT_DONE
1105 + L_ENABLE_SHIFT_W64:
1106 +     s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //VGPR save size in bytes = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value)
1107 + L_SHIFT_DONE:
1108 + end
1109 +
// get_svgpr_size_bytes(s_svgpr_size_byte): read the VGPR_SHARED_SIZE field of
// WAVE_LDS_ALLOC and shift it left by 3+7, i.e. multiply by 1024. This matches
// the shared-VGPR save/restore loops earlier in the file, which handle
// shared_vgpr_size * 8 registers at 128 bytes each.
1110 + function get_svgpr_size_bytes(s_svgpr_size_byte)
1111 +     s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
1112 +     s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
1113 + end
1114 +
// get_sgpr_size_bytes: constant size of the SGPR region of the save area, used
// by callers when computing the offsets of the regions that follow it.
1115 + function get_sgpr_size_bytes
1116 +     return 512
1117 + end
1118 +
// get_hwreg_size_bytes: constant size of the HWREG region of the save area.
1119 + function get_hwreg_size_bytes
1120 +     return 128
1121 + end
1122 +
// get_wave_size2(s_reg): read the WAVE64 field of WAVE_STATUS into s_reg and
// shift it up to bit position S_WAVE_SIZE, which is the bit the rest of the
// handler tests to choose between the wave32 and wave64 paths.
1123 + function get_wave_size2(s_reg)
1124 +     s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
1125 +     s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
1126 + end
+17
drivers/gpu/drm/amd/amdkfd/kfd_debug.c
··· 350 350 { 351 351 uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; 352 352 uint32_t flags = pdd->process->dbg_flags; 353 + struct amdgpu_device *adev = pdd->dev->adev; 354 + int r; 353 355 354 356 if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) 355 357 return 0; 358 + 359 + if (!pdd->proc_ctx_cpu_ptr) { 360 + r = amdgpu_amdkfd_alloc_gtt_mem(adev, 361 + AMDGPU_MES_PROC_CTX_SIZE, 362 + &pdd->proc_ctx_bo, 363 + &pdd->proc_ctx_gpu_addr, 364 + &pdd->proc_ctx_cpu_ptr, 365 + false); 366 + if (r) { 367 + dev_err(adev->dev, 368 + "failed to allocate process context bo\n"); 369 + return r; 370 + } 371 + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); 372 + } 356 373 357 374 return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl, 358 375 pdd->watch_points, flags, sq_trap_en);
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 32 32 #include <linux/atomic.h> 33 33 #include <linux/workqueue.h> 34 34 #include <linux/spinlock.h> 35 - #include <linux/kfd_ioctl.h> 35 + #include <uapi/linux/kfd_ioctl.h> 36 36 #include <linux/idr.h> 37 37 #include <linux/kfifo.h> 38 38 #include <linux/seq_file.h>
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 1160 1160 */ 1161 1161 synchronize_rcu(); 1162 1162 ef = rcu_access_pointer(p->ef); 1163 - dma_fence_signal(ef); 1163 + if (ef) 1164 + dma_fence_signal(ef); 1164 1165 1165 1166 kfd_process_remove_sysfs(p); 1166 1167
+2
drivers/gpu/drm/amd/display/Kconfig
··· 8 8 bool "AMD DC - Enable new display engine" 9 9 default y 10 10 depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64 11 + select CEC_CORE 12 + select CEC_NOTIFIER 11 13 select SND_HDA_COMPONENT if SND_HDA_CORE 12 14 # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 13 15 select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV))
+123 -50
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 98 98 #include <drm/drm_audio_component.h> 99 99 #include <drm/drm_gem_atomic_helper.h> 100 100 101 + #include <media/cec-notifier.h> 101 102 #include <acpi/video.h> 102 103 103 104 #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" ··· 2159 2158 amdgpu_dm_crtc_secure_display_create_contexts(adev); 2160 2159 if (!adev->dm.secure_display_ctx.crtc_ctx) 2161 2160 DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n"); 2161 + 2162 + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1)) 2163 + adev->dm.secure_display_ctx.support_mul_roi = true; 2164 + 2162 2165 #endif 2163 2166 2164 2167 DRM_DEBUG_DRIVER("KMS initialized.\n"); ··· 2755 2750 mutex_unlock(&mgr->lock); 2756 2751 } 2757 2752 2753 + void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector) 2754 + { 2755 + struct cec_notifier *n = aconnector->notifier; 2756 + 2757 + if (!n) 2758 + return; 2759 + 2760 + cec_notifier_phys_addr_invalidate(n); 2761 + } 2762 + 2763 + void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector) 2764 + { 2765 + struct drm_connector *connector = &aconnector->base; 2766 + struct cec_notifier *n = aconnector->notifier; 2767 + 2768 + if (!n) 2769 + return; 2770 + 2771 + cec_notifier_set_phys_addr(n, 2772 + connector->display_info.source_physical_address); 2773 + } 2774 + 2775 + static void s3_handle_hdmi_cec(struct drm_device *ddev, bool suspend) 2776 + { 2777 + struct amdgpu_dm_connector *aconnector; 2778 + struct drm_connector *connector; 2779 + struct drm_connector_list_iter conn_iter; 2780 + 2781 + drm_connector_list_iter_begin(ddev, &conn_iter); 2782 + drm_for_each_connector_iter(connector, &conn_iter) { 2783 + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) 2784 + continue; 2785 + 2786 + aconnector = to_amdgpu_dm_connector(connector); 2787 + if (suspend) 2788 + hdmi_cec_unset_edid(aconnector); 2789 + else 2790 + hdmi_cec_set_edid(aconnector); 2791 + } 2792 + drm_connector_list_iter_end(&conn_iter); 2793 + } 2794 + 2758 2795 static void 
s3_handle_mst(struct drm_device *dev, bool suspend) 2759 2796 { 2760 2797 struct amdgpu_dm_connector *aconnector; ··· 3067 3020 adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev)); 3068 3021 if (IS_ERR(adev->dm.cached_state)) 3069 3022 return PTR_ERR(adev->dm.cached_state); 3023 + 3024 + s3_handle_hdmi_cec(adev_to_drm(adev), true); 3070 3025 3071 3026 s3_handle_mst(adev_to_drm(adev), true); 3072 3027 ··· 3341 3292 * pulse interrupts are used for MST 3342 3293 */ 3343 3294 amdgpu_dm_irq_resume_early(adev); 3295 + 3296 + s3_handle_hdmi_cec(ddev, false); 3344 3297 3345 3298 /* On resume we need to rewrite the MSTM control bits to enable MST*/ 3346 3299 s3_handle_mst(ddev, false); ··· 3658 3607 dc_sink_retain(aconnector->dc_sink); 3659 3608 if (sink->dc_edid.length == 0) { 3660 3609 aconnector->drm_edid = NULL; 3610 + hdmi_cec_unset_edid(aconnector); 3661 3611 if (aconnector->dc_link->aux_mode) { 3662 3612 drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); 3663 3613 } ··· 3668 3616 aconnector->drm_edid = drm_edid_alloc(edid, sink->dc_edid.length); 3669 3617 drm_edid_connector_update(connector, aconnector->drm_edid); 3670 3618 3619 + hdmi_cec_set_edid(aconnector); 3671 3620 if (aconnector->dc_link->aux_mode) 3672 3621 drm_dp_cec_attach(&aconnector->dm_dp_aux.aux, 3673 3622 connector->display_info.source_physical_address); ··· 3685 3632 amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid); 3686 3633 update_connector_ext_caps(aconnector); 3687 3634 } else { 3635 + hdmi_cec_unset_edid(aconnector); 3688 3636 drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); 3689 3637 amdgpu_dm_update_freesync_caps(connector, NULL); 3690 3638 aconnector->num_modes = 0; ··· 7102 7048 if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector)) 7103 7049 sysfs_remove_group(&connector->kdev->kobj, &amdgpu_group); 7104 7050 7051 + cec_notifier_conn_unregister(amdgpu_dm_connector->notifier); 7105 7052 drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux); 7106 7053 } 
7107 7054 ··· 8339 8284 return i2c; 8340 8285 } 8341 8286
/*
 * amdgpu_dm_initialize_hdmi_connector() - register a CEC notifier for an
 * HDMI connector.
 * @aconnector: connector to attach the notifier to
 *
 * The notifier returned by cec_notifier_conn_register() is stored in
 * @aconnector->notifier, including when registration fails.
 *
 * Return: 0 on success, -EINVAL when DC_DISABLE_HDMI_CEC is set in
 * amdgpu_dc_debug_mask, -ENOMEM when no notifier could be registered.
 */
int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector)
{
	struct drm_connector *connector = &aconnector->base;
	struct drm_device *drm = connector->dev;
	struct cec_connector_info info;
	struct cec_notifier *notifier;

	if (amdgpu_dc_debug_mask & DC_DISABLE_HDMI_CEC) {
		drm_info(drm, "HDMI-CEC feature masked\n");
		return -EINVAL;
	}

	cec_fill_conn_info_from_drm(&info, connector);
	notifier = cec_notifier_conn_register(drm->dev, NULL, &info);
	aconnector->notifier = notifier;
	if (notifier)
		return 0;

	drm_err(drm, "Failed to create cec notifier\n");
	return -ENOMEM;
}
8342 8308 8343 8309 /* 8344 8310 * Note: this function assumes that dc_link_detect() was called for the ··· 8423 8347 drm_connector_attach_encoder( 8424 8348 &aconnector->base, &aencoder->base); 8425 8349 8350 + if (connector_type == DRM_MODE_CONNECTOR_HDMIA || 8351 + connector_type == DRM_MODE_CONNECTOR_HDMIB) 8352 + amdgpu_dm_initialize_hdmi_connector(aconnector); 8353 + 8426 8354 if (connector_type == DRM_MODE_CONNECTOR_DisplayPort 8427 8355 || connector_type == DRM_MODE_CONNECTOR_eDP) 8428 8356 amdgpu_dm_initialize_dp_connector(dm, aconnector, link->link_index); ··· 8486 8406 struct amdgpu_crtc *acrtc, 8487 8407 struct dm_crtc_state *acrtc_state) 8488 8408 { 8489 - /* 8490 - * We have no guarantee that the frontend index maps to the same 8491 - * backend index - some even map to more than one. 8492 - * 8493 - * TODO: Use a different interrupt or check DC itself for the mapping.
8494 - */ 8495 - int irq_type = 8496 - amdgpu_display_crtc_idx_to_irq_type( 8497 - adev, 8498 - acrtc->crtc_id); 8499 8409 struct drm_vblank_crtc_config config = {0}; 8500 8410 struct dc_crtc_timing *timing; 8501 8411 int offdelay; ··· 8511 8441 8512 8442 drm_crtc_vblank_on_config(&acrtc->base, 8513 8443 &config); 8514 - 8515 - amdgpu_irq_get( 8516 - adev, 8517 - &adev->pageflip_irq, 8518 - irq_type); 8519 - #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 8520 - amdgpu_irq_get( 8521 - adev, 8522 - &adev->vline0_irq, 8523 - irq_type); 8524 - #endif 8525 8444 } else { 8526 - #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 8527 - amdgpu_irq_put( 8528 - adev, 8529 - &adev->vline0_irq, 8530 - irq_type); 8531 - #endif 8532 - amdgpu_irq_put( 8533 - adev, 8534 - &adev->pageflip_irq, 8535 - irq_type); 8536 8445 drm_crtc_vblank_off(&acrtc->base); 8537 8446 } 8538 8447 } ··· 8982 8933 struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; 8983 8934 struct amdgpu_dm_connector *aconn = 8984 8935 (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context; 8936 + bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state); 8985 8937 8986 8938 if (acrtc_state->update_type > UPDATE_TYPE_FAST) { 8987 8939 if (pr->config.replay_supported && !pr->replay_feature_enabled) ··· 9009 8959 * adequate number of fast atomic commits to notify KMD 9010 8960 * of update events. See `vblank_control_worker()`. 
9011 8961 */ 9012 - if (acrtc_attach->dm_irq_params.allow_sr_entry && 8962 + if (!vrr_active && 8963 + acrtc_attach->dm_irq_params.allow_sr_entry && 9013 8964 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY 9014 8965 !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) && 9015 8966 #endif 9016 8967 (current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) { 9017 8968 if (pr->replay_feature_enabled && !pr->replay_allow_active) 9018 8969 amdgpu_dm_replay_enable(acrtc_state->stream, true); 9019 - if (psr->psr_version >= DC_PSR_VERSION_SU_1 && 8970 + if (psr->psr_version == DC_PSR_VERSION_SU_1 && 9020 8971 !psr->psr_allow_active && !aconn->disallow_edp_enter_psr) 9021 8972 amdgpu_dm_psr_enable(acrtc_state->stream); 9022 8973 } ··· 9188 9137 acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns = 9189 9138 timestamp_ns; 9190 9139 if (acrtc_state->stream->link->psr_settings.psr_allow_active) 9191 - amdgpu_dm_psr_disable(acrtc_state->stream); 9140 + amdgpu_dm_psr_disable(acrtc_state->stream, true); 9192 9141 mutex_unlock(&dm->dc_lock); 9193 9142 } 9194 9143 } ··· 9354 9303 bundle->stream_update.abm_level = &acrtc_state->abm_level; 9355 9304 9356 9305 mutex_lock(&dm->dc_lock); 9357 - if (acrtc_state->update_type > UPDATE_TYPE_FAST) { 9306 + if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) { 9358 9307 if (acrtc_state->stream->link->replay_settings.replay_allow_active) 9359 9308 amdgpu_dm_replay_disable(acrtc_state->stream); 9360 9309 if (acrtc_state->stream->link->psr_settings.psr_allow_active) 9361 - amdgpu_dm_psr_disable(acrtc_state->stream); 9310 + amdgpu_dm_psr_disable(acrtc_state->stream, true); 9362 9311 } 9363 9312 mutex_unlock(&dm->dc_lock); 9364 9313 ··· 10117 10066 if (amdgpu_dm_is_valid_crc_source(cur_crc_src)) { 10118 10067 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 10119 10068 if (amdgpu_dm_crc_window_is_activated(crtc)) { 10069 + uint8_t cnt; 10120 10070 spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); 10121 
- acrtc->dm_irq_params.window_param.update_win = true; 10071 + for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) { 10072 + if (acrtc->dm_irq_params.window_param[cnt].enable) { 10073 + acrtc->dm_irq_params.window_param[cnt].update_win = true; 10122 10074 10123 - /** 10124 - * It takes 2 frames for HW to stably generate CRC when 10125 - * resuming from suspend, so we set skip_frame_cnt 2. 10126 - */ 10127 - acrtc->dm_irq_params.window_param.skip_frame_cnt = 2; 10075 + /** 10076 + * It takes 2 frames for HW to stably generate CRC when 10077 + * resuming from suspend, so we set skip_frame_cnt 2. 10078 + */ 10079 + acrtc->dm_irq_params.window_param[cnt].skip_frame_cnt = 2; 10080 + } 10081 + } 10128 10082 spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); 10129 10083 } 10130 10084 #endif ··· 11217 11161 int plane_src_w, plane_src_h; 11218 11162 11219 11163 dm_get_oriented_plane_size(plane_state, &plane_src_w, &plane_src_h); 11220 - *out_plane_scale_w = plane_state->crtc_w * 1000 / plane_src_w; 11221 - *out_plane_scale_h = plane_state->crtc_h * 1000 / plane_src_h; 11164 + *out_plane_scale_w = plane_src_w ? plane_state->crtc_w * 1000 / plane_src_w : 0; 11165 + *out_plane_scale_h = plane_src_h ? 
plane_state->crtc_h * 1000 / plane_src_h : 0; 11222 11166 } 11223 11167 11224 11168 /* ··· 11472 11416 return 0; 11473 11417 } 11474 11418 11419 + static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, 11420 + struct drm_atomic_state *state, 11421 + struct drm_crtc_state *crtc_state) 11422 + { 11423 + struct drm_plane *plane; 11424 + struct drm_plane_state *new_plane_state, *old_plane_state; 11425 + 11426 + drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) { 11427 + new_plane_state = drm_atomic_get_plane_state(state, plane); 11428 + old_plane_state = drm_atomic_get_plane_state(state, plane); 11429 + 11430 + if (old_plane_state->fb && new_plane_state->fb && 11431 + get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb)) 11432 + return true; 11433 + } 11434 + 11435 + return false; 11436 + } 11437 + 11475 11438 /** 11476 11439 * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. 11477 11440 * ··· 11688 11613 11689 11614 /* Remove exiting planes if they are modified */ 11690 11615 for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) { 11691 - if (old_plane_state->fb && new_plane_state->fb && 11692 - get_mem_type(old_plane_state->fb) != 11693 - get_mem_type(new_plane_state->fb)) 11694 - lock_and_validation_needed = true; 11695 11616 11696 11617 ret = dm_update_plane_state(dc, state, plane, 11697 11618 old_plane_state, ··· 11982 11911 11983 11912 /* 11984 11913 * Only allow async flips for fast updates that don't change 11985 - * the FB pitch, the DCC state, rotation, etc. 11914 + * the FB pitch, the DCC state, rotation, mem_type, etc. 
11986 11915 */ 11987 - if (new_crtc_state->async_flip && lock_and_validation_needed) { 11916 + if (new_crtc_state->async_flip && 11917 + (lock_and_validation_needed || 11918 + amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) { 11988 11919 drm_dbg_atomic(crtc->dev, 11989 11920 "[CRTC:%d:%s] async flips are only supported for fast updates\n", 11990 11921 crtc->base.id, crtc->name);
+8
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
··· 671 671 uint32_t connector_id; 672 672 int bl_idx; 673 673 674 + struct cec_notifier *notifier; 675 + 674 676 /* we need to mind the EDID between detect 675 677 and get modes due to analog/digital/tvencoder */ 676 678 const struct drm_edid *drm_edid; ··· 699 697 struct drm_dp_mst_port *mst_output_port; 700 698 struct amdgpu_dm_connector *mst_root; 701 699 struct drm_dp_aux *dsc_aux; 700 + uint32_t mst_local_bw; 701 + uint16_t vc_full_pbn; 702 702 struct mutex handle_mst_msg_ready; 703 703 704 704 /* TODO see if we can merge with ddc_bus or make a dm_connector */ ··· 1013 1009 void *addr); 1014 1010 1015 1011 bool amdgpu_dm_is_headless(struct amdgpu_device *adev); 1012 + 1013 + void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector); 1014 + void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector); 1015 + int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector); 1016 1016 1017 1017 #endif /* __AMDGPU_DM_H__ */
+204 -67
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
··· 30 30 #include "amdgpu_dm.h" 31 31 #include "dc.h" 32 32 #include "amdgpu_securedisplay.h" 33 + #include "amdgpu_dm_psr.h" 33 34 34 35 static const char *const pipe_crc_sources[] = { 35 36 "none", ··· 296 295 struct drm_device *drm_dev = crtc->dev; 297 296 struct amdgpu_display_manager *dm = &drm_to_adev(drm_dev)->dm; 298 297 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 299 - bool was_activated; 300 298 struct amdgpu_dm_connector *aconnector; 299 + bool was_activated; 301 300 uint8_t phy_id; 301 + unsigned long flags; 302 + int i; 302 303 303 - spin_lock_irq(&drm_dev->event_lock); 304 - was_activated = acrtc->dm_irq_params.window_param.activated; 305 - acrtc->dm_irq_params.window_param.x_start = 0; 306 - acrtc->dm_irq_params.window_param.y_start = 0; 307 - acrtc->dm_irq_params.window_param.x_end = 0; 308 - acrtc->dm_irq_params.window_param.y_end = 0; 309 - acrtc->dm_irq_params.window_param.activated = false; 310 - acrtc->dm_irq_params.window_param.update_win = false; 311 - acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; 312 - spin_unlock_irq(&drm_dev->event_lock); 304 + spin_lock_irqsave(&drm_dev->event_lock, flags); 305 + was_activated = acrtc->dm_irq_params.crc_window_activated; 306 + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { 307 + acrtc->dm_irq_params.window_param[i].x_start = 0; 308 + acrtc->dm_irq_params.window_param[i].y_start = 0; 309 + acrtc->dm_irq_params.window_param[i].x_end = 0; 310 + acrtc->dm_irq_params.window_param[i].y_end = 0; 311 + acrtc->dm_irq_params.window_param[i].enable = false; 312 + acrtc->dm_irq_params.window_param[i].update_win = false; 313 + acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 0; 314 + } 315 + acrtc->dm_irq_params.crc_window_activated = false; 316 + spin_unlock_irqrestore(&drm_dev->event_lock, flags); 313 317 314 318 /* Disable secure_display if it was enabled */ 315 - if (was_activated) { 319 + if (was_activated && dm->secure_display_ctx.op_mode == LEGACY_MODE) { 316 320 /* stop ROI update on this crtc 
*/ 317 321 flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].notify_ta_work); 318 322 flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].forward_roi_work); 319 - 320 323 aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; 321 324 322 - if (aconnector && get_phy_id(dm, aconnector, &phy_id)) 323 - dc_stream_forward_crc_window(stream, NULL, phy_id, true); 324 - else 325 + if (aconnector && get_phy_id(dm, aconnector, &phy_id)) { 326 + if (dm->secure_display_ctx.support_mul_roi) 327 + dc_stream_forward_multiple_crc_window(stream, NULL, phy_id, true); 328 + else 329 + dc_stream_forward_crc_window(stream, NULL, phy_id, true); 330 + } else { 325 331 DRM_DEBUG_DRIVER("%s Can't find matching phy id", __func__); 332 + } 326 333 } 327 334 } 328 335 ··· 344 335 struct amdgpu_dm_connector *aconnector; 345 336 uint8_t phy_inst; 346 337 struct amdgpu_display_manager *dm; 338 + struct crc_data crc_cpy[MAX_CRC_WINDOW_NUM]; 339 + unsigned long flags; 340 + uint8_t roi_idx = 0; 347 341 int ret; 342 + int i; 348 343 349 344 crtc_ctx = container_of(work, struct secure_display_crtc_context, notify_ta_work); 350 345 crtc = crtc_ctx->crtc; ··· 377 364 } 378 365 mutex_unlock(&crtc->dev->mode_config.mutex); 379 366 367 + spin_lock_irqsave(&crtc->dev->event_lock, flags); 368 + memcpy(crc_cpy, crtc_ctx->crc_info.crc, sizeof(struct crc_data) * MAX_CRC_WINDOW_NUM); 369 + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 370 + 380 371 /* need lock for multiple crtcs to use the command buffer */ 381 372 mutex_lock(&psp->securedisplay_context.mutex); 382 - 383 - psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, 384 - TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); 385 - 386 - securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst; 387 - 388 373 /* PSP TA is expected to finish data transmission over I2C within current frame, 389 374 * even there are up to 4 crtcs request to send in this frame. 
390 375 */ 391 - ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); 376 + if (dm->secure_display_ctx.support_mul_roi) { 377 + psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, 378 + TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2); 379 + 380 + securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.phy_id = phy_inst; 381 + 382 + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { 383 + if (crc_cpy[i].crc_ready) 384 + roi_idx |= 1 << i; 385 + } 386 + securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.roi_idx = roi_idx; 387 + 388 + ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2); 389 + } else { 390 + psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, 391 + TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); 392 + 393 + securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst; 394 + 395 + ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); 396 + } 392 397 393 398 if (!ret) { 394 399 if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS) ··· 424 393 struct drm_crtc *crtc; 425 394 struct dc_stream_state *stream; 426 395 struct amdgpu_dm_connector *aconnector; 396 + struct crc_window roi_cpy[MAX_CRC_WINDOW_NUM]; 397 + unsigned long flags; 427 398 uint8_t phy_id; 428 399 429 400 crtc_ctx = container_of(work, struct secure_display_crtc_context, forward_roi_work); ··· 449 416 } 450 417 mutex_unlock(&crtc->dev->mode_config.mutex); 451 418 419 + spin_lock_irqsave(&crtc->dev->event_lock, flags); 420 + memcpy(roi_cpy, crtc_ctx->roi, sizeof(struct crc_window) * MAX_CRC_WINDOW_NUM); 421 + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 422 + 452 423 mutex_lock(&dm->dc_lock); 453 - dc_stream_forward_crc_window(stream, &crtc_ctx->rect, 454 - phy_id, false); 424 + if (dm->secure_display_ctx.support_mul_roi) 425 + dc_stream_forward_multiple_crc_window(stream, roi_cpy, 426 + phy_id, false); 427 + else 428 + dc_stream_forward_crc_window(stream, &roi_cpy[0].rect, 429 + 
phy_id, false); 455 430 mutex_unlock(&dm->dc_lock); 456 431 } 457 432 ··· 470 429 bool ret = false; 471 430 472 431 spin_lock_irq(&drm_dev->event_lock); 473 - ret = acrtc->dm_irq_params.window_param.activated; 432 + ret = acrtc->dm_irq_params.crc_window_activated; 474 433 spin_unlock_irq(&drm_dev->event_lock); 475 434 476 435 return ret; ··· 508 467 509 468 mutex_lock(&adev->dm.dc_lock); 510 469 470 + /* For PSR1, check that the panel has exited PSR */ 471 + if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) 472 + amdgpu_dm_psr_wait_disable(stream_state); 473 + 511 474 /* Enable or disable CRTC CRC generation */ 512 475 if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { 513 476 if (!dc_stream_configure_crc(stream_state->ctx->dc, 514 - stream_state, NULL, enable, enable)) { 477 + stream_state, NULL, enable, enable, 0, true)) { 515 478 ret = -EINVAL; 516 479 goto unlock; 517 480 } ··· 649 604 650 605 } 651 606 607 + /* 608 + * Reading the CRC requires the vblank interrupt handler to be 609 + * enabled. Keep a reference until CRC capture stops. 610 + */ 611 + enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); 612 + if (!enabled && enable) { 613 + ret = drm_crtc_vblank_get(crtc); 614 + if (ret) 615 + goto cleanup; 616 + } 617 + 652 618 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 653 619 /* Reset secure_display when we change crc source from debugfs */ 654 620 amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream); ··· 670 614 goto cleanup; 671 615 } 672 616 673 - /* 674 - * Reading the CRC requires the vblank interrupt handler to be 675 - * enabled. Keep a reference until CRC capture stops. 
676 - */ 677 - enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); 678 617 if (!enabled && enable) { 679 - ret = drm_crtc_vblank_get(crtc); 680 - if (ret) 681 - goto cleanup; 682 - 683 618 if (dm_is_crc_source_dprx(source)) { 684 619 if (drm_dp_start_crc(aux, crtc)) { 685 620 DRM_DEBUG_DRIVER("dp start crc failed\n"); ··· 698 651 699 652 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 700 653 /* Initialize phy id mapping table for secure display*/ 701 - if (!dm->secure_display_ctx.phy_mapping_updated) 654 + if (dm->secure_display_ctx.op_mode == LEGACY_MODE && 655 + !dm->secure_display_ctx.phy_mapping_updated) 702 656 update_phy_id_mapping(adev); 703 657 #endif 704 658 ··· 757 709 } 758 710 759 711 if (dm_is_crc_source_crtc(cur_crc_src)) { 760 - if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, 712 + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, 0, 761 713 &crcs[0], &crcs[1], &crcs[2])) 762 714 return; 763 715 ··· 774 726 struct amdgpu_crtc *acrtc = NULL; 775 727 struct amdgpu_device *adev = NULL; 776 728 struct secure_display_crtc_context *crtc_ctx = NULL; 729 + bool reset_crc_frame_count[MAX_CRC_WINDOW_NUM] = {false}; 730 + uint32_t crc_r[MAX_CRC_WINDOW_NUM] = {0}; 731 + uint32_t crc_g[MAX_CRC_WINDOW_NUM] = {0}; 732 + uint32_t crc_b[MAX_CRC_WINDOW_NUM] = {0}; 777 733 unsigned long flags1; 734 + bool forward_roi_change = false; 735 + bool notify_ta = false; 736 + bool all_crc_ready = true; 737 + struct dc_stream_state *stream_state; 738 + int i; 778 739 779 740 if (crtc == NULL) 780 741 return; ··· 791 734 acrtc = to_amdgpu_crtc(crtc); 792 735 adev = drm_to_adev(crtc->dev); 793 736 drm_dev = crtc->dev; 737 + stream_state = to_dm_crtc_state(crtc->state)->stream; 794 738 795 739 spin_lock_irqsave(&drm_dev->event_lock, flags1); 796 740 cur_crc_src = acrtc->dm_irq_params.crc_src; 797 741 798 742 /* Early return if CRC capture is not enabled. 
*/ 799 743 if (!amdgpu_dm_is_valid_crc_source(cur_crc_src) || 800 - !dm_is_crc_source_crtc(cur_crc_src)) 801 - goto cleanup; 744 + !dm_is_crc_source_crtc(cur_crc_src)) { 745 + spin_unlock_irqrestore(&drm_dev->event_lock, flags1); 746 + return; 747 + } 802 748 803 - if (!acrtc->dm_irq_params.window_param.activated) 804 - goto cleanup; 805 - 806 - if (acrtc->dm_irq_params.window_param.skip_frame_cnt) { 807 - acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1; 808 - goto cleanup; 749 + if (!acrtc->dm_irq_params.crc_window_activated) { 750 + spin_unlock_irqrestore(&drm_dev->event_lock, flags1); 751 + return; 809 752 } 810 753 811 754 crtc_ctx = &adev->dm.secure_display_ctx.crtc_ctx[acrtc->crtc_id]; ··· 816 759 crtc_ctx->crtc = crtc; 817 760 } 818 761 819 - if (acrtc->dm_irq_params.window_param.update_win) { 820 - /* prepare work for dmub to update ROI */ 821 - crtc_ctx->rect.x = acrtc->dm_irq_params.window_param.x_start; 822 - crtc_ctx->rect.y = acrtc->dm_irq_params.window_param.y_start; 823 - crtc_ctx->rect.width = acrtc->dm_irq_params.window_param.x_end - 824 - acrtc->dm_irq_params.window_param.x_start; 825 - crtc_ctx->rect.height = acrtc->dm_irq_params.window_param.y_end - 826 - acrtc->dm_irq_params.window_param.y_start; 827 - schedule_work(&crtc_ctx->forward_roi_work); 762 + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { 763 + struct crc_params crc_window = { 764 + .windowa_x_start = acrtc->dm_irq_params.window_param[i].x_start, 765 + .windowa_y_start = acrtc->dm_irq_params.window_param[i].y_start, 766 + .windowa_x_end = acrtc->dm_irq_params.window_param[i].x_end, 767 + .windowa_y_end = acrtc->dm_irq_params.window_param[i].y_end, 768 + .windowb_x_start = acrtc->dm_irq_params.window_param[i].x_start, 769 + .windowb_y_start = acrtc->dm_irq_params.window_param[i].y_start, 770 + .windowb_x_end = acrtc->dm_irq_params.window_param[i].x_end, 771 + .windowb_y_end = acrtc->dm_irq_params.window_param[i].y_end, 772 + }; 828 773 829 - 
acrtc->dm_irq_params.window_param.update_win = false; 774 + crtc_ctx->roi[i].enable = acrtc->dm_irq_params.window_param[i].enable; 830 775 831 - /* Statically skip 1 frame, because we may need to wait below things 832 - * before sending ROI to dmub: 833 - * 1. We defer the work by using system workqueue. 834 - * 2. We may need to wait for dc_lock before accessing dmub. 835 - */ 836 - acrtc->dm_irq_params.window_param.skip_frame_cnt = 1; 776 + if (!acrtc->dm_irq_params.window_param[i].enable) { 777 + crtc_ctx->crc_info.crc[i].crc_ready = false; 778 + continue; 779 + } 837 780 838 - } else { 839 - /* prepare work for psp to read ROI/CRC and send to I2C */ 840 - schedule_work(&crtc_ctx->notify_ta_work); 781 + if (acrtc->dm_irq_params.window_param[i].skip_frame_cnt) { 782 + acrtc->dm_irq_params.window_param[i].skip_frame_cnt -= 1; 783 + crtc_ctx->crc_info.crc[i].crc_ready = false; 784 + continue; 785 + } 786 + 787 + if (acrtc->dm_irq_params.window_param[i].update_win) { 788 + crtc_ctx->roi[i].rect.x = crc_window.windowa_x_start; 789 + crtc_ctx->roi[i].rect.y = crc_window.windowa_y_start; 790 + crtc_ctx->roi[i].rect.width = crc_window.windowa_x_end - 791 + crc_window.windowa_x_start; 792 + crtc_ctx->roi[i].rect.height = crc_window.windowa_y_end - 793 + crc_window.windowa_y_start; 794 + 795 + if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE) 796 + /* forward task to dmub to update ROI */ 797 + forward_roi_change = true; 798 + else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE) 799 + /* update ROI via dm*/ 800 + dc_stream_configure_crc(stream_state->ctx->dc, stream_state, 801 + &crc_window, true, true, i, false); 802 + 803 + reset_crc_frame_count[i] = true; 804 + 805 + acrtc->dm_irq_params.window_param[i].update_win = false; 806 + 807 + /* Statically skip 1 frame, because we may need to wait below things 808 + * before sending ROI to dmub: 809 + * 1. We defer the work by using system workqueue. 810 + * 2. 
We may need to wait for dc_lock before accessing dmub. 811 + */ 812 + acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 1; 813 + crtc_ctx->crc_info.crc[i].crc_ready = false; 814 + } else { 815 + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, i, 816 + &crc_r[i], &crc_g[i], &crc_b[i])) 817 + DRM_ERROR("Secure Display: fail to get crc from engine %d\n", i); 818 + 819 + if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE) 820 + /* forward task to psp to read ROI/CRC and output via I2C */ 821 + notify_ta = true; 822 + else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE) 823 + /* Avoid ROI window get changed, keep overwriting. */ 824 + dc_stream_configure_crc(stream_state->ctx->dc, stream_state, 825 + &crc_window, true, true, i, false); 826 + 827 + /* crc ready for psp to read out */ 828 + crtc_ctx->crc_info.crc[i].crc_ready = true; 829 + } 841 830 } 842 831 843 - cleanup: 844 832 spin_unlock_irqrestore(&drm_dev->event_lock, flags1); 833 + 834 + if (forward_roi_change) 835 + schedule_work(&crtc_ctx->forward_roi_work); 836 + 837 + if (notify_ta) 838 + schedule_work(&crtc_ctx->notify_ta_work); 839 + 840 + spin_lock_irqsave(&crtc_ctx->crc_info.lock, flags1); 841 + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { 842 + crtc_ctx->crc_info.crc[i].crc_R = crc_r[i]; 843 + crtc_ctx->crc_info.crc[i].crc_G = crc_g[i]; 844 + crtc_ctx->crc_info.crc[i].crc_B = crc_b[i]; 845 + 846 + if (!crtc_ctx->roi[i].enable) { 847 + crtc_ctx->crc_info.crc[i].frame_count = 0; 848 + continue; 849 + } 850 + 851 + if (!crtc_ctx->crc_info.crc[i].crc_ready) 852 + all_crc_ready = false; 853 + 854 + if (reset_crc_frame_count[i] || crtc_ctx->crc_info.crc[i].frame_count == UINT_MAX) 855 + /* Reset the reference frame count after user update the ROI 856 + * or it reaches the maximum value. 
857 + */ 858 + crtc_ctx->crc_info.crc[i].frame_count = 0; 859 + else 860 + crtc_ctx->crc_info.crc[i].frame_count += 1; 861 + } 862 + spin_unlock_irqrestore(&crtc_ctx->crc_info.lock, flags1); 863 + 864 + if (all_crc_ready) 865 + complete_all(&crtc_ctx->crc_info.completion); 845 866 } 846 867 847 868 void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev) ··· 940 805 INIT_WORK(&crtc_ctx[i].forward_roi_work, amdgpu_dm_forward_crc_window); 941 806 INIT_WORK(&crtc_ctx[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read); 942 807 crtc_ctx[i].crtc = &adev->mode_info.crtcs[i]->base; 808 + spin_lock_init(&crtc_ctx[i].crc_info.lock); 943 809 } 944 810 945 811 adev->dm.secure_display_ctx.crtc_ctx = crtc_ctx; 946 - return; 812 + 813 + adev->dm.secure_display_ctx.op_mode = DISPLAY_CRC_MODE; 947 814 } 948 815 #endif
+29 -3
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
··· 42 42 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY 43 43 #define MAX_CRTC 6 44 44 45 + enum secure_display_mode { 46 + /* via dmub + psp */ 47 + LEGACY_MODE = 0, 48 + /* driver directly */ 49 + DISPLAY_CRC_MODE, 50 + SECURE_DISPLAY_MODE_MAX, 51 + }; 52 + 45 53 struct phy_id_mapping { 46 54 bool assigned; 47 55 bool is_mst; ··· 59 51 u8 rad[8]; 60 52 }; 61 53 54 + struct crc_data { 55 + uint32_t crc_R; 56 + uint32_t crc_G; 57 + uint32_t crc_B; 58 + uint32_t frame_count; 59 + bool crc_ready; 60 + }; 61 + 62 + struct crc_info { 63 + struct crc_data crc[MAX_CRC_WINDOW_NUM]; 64 + struct completion completion; 65 + spinlock_t lock; 66 + }; 67 + 62 68 struct crc_window_param { 63 69 uint16_t x_start; 64 70 uint16_t y_start; 65 71 uint16_t x_end; 66 72 uint16_t y_end; 67 73 /* CRC window is activated or not*/ 68 - bool activated; 74 + bool enable; 69 75 /* Update crc window during vertical blank or not */ 70 76 bool update_win; 71 77 /* skip reading/writing for few frames */ ··· 96 74 struct drm_crtc *crtc; 97 75 98 76 /* Region of Interest (ROI) */ 99 - struct rect rect; 77 + struct crc_window roi[MAX_CRC_WINDOW_NUM]; 78 + 79 + struct crc_info crc_info; 100 80 }; 101 81 102 82 struct secure_display_context { 103 83 104 84 struct secure_display_crtc_context *crtc_ctx; 105 - 85 + /* Whether dmub support multiple ROI setting */ 86 + bool support_mul_roi; 87 + enum secure_display_mode op_mode; 106 88 bool phy_mapping_updated; 107 89 int phy_id_mapping_cnt; 108 90 struct phy_id_mapping phy_id_mapping[MAX_CRTC];
+2 -2
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
··· 93 93 return rc; 94 94 } 95 95 96 - bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state) 96 + bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) 97 97 { 98 98 return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || 99 99 dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; ··· 142 142 amdgpu_dm_replay_enable(vblank_work->stream, true); 143 143 } else if (vblank_enabled) { 144 144 if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active) 145 - amdgpu_dm_psr_disable(vblank_work->stream); 145 + amdgpu_dm_psr_disable(vblank_work->stream, false); 146 146 } else if (link->psr_settings.psr_feature_enabled && 147 147 allow_sr_entry && !is_sr_active && !is_crc_window_active) { 148 148
+1 -1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
··· 37 37 38 38 bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc); 39 39 40 - bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state); 40 + bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state); 41 41 42 42 int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc); 43 43
+82 -17
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
··· 25 25 26 26 #include <linux/string_helpers.h> 27 27 #include <linux/uaccess.h> 28 + #include <media/cec-notifier.h> 28 29 29 30 #include "dc.h" 30 31 #include "amdgpu.h" ··· 2849 2848 return 0; 2850 2849 } 2851 2850 2851 + /** 2852 + * hdmi_cec_state_show - Read out the HDMI-CEC feature status 2853 + * @m: sequence file. 2854 + * @data: unused. 2855 + * 2856 + * Return 0 on success 2857 + */ 2858 + static int hdmi_cec_state_show(struct seq_file *m, void *data) 2859 + { 2860 + struct drm_connector *connector = m->private; 2861 + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); 2862 + 2863 + seq_printf(m, "%s:%d\n", connector->name, connector->base.id); 2864 + seq_printf(m, "HDMI-CEC status: %d\n", aconnector->notifier ? 1 : 0); 2865 + 2866 + return 0; 2867 + } 2868 + 2869 + /** 2870 + * hdmi_cec_state_write - Enable/Disable HDMI-CEC feature from driver side 2871 + * @f: file structure. 2872 + * @buf: userspace buffer. set to '1' to enable; '0' to disable cec feature. 2873 + * @size: size of buffer from userpsace. 2874 + * @pos: unused. 
2875 + * 2876 + * Return size on success, error code on failure 2877 + */ 2878 + static ssize_t hdmi_cec_state_write(struct file *f, const char __user *buf, 2879 + size_t size, loff_t *pos) 2880 + { 2881 + int ret; 2882 + bool enable; 2883 + struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; 2884 + struct drm_device *ddev = aconnector->base.dev; 2885 + 2886 + if (size == 0) 2887 + return -EINVAL; 2888 + 2889 + ret = kstrtobool_from_user(buf, size, &enable); 2890 + if (ret) { 2891 + drm_dbg_driver(ddev, "invalid user data !\n"); 2892 + return ret; 2893 + } 2894 + 2895 + if (enable) { 2896 + if (aconnector->notifier) 2897 + return -EINVAL; 2898 + ret = amdgpu_dm_initialize_hdmi_connector(aconnector); 2899 + if (ret) 2900 + return ret; 2901 + hdmi_cec_set_edid(aconnector); 2902 + } else { 2903 + if (!aconnector->notifier) 2904 + return -EINVAL; 2905 + cec_notifier_conn_unregister(aconnector->notifier); 2906 + aconnector->notifier = NULL; 2907 + } 2908 + 2909 + return size; 2910 + } 2911 + 2852 2912 DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); 2853 2913 DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); 2854 2914 DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); ··· 2922 2860 DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); 2923 2861 DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); 2924 2862 DEFINE_SHOW_ATTRIBUTE(is_dpia_link); 2863 + DEFINE_SHOW_STORE_ATTRIBUTE(hdmi_cec_state); 2925 2864 2926 2865 static const struct file_operations dp_dsc_clock_en_debugfs_fops = { 2927 2866 .owner = THIS_MODULE, ··· 3058 2995 char *name; 3059 2996 const struct file_operations *fops; 3060 2997 } hdmi_debugfs_entries[] = { 3061 - {"hdcp_sink_capability", &hdcp_sink_capability_fops} 2998 + {"hdcp_sink_capability", &hdcp_sink_capability_fops}, 2999 + {"hdmi_cec_state", &hdmi_cec_state_fops} 3062 3000 }; 3063 3001 3064 3002 /* ··· 3544 3480 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3545 3481 3546 3482 spin_lock_irq(&drm_dev->event_lock); 3547 - acrtc->dm_irq_params.window_param.x_start = 
(uint16_t) val; 3548 - acrtc->dm_irq_params.window_param.update_win = false; 3483 + acrtc->dm_irq_params.window_param[0].x_start = (uint16_t) val; 3484 + acrtc->dm_irq_params.window_param[0].update_win = false; 3549 3485 spin_unlock_irq(&drm_dev->event_lock); 3550 3486 3551 3487 return 0; ··· 3561 3497 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3562 3498 3563 3499 spin_lock_irq(&drm_dev->event_lock); 3564 - *val = acrtc->dm_irq_params.window_param.x_start; 3500 + *val = acrtc->dm_irq_params.window_param[0].x_start; 3565 3501 spin_unlock_irq(&drm_dev->event_lock); 3566 3502 3567 3503 return 0; ··· 3581 3517 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3582 3518 3583 3519 spin_lock_irq(&drm_dev->event_lock); 3584 - acrtc->dm_irq_params.window_param.y_start = (uint16_t) val; 3585 - acrtc->dm_irq_params.window_param.update_win = false; 3520 + acrtc->dm_irq_params.window_param[0].y_start = (uint16_t) val; 3521 + acrtc->dm_irq_params.window_param[0].update_win = false; 3586 3522 spin_unlock_irq(&drm_dev->event_lock); 3587 3523 3588 3524 return 0; ··· 3598 3534 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3599 3535 3600 3536 spin_lock_irq(&drm_dev->event_lock); 3601 - *val = acrtc->dm_irq_params.window_param.y_start; 3537 + *val = acrtc->dm_irq_params.window_param[0].y_start; 3602 3538 spin_unlock_irq(&drm_dev->event_lock); 3603 3539 3604 3540 return 0; ··· 3617 3553 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3618 3554 3619 3555 spin_lock_irq(&drm_dev->event_lock); 3620 - acrtc->dm_irq_params.window_param.x_end = (uint16_t) val; 3621 - acrtc->dm_irq_params.window_param.update_win = false; 3556 + acrtc->dm_irq_params.window_param[0].x_end = (uint16_t) val; 3557 + acrtc->dm_irq_params.window_param[0].update_win = false; 3622 3558 spin_unlock_irq(&drm_dev->event_lock); 3623 3559 3624 3560 return 0; ··· 3634 3570 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3635 3571 3636 3572 spin_lock_irq(&drm_dev->event_lock); 3637 - *val = 
acrtc->dm_irq_params.window_param.x_end; 3573 + *val = acrtc->dm_irq_params.window_param[0].x_end; 3638 3574 spin_unlock_irq(&drm_dev->event_lock); 3639 3575 3640 3576 return 0; ··· 3653 3589 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3654 3590 3655 3591 spin_lock_irq(&drm_dev->event_lock); 3656 - acrtc->dm_irq_params.window_param.y_end = (uint16_t) val; 3657 - acrtc->dm_irq_params.window_param.update_win = false; 3592 + acrtc->dm_irq_params.window_param[0].y_end = (uint16_t) val; 3593 + acrtc->dm_irq_params.window_param[0].update_win = false; 3658 3594 spin_unlock_irq(&drm_dev->event_lock); 3659 3595 3660 3596 return 0; ··· 3670 3606 struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); 3671 3607 3672 3608 spin_lock_irq(&drm_dev->event_lock); 3673 - *val = acrtc->dm_irq_params.window_param.y_end; 3609 + *val = acrtc->dm_irq_params.window_param[0].y_end; 3674 3610 spin_unlock_irq(&drm_dev->event_lock); 3675 3611 3676 3612 return 0; ··· 3693 3629 /* PSR may write to OTG CRC window control register, 3694 3630 * so close it before starting secure_display. 3695 3631 */ 3696 - amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream); 3632 + amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true); 3697 3633 3698 3634 spin_lock_irq(&adev_to_drm(adev)->event_lock); 3699 3635 3700 - acrtc->dm_irq_params.window_param.activated = true; 3701 - acrtc->dm_irq_params.window_param.update_win = true; 3702 - acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; 3636 + acrtc->dm_irq_params.window_param[0].enable = true; 3637 + acrtc->dm_irq_params.window_param[0].update_win = true; 3638 + acrtc->dm_irq_params.window_param[0].skip_frame_cnt = 0; 3639 + acrtc->dm_irq_params.crc_window_activated = true; 3703 3640 3704 3641 spin_unlock_irq(&adev_to_drm(adev)->event_lock); 3705 3642 mutex_unlock(&adev->dm.dc_lock);
+6
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
··· 885 885 return ret; 886 886 } 887 887 888 + bool dm_helpers_dp_write_hblank_reduction(struct dc_context *ctx, const struct dc_stream_state *stream) 889 + { 890 + // TODO 891 + return false; 892 + } 893 + 888 894 bool dm_helpers_is_dp_sink_present(struct dc_link *link) 889 895 { 890 896 bool dp_sink_present;
+3 -1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
··· 39 39 #ifdef CONFIG_DEBUG_FS 40 40 enum amdgpu_dm_pipe_crc_source crc_src; 41 41 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY 42 - struct crc_window_param window_param; 42 + struct crc_window_param window_param[MAX_CRC_WINDOW_NUM]; 43 + /* At least one CRC window is activated or not*/ 44 + bool crc_window_activated; 43 45 #endif 44 46 #endif 45 47 };
+34 -14
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
··· 155 155 return 0; 156 156 } 157 157 158 + 159 + static inline void 160 + amdgpu_dm_mst_reset_mst_connector_setting(struct amdgpu_dm_connector *aconnector) 161 + { 162 + aconnector->drm_edid = NULL; 163 + aconnector->dsc_aux = NULL; 164 + aconnector->mst_output_port->passthrough_aux = NULL; 165 + aconnector->mst_local_bw = 0; 166 + aconnector->vc_full_pbn = 0; 167 + } 168 + 158 169 static void 159 170 amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) 160 171 { ··· 193 182 194 183 dc_sink_release(dc_sink); 195 184 aconnector->dc_sink = NULL; 196 - aconnector->drm_edid = NULL; 197 - aconnector->dsc_aux = NULL; 198 - port->passthrough_aux = NULL; 185 + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); 199 186 } 200 187 201 188 aconnector->mst_status = MST_STATUS_DEFAULT; ··· 513 504 514 505 dc_sink_release(aconnector->dc_sink); 515 506 aconnector->dc_sink = NULL; 516 - aconnector->drm_edid = NULL; 517 - aconnector->dsc_aux = NULL; 518 - port->passthrough_aux = NULL; 507 + amdgpu_dm_mst_reset_mst_connector_setting(aconnector); 519 508 520 509 amdgpu_dm_set_mst_status(&aconnector->mst_status, 521 510 MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD, ··· 1827 1820 struct drm_dp_mst_port *immediate_upstream_port = NULL; 1828 1821 uint32_t end_link_bw = 0; 1829 1822 1830 - /*Get last DP link BW capability*/ 1831 - if (dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw)) { 1832 - if (stream_kbps > end_link_bw) { 1823 + /*Get last DP link BW capability. 
Mode shall be supported by Legacy peer*/ 1824 + if (aconnector->mst_output_port->pdt != DP_PEER_DEVICE_DP_LEGACY_CONV && 1825 + aconnector->mst_output_port->pdt != DP_PEER_DEVICE_NONE) { 1826 + if (aconnector->vc_full_pbn != aconnector->mst_output_port->full_pbn) { 1827 + dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw); 1828 + aconnector->vc_full_pbn = aconnector->mst_output_port->full_pbn; 1829 + aconnector->mst_local_bw = end_link_bw; 1830 + } else { 1831 + end_link_bw = aconnector->mst_local_bw; 1832 + } 1833 + 1834 + if (end_link_bw > 0 && stream_kbps > end_link_bw) { 1833 1835 DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." 1834 1836 "Mode required bw can't fit into last link\n"); 1835 1837 return DC_FAIL_BANDWIDTH_VALIDATE; ··· 1852 1836 if (immediate_upstream_port) { 1853 1837 virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn); 1854 1838 virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); 1855 - if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { 1856 - DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." 1857 - "Max dsc compression can't fit into MST available bw\n"); 1858 - return DC_FAIL_BANDWIDTH_VALIDATE; 1859 - } 1839 + } else { 1840 + /* For topology LCT 1 case - only one mstb*/ 1841 + virtual_channel_bw_in_kbps = root_link_bw_in_kbps; 1842 + } 1843 + 1844 + if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { 1845 + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." 1846 + "Max dsc compression can't fit into MST available bw\n"); 1847 + return DC_FAIL_BANDWIDTH_VALIDATE; 1860 1848 } 1861 1849 } 1862 1850
+10 -7
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
··· 177 177 return AMD_FMT_MOD_GET(TILE, modifier); 178 178 } 179 179 180 - static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, 180 + static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(struct dc_tiling_info *tiling_info, 181 181 uint64_t tiling_flags) 182 182 { 183 183 /* Fill GFX8 params */ ··· 190 190 tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); 191 191 num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); 192 192 193 + tiling_info->gfxversion = DcGfxVersion8; 193 194 /* XXX fix me for VI */ 194 195 tiling_info->gfx8.num_banks = num_banks; 195 196 tiling_info->gfx8.array_mode = ··· 211 210 } 212 211 213 212 static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, 214 - union dc_tiling_info *tiling_info) 213 + struct dc_tiling_info *tiling_info) 215 214 { 216 215 /* Fill GFX9 params */ 217 216 tiling_info->gfx9.num_pipes = ··· 232 231 } 233 232 234 233 static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, 235 - union dc_tiling_info *tiling_info, 234 + struct dc_tiling_info *tiling_info, 236 235 uint64_t modifier) 237 236 { 238 237 unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); ··· 262 261 static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev, 263 262 const enum surface_pixel_format format, 264 263 const enum dc_rotation_angle rotation, 265 - const union dc_tiling_info *tiling_info, 264 + const struct dc_tiling_info *tiling_info, 266 265 const struct dc_plane_dcc_param *dcc, 267 266 const struct dc_plane_address *address, 268 267 const struct plane_size *plane_size) ··· 309 308 const enum surface_pixel_format format, 310 309 const enum dc_rotation_angle rotation, 311 310 const struct plane_size *plane_size, 312 - union dc_tiling_info *tiling_info, 311 + struct dc_tiling_info *tiling_info, 313 312 struct dc_plane_dcc_param *dcc, 314 313 struct dc_plane_address *address) 315 
314 { ··· 318 317 319 318 amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); 320 319 tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); 320 + tiling_info->gfxversion = DcGfxVersion9; 321 321 322 322 if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { 323 323 uint64_t dcc_address = afb->address + afb->base.offsets[1]; ··· 360 358 const enum surface_pixel_format format, 361 359 const enum dc_rotation_angle rotation, 362 360 const struct plane_size *plane_size, 363 - union dc_tiling_info *tiling_info, 361 + struct dc_tiling_info *tiling_info, 364 362 struct dc_plane_dcc_param *dcc, 365 363 struct dc_plane_address *address) 366 364 { ··· 371 369 amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info); 372 370 373 371 tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); 372 + tiling_info->gfxversion = DcGfxAddr3; 374 373 375 374 if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { 376 375 int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier); ··· 837 834 const enum surface_pixel_format format, 838 835 const enum dc_rotation_angle rotation, 839 836 const uint64_t tiling_flags, 840 - union dc_tiling_info *tiling_info, 837 + struct dc_tiling_info *tiling_info, 841 838 struct plane_size *plane_size, 842 839 struct dc_plane_dcc_param *dcc, 843 840 struct dc_plane_address *address,
+1 -1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
··· 47 47 const enum surface_pixel_format format, 48 48 const enum dc_rotation_angle rotation, 49 49 const uint64_t tiling_flags, 50 - union dc_tiling_info *tiling_info, 50 + struct dc_tiling_info *tiling_info, 51 51 struct plane_size *plane_size, 52 52 struct dc_plane_dcc_param *dcc, 53 53 struct dc_plane_address *address,
+32 -3
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
··· 201 201 * 202 202 * Return: true if success 203 203 */ 204 - bool amdgpu_dm_psr_disable(struct dc_stream_state *stream) 204 + bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait) 205 205 { 206 - unsigned int power_opt = 0; 207 206 bool psr_enable = false; 208 207 209 208 DRM_DEBUG_DRIVER("Disabling psr...\n"); 210 209 211 - return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt); 210 + return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL); 212 211 } 213 212 214 213 /* ··· 249 250 } 250 251 251 252 return allow_active; 253 + } 254 + 255 + /** 256 + * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR 257 + * @stream: stream state attached to the eDP link 258 + * 259 + * Waits for a max of 500ms for the eDP panel to exit PSR. 260 + * 261 + * Return: true if panel exited PSR, false otherwise. 262 + */ 263 + bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream) 264 + { 265 + enum dc_psr_state psr_state = PSR_STATE0; 266 + struct dc_link *link = stream->link; 267 + int retry_count; 268 + 269 + if (link == NULL) 270 + return false; 271 + 272 + for (retry_count = 0; retry_count <= 1000; retry_count++) { 273 + dc_link_get_psr_state(link, &psr_state); 274 + if (psr_state == PSR_STATE0) 275 + break; 276 + udelay(500); 277 + } 278 + 279 + if (retry_count == 1000) 280 + return false; 281 + 282 + return true; 252 283 }
+2 -1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
··· 34 34 void amdgpu_dm_set_psr_caps(struct dc_link *link); 35 35 void amdgpu_dm_psr_enable(struct dc_stream_state *stream); 36 36 bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); 37 - bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); 37 + bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait); 38 38 bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); 39 39 bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm); 40 + bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream); 40 41 41 42 #endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */
+5 -4
drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
··· 3088 3088 info->ext_disp_conn_info.path[i].ext_encoder_obj_id.id, 3089 3089 info->ext_disp_conn_info.path[i].caps 3090 3090 ); 3091 - if (info->ext_disp_conn_info.path[i].caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) 3092 - DC_LOG_BIOS("BIOS EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); 3091 + if ((info->ext_disp_conn_info.path[i].caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) 3092 + DC_LOG_BIOS("BIOS AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); 3093 3093 else if (bp->base.ctx->dc->config.force_bios_fixed_vs) { 3094 - info->ext_disp_conn_info.path[i].caps |= EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN; 3095 - DC_LOG_BIOS("driver forced EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); 3094 + info->ext_disp_conn_info.path[i].caps &= ~AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; 3095 + info->ext_disp_conn_info.path[i].caps |= AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN; 3096 + DC_LOG_BIOS("driver forced AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i); 3096 3097 } 3097 3098 } 3098 3099 // Log the Checksum and Voltage Swing
+12 -6
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
··· 401 401 if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { 402 402 if (clk_mgr->base.ctx->dc->config.allow_0_dtb_clk) 403 403 dcn35_smu_set_dtbclk(clk_mgr, false); 404 + 404 405 clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; 405 406 } 406 407 /* check that we're not already in lower */ ··· 419 418 } 420 419 421 420 if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { 422 - dcn35_smu_set_dtbclk(clk_mgr, true); 423 - clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; 421 + int actual_dtbclk = 0; 424 422 425 423 dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); 426 - clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; 424 + dcn35_smu_set_dtbclk(clk_mgr, true); 425 + 426 + actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT); 427 + 428 + if (actual_dtbclk) { 429 + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; 430 + clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; 431 + } 427 432 } 428 433 429 434 /* check that we're not already in D0 */ ··· 591 584 592 585 static void init_clk_states(struct clk_mgr *clk_mgr) 593 586 { 594 - struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); 595 587 uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; 588 + 596 589 memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); 597 590 598 - if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD) 599 - clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit 600 591 clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk 601 592 clk_mgr->clks.p_state_change_support = true; 602 593 clk_mgr->clks.prev_p_state_change_support = true; ··· 605 600 void dcn35_init_clocks(struct clk_mgr *clk_mgr) 606 601 { 607 602 struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); 603 + 608 604 init_clk_states(clk_mgr); 609 605 610 606 // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+3 -1
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
··· 43 43 #define DALSMC_MSG_ActiveUclkFclk 0x18 44 44 #define DALSMC_MSG_IdleUclkFclk 0x19 45 45 #define DALSMC_MSG_SetUclkPstateAllow 0x1A 46 - #define DALSMC_Message_Count 0x1B 46 + #define DALSMC_MSG_SubvpUclkFclk 0x1B 47 + #define DALSMC_MSG_GetNumUmcChannels 0x1C 48 + #define DALSMC_Message_Count 0x1D 47 49 48 50 typedef enum { 49 51 FCLK_SWITCH_DISALLOW,
+42 -223
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
··· 628 628 629 629 } 630 630 631 - static void dcn401_update_clocks_legacy(struct clk_mgr *clk_mgr_base, 632 - struct dc_state *context, 633 - bool safe_to_lower) 634 - { 635 - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); 636 - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; 637 - struct dc *dc = clk_mgr_base->ctx->dc; 638 - int display_count; 639 - bool update_dppclk = false; 640 - bool update_dispclk = false; 641 - bool enter_display_off = false; 642 - bool dpp_clock_lowered = false; 643 - struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; 644 - bool force_reset = false; 645 - bool update_uclk = false, update_fclk = false; 646 - bool p_state_change_support; 647 - bool fclk_p_state_change_support; 648 - int total_plane_count; 649 - 650 - if (dc->work_arounds.skip_clock_update) 651 - return; 652 - 653 - if (clk_mgr_base->clks.dispclk_khz == 0 || 654 - (dc->debug.force_clock_mode & 0x1)) { 655 - /* This is from resume or boot up, if forced_clock cfg option used, 656 - * we bypass program dispclk and DPPCLK, but need set them for S3. 657 - */ 658 - force_reset = true; 659 - 660 - dcn2_read_clocks_from_hw_dentist(clk_mgr_base); 661 - 662 - /* Force_clock_mode 0x1: force reset the clock even it is the same clock 663 - * as long as it is in Passive level. 
664 - */ 665 - } 666 - display_count = clk_mgr_helper_get_active_display_cnt(dc, context); 667 - 668 - if (display_count == 0) 669 - enter_display_off = true; 670 - 671 - if (clk_mgr->smu_present) { 672 - if (enter_display_off == safe_to_lower) 673 - dcn401_smu_set_num_of_displays(clk_mgr, display_count); 674 - 675 - clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support; 676 - 677 - total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); 678 - fclk_p_state_change_support = new_clocks->fclk_p_state_change_support || (total_plane_count == 0); 679 - 680 - if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_p_state_change_support)) { 681 - clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support; 682 - 683 - /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */ 684 - if (clk_mgr_base->clks.fclk_p_state_change_support) { 685 - /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ 686 - dcn401_smu_send_fclk_pstate_message(clk_mgr, true); 687 - } 688 - } 689 - 690 - if (dc->debug.force_min_dcfclk_mhz > 0) 691 - new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? 
692 - new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000); 693 - 694 - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { 695 - clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; 696 - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) 697 - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCFCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz)); 698 - } 699 - 700 - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) { 701 - clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; 702 - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) 703 - dcn401_smu_set_min_deep_sleep_dcef_clk(clk_mgr, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz)); 704 - } 705 - 706 - if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz)) 707 - /* We don't actually care about socclk, don't notify SMU of hard min */ 708 - clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz; 709 - 710 - clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; 711 - clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways; 712 - 713 - if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && 714 - clk_mgr_base->clks.num_ways < new_clocks->num_ways) { 715 - clk_mgr_base->clks.num_ways = new_clocks->num_ways; 716 - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) 717 - dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); 718 - } 719 - 720 - 721 - p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0); 722 - if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support)) { 723 - clk_mgr_base->clks.p_state_change_support = p_state_change_support; 724 - clk_mgr_base->clks.fw_based_mclk_switching = p_state_change_support && new_clocks->fw_based_mclk_switching; 
725 - 726 - /* to disable P-State switching, set UCLK min = max */ 727 - if (!clk_mgr_base->clks.p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) 728 - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, 729 - clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1].memclk_mhz); 730 - } 731 - 732 - /* Always update saved value, even if new value not set due to P-State switching unsupported. Also check safe_to_lower for FCLK */ 733 - if (safe_to_lower && (clk_mgr_base->clks.fclk_p_state_change_support != clk_mgr_base->clks.fclk_prev_p_state_change_support)) { 734 - update_fclk = true; 735 - } 736 - 737 - if (!clk_mgr_base->clks.fclk_p_state_change_support && 738 - update_fclk && 739 - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_FCLK)) { 740 - /* Handle code for sending a message to PMFW that FCLK P-state change is not supported */ 741 - dcn401_smu_send_fclk_pstate_message(clk_mgr, false); 742 - } 743 - 744 - /* Always update saved value, even if new value not set due to P-State switching unsupported */ 745 - if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { 746 - clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; 747 - update_uclk = true; 748 - } 749 - 750 - /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ 751 - if (clk_mgr_base->clks.p_state_change_support && 752 - (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) && 753 - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) 754 - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); 755 - 756 - if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && 757 - clk_mgr_base->clks.num_ways > new_clocks->num_ways) { 758 - clk_mgr_base->clks.num_ways = new_clocks->num_ways; 759 - if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) 760 - 
dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); 761 - } 762 - } 763 - 764 - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) { 765 - if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz) 766 - dpp_clock_lowered = true; 767 - 768 - clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; 769 - clk_mgr_base->clks.actual_dppclk_khz = new_clocks->dppclk_khz; 770 - 771 - if (clk_mgr->smu_present && !dpp_clock_lowered && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) 772 - clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, clk_mgr_base->clks.dppclk_khz); 773 - update_dppclk = true; 774 - } 775 - 776 - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { 777 - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; 778 - 779 - if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK)) 780 - clk_mgr_base->clks.actual_dispclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DISPCLK, clk_mgr_base->clks.dispclk_khz); 781 - 782 - update_dispclk = true; 783 - } 784 - 785 - if (!new_clocks->dtbclk_en && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { 786 - new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; 787 - } 788 - 789 - /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ 790 - if (!dc->debug.disable_dtb_ref_clk_switch && 791 - should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000) && 792 - dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { 793 - /* DCCG requires KHz precision for DTBCLK */ 794 - clk_mgr_base->clks.ref_dtbclk_khz = 795 - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); 796 - 797 - dcn401_update_clocks_update_dtb_dto(clk_mgr, context, 
clk_mgr_base->clks.ref_dtbclk_khz); 798 - } 799 - 800 - if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { 801 - if (dpp_clock_lowered) { 802 - /* if clock is being lowered, increase DTO before lowering refclk */ 803 - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, 804 - safe_to_lower, clk_mgr_base->clks.dppclk_khz); 805 - dcn401_update_clocks_update_dentist(clk_mgr, context); 806 - if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) { 807 - clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, 808 - clk_mgr_base->clks.dppclk_khz); 809 - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower, 810 - clk_mgr_base->clks.actual_dppclk_khz); 811 - } 812 - 813 - } else { 814 - /* if clock is being raised, increase refclk before lowering DTO */ 815 - if (update_dppclk || update_dispclk) 816 - dcn401_update_clocks_update_dentist(clk_mgr, context); 817 - /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures 818 - * that we do not lower dto when it is not safe to lower. We do not need to 819 - * compare the current and new dppclk before calling this function. 
820 - */ 821 - dcn401_update_clocks_update_dpp_dto(clk_mgr, context, 822 - safe_to_lower, clk_mgr_base->clks.actual_dppclk_khz); 823 - } 824 - } 825 - 826 - if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) 827 - /*update dmcu for wait_loop count*/ 828 - dmcu->funcs->set_psr_wait_loop(dmcu, 829 - clk_mgr_base->clks.dispclk_khz / 1000 / 7); 830 - } 831 - 832 631 static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned int num_steps) 833 632 { 834 633 struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); ··· 807 1008 update_active_fclk = true; 808 1009 update_idle_fclk = true; 809 1010 810 - /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */ 811 - if (clk_mgr_base->clks.fclk_p_state_change_support) { 812 - /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ 813 - if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { 814 - block_sequence[num_steps].params.update_pstate_support_params.support = true; 815 - block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; 816 - num_steps++; 817 - } 818 - } 1011 + /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW (message not supported on DCN401)*/ 1012 + // if (clk_mgr_base->clks.fclk_p_state_change_support) { 1013 + // /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ 1014 + // if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { 1015 + // block_sequence[num_steps].params.update_pstate_support_params.support = true; 1016 + // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; 1017 + // num_steps++; 1018 + // } 1019 + // } 819 1020 } 820 1021 821 1022 if (!clk_mgr_base->clks.fclk_p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { ··· 1023 1224 // (*num_steps)++; 1024 1225 // } 1025 1226 1026 - /* disable FCLK P-State support if needed */ 
1027 - if (!fclk_p_state_change_support && 1028 - should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) && 1029 - dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { 1030 - block_sequence[num_steps].params.update_pstate_support_params.support = false; 1031 - block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; 1032 - num_steps++; 1033 - } 1227 + /* disable FCLK P-State support if needed (message not supported on DCN401)*/ 1228 + // if (!fclk_p_state_change_support && 1229 + // should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) && 1230 + // dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) { 1231 + // block_sequence[num_steps].params.update_pstate_support_params.support = false; 1232 + // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT; 1233 + // num_steps++; 1234 + // } 1034 1235 } 1035 1236 1036 1237 if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching && ··· 1211 1412 1212 1413 unsigned int num_steps = 0; 1213 1414 1214 - if (dc->debug.enable_legacy_clock_update) { 1215 - dcn401_update_clocks_legacy(clk_mgr_base, context, safe_to_lower); 1216 - return; 1217 - } 1218 - 1219 1415 /* build bandwidth related clocks update sequence */ 1220 1416 num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, 1221 1417 context, ··· 1345 1551 dcn401_execute_block_sequence(clk_mgr_base, num_steps); 1346 1552 } 1347 1553 1554 + static int dcn401_get_hard_min_memclk(struct clk_mgr *clk_mgr_base) 1555 + { 1556 + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); 1557 + 1558 + return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.dramclk_khz; 1559 + } 1560 + 1561 + static int dcn401_get_hard_min_fclk(struct clk_mgr *clk_mgr_base) 1562 + { 1563 + struct clk_mgr_internal *clk_mgr = 
TO_CLK_MGR_INTERNAL(clk_mgr_base); 1564 + 1565 + return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz; 1566 + } 1567 + 1348 1568 /* Get current memclk states, update bounding box */ 1349 1569 static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) 1350 1570 { ··· 1402 1594 1403 1595 if (clk_mgr->dpm_present && !num_levels) 1404 1596 clk_mgr->dpm_present = false; 1597 + 1598 + clk_mgr_base->bw_params->num_channels = dcn401_smu_get_num_of_umc_channels(clk_mgr); 1599 + if (clk_mgr_base->ctx->dc_bios) { 1600 + /* use BIOS values if none provided by PMFW */ 1601 + if (clk_mgr_base->bw_params->num_channels == 0) { 1602 + clk_mgr_base->bw_params->num_channels = clk_mgr_base->ctx->dc_bios->vram_info.num_chans; 1603 + } 1604 + clk_mgr_base->bw_params->dram_channel_width_bytes = clk_mgr_base->ctx->dc_bios->vram_info.dram_channel_width_bytes; 1605 + } 1405 1606 1406 1607 /* Refresh bounding box */ 1407 1608 clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( ··· 1501 1684 .enable_pme_wa = dcn401_enable_pme_wa, 1502 1685 .is_smu_present = dcn401_is_smu_present, 1503 1686 .get_dispclk_from_dentist = dcn401_get_dispclk_from_dentist, 1687 + .get_hard_min_memclk = dcn401_get_hard_min_memclk, 1688 + .get_hard_min_fclk = dcn401_get_hard_min_fclk, 1504 1689 }; 1505 1690 1506 1691 struct clk_mgr_internal *dcn401_clk_mgr_construct(
+14
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
··· 25 25 #ifndef DALSMC_MSG_SubvpUclkFclk 26 26 #define DALSMC_MSG_SubvpUclkFclk 0x1B 27 27 #endif 28 + #ifndef DALSMC_MSG_GetNumUmcChannels 29 + #define DALSMC_MSG_GetNumUmcChannels 0x1C 30 + #endif 28 31 29 32 /* 30 33 * Function to be used instead of REG_WAIT macro because the wait ends when ··· 336 333 337 334 dcn401_smu_send_msg_with_param(clk_mgr, 338 335 DALSMC_MSG_NumOfDisplays, num_displays, NULL); 336 + } 337 + 338 + unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr) 339 + { 340 + unsigned int response = 0; 341 + 342 + dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetNumUmcChannels, 0, &response); 343 + 344 + smu_print("SMU Get Num UMC Channels: num_umc_channels = %d\n", response); 345 + 346 + return response; 339 347 }
+1
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
··· 28 28 uint16_t fclk_freq_mhz); 29 29 void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz); 30 30 void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays); 31 + unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr); 31 32 32 33 #endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */
+99 -11
drivers/gpu/drm/amd/display/dc/core/dc.c
··· 615 615 616 616 return true; 617 617 } 618 + 619 + static void 620 + dc_stream_forward_dmub_multiple_crc_window(struct dc_dmub_srv *dmub_srv, 621 + struct crc_window *window, struct otg_phy_mux *mux_mapping, bool stop) 622 + { 623 + int i; 624 + union dmub_rb_cmd cmd = {0}; 625 + 626 + cmd.secure_display.mul_roi_ctl.phy_id = mux_mapping->phy_output_num; 627 + cmd.secure_display.mul_roi_ctl.otg_id = mux_mapping->otg_output_num; 628 + 629 + cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY; 630 + 631 + if (stop) { 632 + cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE; 633 + } else { 634 + cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY; 635 + for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) { 636 + cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_start = window[i].rect.x; 637 + cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_start = window[i].rect.y; 638 + cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_end = window[i].rect.x + window[i].rect.width; 639 + cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_end = window[i].rect.y + window[i].rect.height; 640 + cmd.secure_display.mul_roi_ctl.roi_ctl[i].enable = window[i].enable; 641 + } 642 + } 643 + 644 + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); 645 + } 646 + 647 + bool 648 + dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream, 649 + struct crc_window *window, uint8_t phy_id, bool stop) 650 + { 651 + struct dc_dmub_srv *dmub_srv; 652 + struct otg_phy_mux mux_mapping; 653 + struct pipe_ctx *pipe; 654 + int i; 655 + struct dc *dc = stream->ctx->dc; 656 + 657 + for (i = 0; i < MAX_PIPES; i++) { 658 + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; 659 + if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe) 660 + break; 661 + } 662 + 663 + /* Stream not found */ 664 + if (i == MAX_PIPES) 665 + return false; 666 + 667 + mux_mapping.phy_output_num = phy_id; 668 + mux_mapping.otg_output_num = 
pipe->stream_res.tg->inst; 669 + 670 + dmub_srv = dc->ctx->dmub_srv; 671 + 672 + /* forward to dmub only. no dmcu support*/ 673 + if (dmub_srv) 674 + dc_stream_forward_dmub_multiple_crc_window(dmub_srv, window, &mux_mapping, stop); 675 + else 676 + return false; 677 + 678 + return true; 679 + } 618 680 #endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */ 619 681 620 682 /** ··· 687 625 * @enable: Enable CRC if true, disable otherwise. 688 626 * @continuous: Capture CRC on every frame if true. Otherwise, only capture 689 627 * once. 628 + * @idx: Capture CRC on which CRC engine instance 629 + * @reset: Reset CRC engine before the configuration 690 630 * 691 - * By default, only CRC0 is configured, and the entire frame is used to 692 - * calculate the CRC. 631 + * By default, the entire frame is used to calculate the CRC. 693 632 * 694 633 * Return: %false if the stream is not found or CRC capture is not supported; 695 634 * %true if the stream has been configured. 696 635 */ 697 636 bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, 698 - struct crc_params *crc_window, bool enable, bool continuous) 637 + struct crc_params *crc_window, bool enable, bool continuous, 638 + uint8_t idx, bool reset) 699 639 { 700 640 struct pipe_ctx *pipe; 701 641 struct crc_params param; ··· 741 677 param.continuous_mode = continuous; 742 678 param.enable = enable; 743 679 680 + param.crc_eng_inst = idx; 681 + param.reset = reset; 682 + 744 683 tg = pipe->stream_res.tg; 745 684 746 685 /* Only call if supported */ ··· 758 691 * 759 692 * @dc: DC object. 760 693 * @stream: The DC stream state of the stream to get CRCs from. 694 + * @idx: index of crc engine to get CRC from 761 695 * @r_cr: CRC value for the red component. 762 696 * @g_y: CRC value for the green component. 763 697 * @b_cb: CRC value for the blue component. ··· 768 700 * Return: 769 701 * %false if stream is not found, or if CRCs are not enabled. 
770 702 */ 771 - bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, 703 + bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint8_t idx, 772 704 uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) 773 705 { 774 706 int i; ··· 789 721 tg = pipe->stream_res.tg; 790 722 791 723 if (tg->funcs->get_crc) 792 - return tg->funcs->get_crc(tg, r_cr, g_y, b_cb); 724 + return tg->funcs->get_crc(tg, idx, r_cr, g_y, b_cb); 793 725 DC_LOG_WARNING("CRC capture not supported."); 794 726 return false; 795 727 } ··· 1241 1173 get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); 1242 1174 else if (dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2) 1243 1175 get_fams2_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color)); 1176 + else if (dc->debug.visual_confirm == VISUAL_CONFIRM_VABC) 1177 + get_vabc_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); 1244 1178 } 1245 1179 } 1246 1180 } ··· 2562 2492 2563 2493 2564 2494 if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, 2565 - sizeof(union dc_tiling_info)) != 0) { 2495 + sizeof(struct dc_tiling_info)) != 0) { 2566 2496 update_flags->bits.swizzle_change = 1; 2567 2497 elevate_update_type(&update_type, UPDATE_TYPE_MED); 2568 2498 ··· 4589 4519 struct pipe_split_policy_backup policy; 4590 4520 struct dc_state *intermediate_context; 4591 4521 struct dc_state *old_current_state = dc->current_state; 4592 - struct dc_surface_update srf_updates[MAX_SURFACE_NUM] = {0}; 4522 + struct dc_surface_update srf_updates[MAX_SURFACES] = {0}; 4593 4523 int surface_count; 4594 4524 4595 4525 /* ··· 5386 5316 dc->vm_pa_config.valid) { 5387 5317 dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config); 5388 5318 } 5389 - /*mark d0 last*/ 5390 - dc->power_state = power_state; 5391 5319 break; 5392 5320 default: 5393 5321 ASSERT(dc->current_state->stream_count == 0); 5394 - /*mark d3 first*/ 5395 - dc->power_state = power_state; 5396 5322 
dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); 5397 5323 5398 5324 dc_state_destruct(dc->current_state); ··· 5512 5446 5513 5447 void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const *caller_name) 5514 5448 { 5449 + int idle_fclk_khz = 0, idle_dramclk_khz = 0, i = 0; 5450 + enum mall_stream_type subvp_pipe_type[MAX_PIPES] = {0}; 5451 + struct pipe_ctx *pipe = NULL; 5452 + struct dc_state *context = dc->current_state; 5453 + 5515 5454 if (dc->debug.disable_idle_power_optimizations) { 5516 5455 DC_LOG_DEBUG("%s: disabled\n", __func__); 5517 5456 return; ··· 5541 5470 dc->idle_optimizations_allowed = allow; 5542 5471 DC_LOG_DEBUG("%s: %s\n", __func__, allow ? "enabled" : "disabled"); 5543 5472 } 5473 + 5474 + // log idle clocks and sub vp pipe types at idle optimization time 5475 + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_fclk) 5476 + idle_fclk_khz = dc->clk_mgr->funcs->get_hard_min_fclk(dc->clk_mgr); 5477 + 5478 + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk) 5479 + idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr); 5480 + 5481 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 5482 + pipe = &context->res_ctx.pipe_ctx[i]; 5483 + subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe); 5484 + } 5485 + 5486 + DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", 5487 + __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2], 5488 + subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name); 5489 + 5544 5490 } 5545 5491 5546 5492 void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name)
+38
drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
··· 425 425 } 426 426 } 427 427 428 + /* Visual Confirm color definition for VABC */ 429 + void get_vabc_visual_confirm_color( 430 + struct pipe_ctx *pipe_ctx, 431 + struct tg_color *color) 432 + { 433 + uint32_t color_value = MAX_TG_COLOR_VALUE; 434 + struct dc_link *edp_link = NULL; 435 + 436 + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link) { 437 + if (pipe_ctx->stream->link->connector_signal == SIGNAL_TYPE_EDP) 438 + edp_link = pipe_ctx->stream->link; 439 + } 440 + 441 + if (edp_link) { 442 + switch (edp_link->backlight_control_type) { 443 + case BACKLIGHT_CONTROL_PWM: 444 + color->color_r_cr = color_value; 445 + color->color_g_y = 0; 446 + color->color_b_cb = 0; 447 + break; 448 + case BACKLIGHT_CONTROL_AMD_AUX: 449 + color->color_r_cr = 0; 450 + color->color_g_y = color_value; 451 + color->color_b_cb = 0; 452 + break; 453 + case BACKLIGHT_CONTROL_VESA_AUX: 454 + color->color_r_cr = 0; 455 + color->color_g_y = 0; 456 + color->color_b_cb = color_value; 457 + break; 458 + } 459 + } else { 460 + color->color_r_cr = 0; 461 + color->color_g_y = 0; 462 + color->color_b_cb = 0; 463 + } 464 + } 465 + 428 466 void get_subvp_visual_confirm_color( 429 467 struct pipe_ctx *pipe_ctx, 430 468 struct tg_color *color)
+10 -6
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
··· 4478 4478 static void adaptive_sync_override_dp_info_packets_sdp_line_num( 4479 4479 const struct dc_crtc_timing *timing, 4480 4480 struct enc_sdp_line_num *sdp_line_num, 4481 - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param) 4481 + unsigned int vstartup_start) 4482 4482 { 4483 4483 uint32_t asic_blank_start = 0; 4484 4484 uint32_t asic_blank_end = 0; ··· 4493 4493 asic_blank_end = (asic_blank_start - tg->v_border_bottom - 4494 4494 tg->v_addressable - tg->v_border_top); 4495 4495 4496 - if (pipe_dlg_param->vstartup_start > asic_blank_end) { 4497 - v_update = (tg->v_total - (pipe_dlg_param->vstartup_start - asic_blank_end)); 4496 + if (vstartup_start > asic_blank_end) { 4497 + v_update = (tg->v_total - (vstartup_start - asic_blank_end)); 4498 4498 sdp_line_num->adaptive_sync_line_num_valid = true; 4499 4499 sdp_line_num->adaptive_sync_line_num = (tg->v_total - v_update - 1); 4500 4500 } else { ··· 4507 4507 struct dc_info_packet *info_packet, 4508 4508 const struct dc_stream_state *stream, 4509 4509 struct encoder_info_frame *info_frame, 4510 - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param) 4510 + unsigned int vstartup_start) 4511 4511 { 4512 4512 if (!stream->adaptive_sync_infopacket.valid) 4513 4513 return; ··· 4515 4515 adaptive_sync_override_dp_info_packets_sdp_line_num( 4516 4516 &stream->timing, 4517 4517 &info_frame->sdp_line_num, 4518 - pipe_dlg_param); 4518 + vstartup_start); 4519 4519 4520 4520 *info_packet = stream->adaptive_sync_infopacket; 4521 4521 } ··· 4548 4548 { 4549 4549 enum signal_type signal = SIGNAL_TYPE_NONE; 4550 4550 struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; 4551 + unsigned int vstartup_start = 0; 4551 4552 4552 4553 /* default all packets to invalid */ 4553 4554 info->avi.valid = false; ··· 4561 4560 info->vtem.valid = false; 4562 4561 info->adaptive_sync.valid = false; 4563 4562 signal = pipe_ctx->stream->signal; 4563 + 4564 + if 
(pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe) 4565 + vstartup_start = pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe(pipe_ctx); 4564 4566 4565 4567 /* HDMi and DP have different info packets*/ 4566 4568 if (dc_is_hdmi_signal(signal)) { ··· 4586 4582 set_adaptive_sync_info_packet(&info->adaptive_sync, 4587 4583 pipe_ctx->stream, 4588 4584 info, 4589 - &pipe_ctx->pipe_dlg_param); 4585 + vstartup_start); 4590 4586 } 4591 4587 4592 4588 patch_gamut_packet_checksum(&info->gamut);
+4 -4
drivers/gpu/drm/amd/display/dc/core/dc_state.c
··· 483 483 if (stream_status == NULL) { 484 484 dm_error("Existing stream not found; failed to attach surface!\n"); 485 485 goto out; 486 - } else if (stream_status->plane_count == MAX_SURFACE_NUM) { 486 + } else if (stream_status->plane_count == MAX_SURFACES) { 487 487 dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n", 488 - plane_state, MAX_SURFACE_NUM); 488 + plane_state, MAX_SURFACES); 489 489 goto out; 490 490 } else if (!otg_master_pipe) { 491 491 goto out; ··· 600 600 { 601 601 int i, old_plane_count; 602 602 struct dc_stream_status *stream_status = NULL; 603 - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; 603 + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; 604 604 605 605 for (i = 0; i < state->stream_count; i++) 606 606 if (state->streams[i] == stream) { ··· 875 875 { 876 876 int i, old_plane_count; 877 877 struct dc_stream_status *stream_status = NULL; 878 - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; 878 + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; 879 879 880 880 for (i = 0; i < state->stream_count; i++) 881 881 if (state->streams[i] == phantom_stream) {
+2
drivers/gpu/drm/amd/display/dc/core/dc_stream.c
··· 37 37 #define DC_LOGGER dc->ctx->logger 38 38 #ifndef MIN 39 39 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) 40 + #endif 41 + #ifndef MAX 40 42 #define MAX(x, y) ((x > y) ? x : y) 41 43 #endif 42 44
+35 -7
drivers/gpu/drm/amd/display/dc/dc.h
··· 55 55 struct set_config_cmd_payload; 56 56 struct dmub_notification; 57 57 58 - #define DC_VER "3.2.314" 58 + #define DC_VER "3.2.316" 59 59 60 - #define MAX_SURFACES 3 60 + #define MAX_SURFACES 4 61 61 #define MAX_PLANES 6 62 62 #define MAX_STREAMS 6 63 63 #define MIN_VIEWPORT_SIZE 12 ··· 472 472 bool disable_hbr_audio_dp2; 473 473 bool consolidated_dpia_dp_lt; 474 474 bool set_pipe_unlock_order; 475 + bool enable_dpia_pre_training; 475 476 }; 476 477 477 478 enum visual_confirm { ··· 489 488 VISUAL_CONFIRM_MCLK_SWITCH = 16, 490 489 VISUAL_CONFIRM_FAMS2 = 19, 491 490 VISUAL_CONFIRM_HW_CURSOR = 20, 491 + VISUAL_CONFIRM_VABC = 21, 492 492 }; 493 493 494 494 enum dc_psr_power_opts { ··· 777 775 uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ 778 776 uint32_t disable_usb4_pm_support:1; /* bit 5 */ 779 777 uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ 780 - uint32_t reserved:25; 778 + uint32_t enable_dpia_pre_training:1; /* bit 7 */ 779 + uint32_t reserved:24; 781 780 } bits; 782 781 uint32_t raw; 783 782 }; ··· 1063 1060 uint32_t dml21_disable_pstate_method_mask; 1064 1061 union fw_assisted_mclk_switch_version fams_version; 1065 1062 union dmub_fams2_global_feature_config fams2_config; 1066 - bool enable_legacy_clock_update; 1067 1063 unsigned int force_cositing; 1068 1064 unsigned int disable_spl; 1069 1065 unsigned int force_easf; ··· 1307 1305 struct rect clip_rect; 1308 1306 1309 1307 struct plane_size plane_size; 1310 - union dc_tiling_info tiling_info; 1308 + struct dc_tiling_info tiling_info; 1311 1309 1312 1310 struct dc_plane_dcc_param dcc; 1313 1311 ··· 1378 1376 1379 1377 struct dc_plane_info { 1380 1378 struct plane_size plane_size; 1381 - union dc_tiling_info tiling_info; 1379 + struct dc_tiling_info tiling_info; 1382 1380 struct dc_plane_dcc_param dcc; 1383 1381 enum surface_pixel_format format; 1384 1382 enum dc_rotation_angle rotation; ··· 1405 1403 * store current value in plane states so we can still recover 1406 1404 * a valid 
current state during dc update. 1407 1405 */ 1408 - struct dc_plane_state plane_states[MAX_SURFACE_NUM]; 1406 + struct dc_plane_state plane_states[MAX_SURFACES]; 1409 1407 1410 1408 struct dc_stream_state stream_state; 1411 1409 }; ··· 2027 2025 const struct dc_link *link, 2028 2026 const struct dc_link_settings *link_setting); 2029 2027 2028 + struct dp_audio_bandwidth_params { 2029 + const struct dc_crtc_timing *crtc_timing; 2030 + enum dp_link_encoding link_encoding; 2031 + uint32_t channel_count; 2032 + uint32_t sample_rate_hz; 2033 + }; 2034 + 2035 + /* The function calculates the minimum size of hblank (in bytes) needed to 2036 + * support the specified channel count and sample rate combination, given the 2037 + * link encoding and timing to be used. This calculation is not supported 2038 + * for 8b/10b SST. 2039 + * 2040 + * return - min hblank size in bytes, 0 if 8b/10b SST. 2041 + */ 2042 + uint32_t dc_link_required_hblank_size_bytes( 2043 + const struct dc_link *link, 2044 + struct dp_audio_bandwidth_params *audio_params); 2045 + 2030 2046 /* The function takes a snapshot of current link resource allocation state 2031 2047 * @dc: pointer to dc of the dm calling this 2032 2048 * @map: a dc link resource snapshot defined internally to dc. 
··· 2404 2384 struct dsc_dec_dpcd_caps dsc_dec_caps; 2405 2385 }; 2406 2386 2387 + struct dc_sink_hblank_expansion_caps { 2388 + // 'true' if these are virtual DPCD's HBlank expansion caps (immediately upstream of sink in MST topology), 2389 + // 'false' if they are sink's HBlank expansion caps 2390 + bool is_virtual_dpcd_hblank_expansion; 2391 + struct hblank_expansion_dpcd_caps dpcd_caps; 2392 + }; 2393 + 2407 2394 struct dc_sink_fec_caps { 2408 2395 bool is_rx_fec_supported; 2409 2396 bool is_topology_fec_supported; ··· 2437 2410 struct scdc_caps scdc_caps; 2438 2411 struct dc_sink_dsc_caps dsc_caps; 2439 2412 struct dc_sink_fec_caps fec_caps; 2413 + struct dc_sink_hblank_expansion_caps hblank_expansion_caps; 2440 2414 2441 2415 bool is_vsc_sdp_colorimetry_supported; 2442 2416
+16
drivers/gpu/drm/amd/display/dc/dc_dp_types.h
··· 969 969 uint8_t raw; 970 970 }; 971 971 972 + union dp_receive_port0_cap { 973 + struct { 974 + uint8_t RESERVED :1; 975 + uint8_t LOCAL_EDID_PRESENT :1; 976 + uint8_t ASSOCIATED_TO_PRECEDING_PORT:1; 977 + uint8_t HBLANK_EXPANSION_CAPABLE :1; 978 + uint8_t BUFFER_SIZE_UNIT :1; 979 + uint8_t BUFFER_SIZE_PER_PORT :1; 980 + uint8_t HBLANK_REDUCTION_CAPABLE :1; 981 + uint8_t RESERVED2:1; 982 + uint8_t BUFFER_SIZE:8; 983 + } bits; 984 + uint8_t raw[2]; 985 + }; 986 + 972 987 union dpcd_max_uncompressed_pixel_rate_cap { 973 988 struct { 974 989 uint16_t max_uncompressed_pixel_rate_cap :15; ··· 1208 1193 1209 1194 struct replay_info pr_info; 1210 1195 uint16_t edp_oled_emission_rate; 1196 + union dp_receive_port0_cap receive_port0_cap; 1211 1197 }; 1212 1198 1213 1199 union dpcd_sink_ext_caps {
+5
drivers/gpu/drm/amd/display/dc/dc_dsc.h
··· 94 94 const int num_slices_h, 95 95 const bool is_dp); 96 96 97 + void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, 98 + const struct dsc_dec_dpcd_caps *dsc_sink_caps); 99 + void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc, 100 + const struct dc_crtc_timing *timing); 101 + 97 102 /* TODO - Hardware/specs limitation should be owned by dc dsc and returned to DM, 98 103 * and DM can choose to OVERRIDE the limitation on CASE BY CASE basis. 99 104 * Hardware/specs limitation should not be writable by DM.
+94 -79
drivers/gpu/drm/amd/display/dc/dc_hw_types.h
··· 341 341 DC_ADDR3_SW_UNKNOWN = DC_ADDR3_SW_MAX 342 342 }; 343 343 344 - union dc_tiling_info { 344 + enum dc_gfxversion { 345 + DcGfxVersion7 = 0, 346 + DcGfxVersion8, 347 + DcGfxVersion9, 348 + DcGfxVersion10, 349 + DcGfxVersion11, 350 + DcGfxAddr3, 351 + DcGfxVersionUnknown 352 + }; 345 353 346 - struct { 347 - /* Specifies the number of memory banks for tiling 348 - * purposes. 349 - * Only applies to 2D and 3D tiling modes. 350 - * POSSIBLE VALUES: 2,4,8,16 351 - */ 352 - unsigned int num_banks; 353 - /* Specifies the number of tiles in the x direction 354 - * to be incorporated into the same bank. 355 - * Only applies to 2D and 3D tiling modes. 356 - * POSSIBLE VALUES: 1,2,4,8 357 - */ 358 - unsigned int bank_width; 359 - unsigned int bank_width_c; 360 - /* Specifies the number of tiles in the y direction to 361 - * be incorporated into the same bank. 362 - * Only applies to 2D and 3D tiling modes. 363 - * POSSIBLE VALUES: 1,2,4,8 364 - */ 365 - unsigned int bank_height; 366 - unsigned int bank_height_c; 367 - /* Specifies the macro tile aspect ratio. Only applies 368 - * to 2D and 3D tiling modes. 369 - */ 370 - unsigned int tile_aspect; 371 - unsigned int tile_aspect_c; 372 - /* Specifies the number of bytes that will be stored 373 - * contiguously for each tile. 374 - * If the tile data requires more storage than this 375 - * amount, it is split into multiple slices. 376 - * This field must not be larger than 377 - * GB_ADDR_CONFIG.DRAM_ROW_SIZE. 378 - * Only applies to 2D and 3D tiling modes. 379 - * For color render targets, TILE_SPLIT >= 256B. 380 - */ 381 - enum tile_split_values tile_split; 382 - enum tile_split_values tile_split_c; 383 - /* Specifies the addressing within a tile. 
384 - * 0x0 - DISPLAY_MICRO_TILING 385 - * 0x1 - THIN_MICRO_TILING 386 - * 0x2 - DEPTH_MICRO_TILING 387 - * 0x3 - ROTATED_MICRO_TILING 388 - */ 389 - enum tile_mode_values tile_mode; 390 - enum tile_mode_values tile_mode_c; 391 - /* Specifies the number of pipes and how they are 392 - * interleaved in the surface. 393 - * Refer to memory addressing document for complete 394 - * details and constraints. 395 - */ 396 - unsigned int pipe_config; 397 - /* Specifies the tiling mode of the surface. 398 - * THIN tiles use an 8x8x1 tile size. 399 - * THICK tiles use an 8x8x4 tile size. 400 - * 2D tiling modes rotate banks for successive Z slices 401 - * 3D tiling modes rotate pipes and banks for Z slices 402 - * Refer to memory addressing document for complete 403 - * details and constraints. 404 - */ 405 - enum array_mode_values array_mode; 406 - } gfx8; 354 + struct dc_tiling_info { 355 + unsigned int gfxversion; // Specifies which part of the union to use. Must use DalGfxVersion enum 356 + union { 357 + struct { 358 + /* Specifies the number of memory banks for tiling 359 + * purposes. 360 + * Only applies to 2D and 3D tiling modes. 361 + * POSSIBLE VALUES: 2,4,8,16 362 + */ 363 + unsigned int num_banks; 364 + /* Specifies the number of tiles in the x direction 365 + * to be incorporated into the same bank. 366 + * Only applies to 2D and 3D tiling modes. 367 + * POSSIBLE VALUES: 1,2,4,8 368 + */ 369 + unsigned int bank_width; 370 + unsigned int bank_width_c; 371 + /* Specifies the number of tiles in the y direction to 372 + * be incorporated into the same bank. 373 + * Only applies to 2D and 3D tiling modes. 374 + * POSSIBLE VALUES: 1,2,4,8 375 + */ 376 + unsigned int bank_height; 377 + unsigned int bank_height_c; 378 + /* Specifies the macro tile aspect ratio. Only applies 379 + * to 2D and 3D tiling modes. 
380 + */ 381 + unsigned int tile_aspect; 382 + unsigned int tile_aspect_c; 383 + /* Specifies the number of bytes that will be stored 384 + * contiguously for each tile. 385 + * If the tile data requires more storage than this 386 + * amount, it is split into multiple slices. 387 + * This field must not be larger than 388 + * GB_ADDR_CONFIG.DRAM_ROW_SIZE. 389 + * Only applies to 2D and 3D tiling modes. 390 + * For color render targets, TILE_SPLIT >= 256B. 391 + */ 392 + enum tile_split_values tile_split; 393 + enum tile_split_values tile_split_c; 394 + /* Specifies the addressing within a tile. 395 + * 0x0 - DISPLAY_MICRO_TILING 396 + * 0x1 - THIN_MICRO_TILING 397 + * 0x2 - DEPTH_MICRO_TILING 398 + * 0x3 - ROTATED_MICRO_TILING 399 + */ 400 + enum tile_mode_values tile_mode; 401 + enum tile_mode_values tile_mode_c; 402 + /* Specifies the number of pipes and how they are 403 + * interleaved in the surface. 404 + * Refer to memory addressing document for complete 405 + * details and constraints. 406 + */ 407 + unsigned int pipe_config; 408 + /* Specifies the tiling mode of the surface. 409 + * THIN tiles use an 8x8x1 tile size. 410 + * THICK tiles use an 8x8x4 tile size. 411 + * 2D tiling modes rotate banks for successive Z slices 412 + * 3D tiling modes rotate pipes and banks for Z slices 413 + * Refer to memory addressing document for complete 414 + * details and constraints. 
415 + */ 416 + enum array_mode_values array_mode; 417 + } gfx8; 407 418 408 - struct { 409 - enum swizzle_mode_values swizzle; 410 - unsigned int num_pipes; 411 - unsigned int max_compressed_frags; 412 - unsigned int pipe_interleave; 419 + struct { 420 + enum swizzle_mode_values swizzle; 421 + unsigned int num_pipes; 422 + unsigned int max_compressed_frags; 423 + unsigned int pipe_interleave; 413 424 414 - unsigned int num_banks; 415 - unsigned int num_shader_engines; 416 - unsigned int num_rb_per_se; 417 - bool shaderEnable; 425 + unsigned int num_banks; 426 + unsigned int num_shader_engines; 427 + unsigned int num_rb_per_se; 428 + bool shaderEnable; 418 429 419 - bool meta_linear; 420 - bool rb_aligned; 421 - bool pipe_aligned; 422 - unsigned int num_pkrs; 423 - } gfx9;/*gfx9, gfx10 and above*/ 424 - struct { 425 - enum swizzle_mode_addr3_values swizzle; 426 - } gfx_addr3;/*gfx with addr3 and above*/ 430 + bool meta_linear; 431 + bool rb_aligned; 432 + bool pipe_aligned; 433 + unsigned int num_pkrs; 434 + } gfx9;/*gfx9, gfx10 and above*/ 435 + struct { 436 + enum swizzle_mode_addr3_values swizzle; 437 + } gfx_addr3;/*gfx with addr3 and above*/ 438 + }; 427 439 }; 428 440 429 441 /* Rotation angle */ ··· 987 975 struct dc_crtc_timing_flags flags; 988 976 uint32_t dsc_fixed_bits_per_pixel_x16; /* DSC target bitrate in 1/16 of bpp (e.g. 128 -> 8bpp) */ 989 977 struct dc_dsc_config dsc_cfg; 978 + 979 + /* The number of pixels that HBlank has been expanded by from the original EDID timing. */ 980 + uint32_t expanded_hblank; 990 981 }; 991 982 992 983 enum trigger_delay {
+10 -2
drivers/gpu/drm/amd/display/dc/dc_stream.h
··· 56 56 int plane_count; 57 57 int audio_inst; 58 58 struct timing_sync_info timing_sync_info; 59 - struct dc_plane_state *plane_states[MAX_SURFACE_NUM]; 59 + struct dc_plane_state *plane_states[MAX_SURFACES]; 60 60 bool is_abm_supported; 61 61 struct mall_stream_config mall_stream_config; 62 62 bool fpo_in_use; ··· 539 539 struct rect *rect, 540 540 uint8_t phy_id, 541 541 bool is_stop); 542 + 543 + bool dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream, 544 + struct crc_window *window, 545 + uint8_t phy_id, 546 + bool stop); 542 547 #endif 543 548 544 549 bool dc_stream_configure_crc(struct dc *dc, 545 550 struct dc_stream_state *stream, 546 551 struct crc_params *crc_window, 547 552 bool enable, 548 - bool continuous); 553 + bool continuous, 554 + uint8_t idx, 555 + bool reset); 549 556 550 557 bool dc_stream_get_crc(struct dc *dc, 551 558 struct dc_stream_state *stream, 559 + uint8_t idx, 552 560 uint32_t *r_cr, 553 561 uint32_t *g_y, 554 562 uint32_t *b_cb);
+28 -7
drivers/gpu/drm/amd/display/dc/dc_types.h
··· 76 76 unsigned long last_entry_write; 77 77 }; 78 78 79 - #define MAX_SURFACE_NUM 6 80 79 #define NUM_PIXEL_FORMATS 10 81 80 82 81 enum tiling_mode { ··· 874 875 bool is_dp; /* Decoded format */ 875 876 }; 876 877 878 + struct hblank_expansion_dpcd_caps { 879 + bool expansion_supported; 880 + bool reduction_supported; 881 + bool buffer_unit_bytes; /* True: buffer size in bytes. False: buffer size in pixels*/ 882 + bool buffer_per_port; /* True: buffer size per port. False: buffer size per lane*/ 883 + uint32_t buffer_size; /* Add 1 to value and multiply by 32 */ 884 + }; 885 + 877 886 struct dc_golden_table { 878 887 uint16_t dc_golden_table_ver; 879 888 uint32_t aux_dphy_rx_control0_val; ··· 939 932 }; 940 933 941 934 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 935 + #define MAX_CRC_WINDOW_NUM 2 936 + 942 937 struct otg_phy_mux { 943 938 uint8_t phy_output_num; 944 939 uint8_t otg_output_num; 940 + }; 941 + 942 + struct crc_window { 943 + struct rect rect; 944 + bool enable; 945 945 }; 946 946 #endif 947 947 ··· 1066 1052 1067 1053 union replay_error_status { 1068 1054 struct { 1069 - unsigned char STATE_TRANSITION_ERROR :1; 1070 - unsigned char LINK_CRC_ERROR :1; 1071 - unsigned char DESYNC_ERROR :1; 1072 - unsigned char RESERVED :5; 1055 + unsigned int STATE_TRANSITION_ERROR :1; 1056 + unsigned int LINK_CRC_ERROR :1; 1057 + unsigned int DESYNC_ERROR :1; 1058 + unsigned int RESERVED_3 :1; 1059 + unsigned int LOW_RR_INCORRECT_VTOTAL :1; 1060 + unsigned int NO_DOUBLED_RR :1; 1061 + unsigned int RESERVED_6_7 :2; 1073 1062 } bits; 1074 1063 unsigned char raw; 1075 1064 }; ··· 1119 1102 union replay_error_status replay_error_status; 1120 1103 /* Replay Low Hz enable Options */ 1121 1104 union replay_low_refresh_rate_enable_options low_rr_enable_options; 1105 + /* Replay coasting vtotal is within low refresh rate range. 
*/ 1106 + bool low_rr_activated; 1122 1107 }; 1123 1108 1124 1109 /* Replay feature flags*/ ··· 1145 1126 uint32_t defer_update_coasting_vtotal_table[PR_COASTING_TYPE_NUM]; 1146 1127 /* Maximum link off frame count */ 1147 1128 uint32_t link_off_frame_count; 1148 - /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */ 1149 - uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal; 1129 + /* Replay pseudo vtotal for low refresh rate*/ 1130 + uint16_t low_rr_full_screen_video_pseudo_vtotal; 1150 1131 /* Replay last pseudo vtotal set to DMUB */ 1151 1132 uint16_t last_pseudo_vtotal; 1133 + /* Replay desync error */ 1134 + uint32_t replay_desync_error_fail_count; 1152 1135 }; 1153 1136 1154 1137 /* To split out "global" and "per-panel" config settings.
+5 -5
drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
··· 98 98 } 99 99 100 100 static enum mi_tiling_format get_mi_tiling( 101 - union dc_tiling_info *tiling_info) 101 + struct dc_tiling_info *tiling_info) 102 102 { 103 103 switch (tiling_info->gfx8.array_mode) { 104 104 case DC_ARRAY_1D_TILED_THIN1: ··· 133 133 static void dce_mi_program_pte_vm( 134 134 struct mem_input *mi, 135 135 enum surface_pixel_format format, 136 - union dc_tiling_info *tiling_info, 136 + struct dc_tiling_info *tiling_info, 137 137 enum dc_rotation_angle rotation) 138 138 { 139 139 struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi); ··· 430 430 } 431 431 432 432 static void program_tiling( 433 - struct dce_mem_input *dce_mi, const union dc_tiling_info *info) 433 + struct dce_mem_input *dce_mi, const struct dc_tiling_info *info) 434 434 { 435 435 if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */ 436 436 REG_UPDATE_6(GRPH_CONTROL, ··· 650 650 static void dce_mi_program_surface_config( 651 651 struct mem_input *mi, 652 652 enum surface_pixel_format format, 653 - union dc_tiling_info *tiling_info, 653 + struct dc_tiling_info *tiling_info, 654 654 struct plane_size *plane_size, 655 655 enum dc_rotation_angle rotation, 656 656 struct dc_plane_dcc_param *dcc, ··· 670 670 static void dce60_mi_program_surface_config( 671 671 struct mem_input *mi, 672 672 enum surface_pixel_format format, 673 - union dc_tiling_info *tiling_info, 673 + struct dc_tiling_info *tiling_info, 674 674 struct plane_size *plane_size, 675 675 enum dc_rotation_angle rotation, /* not used in DCE6 */ 676 676 struct dc_plane_dcc_param *dcc,
+2 -1
drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
··· 63 63 64 64 bool should_use_dmub_lock(struct dc_link *link) 65 65 { 66 - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) 66 + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || 67 + link->psr_settings.psr_version == DC_PSR_VERSION_1) 67 68 return true; 68 69 69 70 if (link->replay_settings.replay_feature_enabled)
+4 -4
drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
··· 162 162 163 163 static void program_tiling( 164 164 struct dce_mem_input *mem_input110, 165 - const union dc_tiling_info *info, 165 + const struct dc_tiling_info *info, 166 166 const enum surface_pixel_format pixel_format) 167 167 { 168 168 uint32_t value = 0; ··· 523 523 524 524 /* Helper to get table entry from surface info */ 525 525 static const unsigned int *get_dvmm_hw_setting( 526 - union dc_tiling_info *tiling_info, 526 + struct dc_tiling_info *tiling_info, 527 527 enum surface_pixel_format format, 528 528 bool chroma) 529 529 { ··· 563 563 static void dce_mem_input_v_program_pte_vm( 564 564 struct mem_input *mem_input, 565 565 enum surface_pixel_format format, 566 - union dc_tiling_info *tiling_info, 566 + struct dc_tiling_info *tiling_info, 567 567 enum dc_rotation_angle rotation) 568 568 { 569 569 struct dce_mem_input *mem_input110 = TO_DCE_MEM_INPUT(mem_input); ··· 636 636 static void dce_mem_input_v_program_surface_config( 637 637 struct mem_input *mem_input, 638 638 enum surface_pixel_format format, 639 - union dc_tiling_info *tiling_info, 639 + struct dc_tiling_info *tiling_info, 640 640 struct plane_size *plane_size, 641 641 enum dc_rotation_angle rotation, 642 642 struct dc_plane_dcc_param *dcc,
+135 -58
drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
··· 2127 2127 2128 2128 cntl_addr = CRTC_REG(mmCRTC_CRC_CNTL); 2129 2129 2130 - /* First, disable CRC before we configure it. */ 2131 - dm_write_reg(tg->ctx, cntl_addr, 0); 2130 + if (!params->enable || params->reset) 2131 + /* First, disable CRC before we configure it. */ 2132 + dm_write_reg(tg->ctx, cntl_addr, 0); 2132 2133 2133 2134 if (!params->enable) 2134 2135 return true; 2135 2136 2136 2137 /* Program frame boundaries */ 2137 - /* Window A x axis start and end. */ 2138 - value = 0; 2139 - addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL); 2140 - set_reg_field_value(value, params->windowa_x_start, 2141 - CRTC_CRC0_WINDOWA_X_CONTROL, 2142 - CRTC_CRC0_WINDOWA_X_START); 2143 - set_reg_field_value(value, params->windowa_x_end, 2144 - CRTC_CRC0_WINDOWA_X_CONTROL, 2145 - CRTC_CRC0_WINDOWA_X_END); 2146 - dm_write_reg(tg->ctx, addr, value); 2138 + switch (params->crc_eng_inst) { 2139 + case 0: 2140 + /* Window A x axis start and end. */ 2141 + value = 0; 2142 + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL); 2143 + set_reg_field_value(value, params->windowa_x_start, 2144 + CRTC_CRC0_WINDOWA_X_CONTROL, 2145 + CRTC_CRC0_WINDOWA_X_START); 2146 + set_reg_field_value(value, params->windowa_x_end, 2147 + CRTC_CRC0_WINDOWA_X_CONTROL, 2148 + CRTC_CRC0_WINDOWA_X_END); 2149 + dm_write_reg(tg->ctx, addr, value); 2147 2150 2148 - /* Window A y axis start and end. */ 2149 - value = 0; 2150 - addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL); 2151 - set_reg_field_value(value, params->windowa_y_start, 2152 - CRTC_CRC0_WINDOWA_Y_CONTROL, 2153 - CRTC_CRC0_WINDOWA_Y_START); 2154 - set_reg_field_value(value, params->windowa_y_end, 2155 - CRTC_CRC0_WINDOWA_Y_CONTROL, 2156 - CRTC_CRC0_WINDOWA_Y_END); 2157 - dm_write_reg(tg->ctx, addr, value); 2151 + /* Window A y axis start and end. 
*/ 2152 + value = 0; 2153 + addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL); 2154 + set_reg_field_value(value, params->windowa_y_start, 2155 + CRTC_CRC0_WINDOWA_Y_CONTROL, 2156 + CRTC_CRC0_WINDOWA_Y_START); 2157 + set_reg_field_value(value, params->windowa_y_end, 2158 + CRTC_CRC0_WINDOWA_Y_CONTROL, 2159 + CRTC_CRC0_WINDOWA_Y_END); 2160 + dm_write_reg(tg->ctx, addr, value); 2158 2161 2159 - /* Window B x axis start and end. */ 2160 - value = 0; 2161 - addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL); 2162 - set_reg_field_value(value, params->windowb_x_start, 2163 - CRTC_CRC0_WINDOWB_X_CONTROL, 2164 - CRTC_CRC0_WINDOWB_X_START); 2165 - set_reg_field_value(value, params->windowb_x_end, 2166 - CRTC_CRC0_WINDOWB_X_CONTROL, 2167 - CRTC_CRC0_WINDOWB_X_END); 2168 - dm_write_reg(tg->ctx, addr, value); 2162 + /* Window B x axis start and end. */ 2163 + value = 0; 2164 + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL); 2165 + set_reg_field_value(value, params->windowb_x_start, 2166 + CRTC_CRC0_WINDOWB_X_CONTROL, 2167 + CRTC_CRC0_WINDOWB_X_START); 2168 + set_reg_field_value(value, params->windowb_x_end, 2169 + CRTC_CRC0_WINDOWB_X_CONTROL, 2170 + CRTC_CRC0_WINDOWB_X_END); 2171 + dm_write_reg(tg->ctx, addr, value); 2169 2172 2170 - /* Window B y axis start and end. */ 2171 - value = 0; 2172 - addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL); 2173 - set_reg_field_value(value, params->windowb_y_start, 2174 - CRTC_CRC0_WINDOWB_Y_CONTROL, 2175 - CRTC_CRC0_WINDOWB_Y_START); 2176 - set_reg_field_value(value, params->windowb_y_end, 2177 - CRTC_CRC0_WINDOWB_Y_CONTROL, 2178 - CRTC_CRC0_WINDOWB_Y_END); 2179 - dm_write_reg(tg->ctx, addr, value); 2173 + /* Window B y axis start and end. 
*/ 2174 + value = 0; 2175 + addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL); 2176 + set_reg_field_value(value, params->windowb_y_start, 2177 + CRTC_CRC0_WINDOWB_Y_CONTROL, 2178 + CRTC_CRC0_WINDOWB_Y_START); 2179 + set_reg_field_value(value, params->windowb_y_end, 2180 + CRTC_CRC0_WINDOWB_Y_CONTROL, 2181 + CRTC_CRC0_WINDOWB_Y_END); 2182 + dm_write_reg(tg->ctx, addr, value); 2180 2183 2181 - /* Set crc mode and selection, and enable. Only using CRC0*/ 2182 - value = 0; 2183 - set_reg_field_value(value, params->continuous_mode ? 1 : 0, 2184 - CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); 2185 - set_reg_field_value(value, params->selection, 2186 - CRTC_CRC_CNTL, CRTC_CRC0_SELECT); 2187 - set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); 2188 - dm_write_reg(tg->ctx, cntl_addr, value); 2184 + /* Set crc mode and selection, and enable.*/ 2185 + value = 0; 2186 + set_reg_field_value(value, params->continuous_mode ? 1 : 0, 2187 + CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); 2188 + set_reg_field_value(value, params->selection, 2189 + CRTC_CRC_CNTL, CRTC_CRC0_SELECT); 2190 + set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); 2191 + dm_write_reg(tg->ctx, cntl_addr, value); 2192 + break; 2193 + case 1: 2194 + /* Window A x axis start and end. */ 2195 + value = 0; 2196 + addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_X_CONTROL); 2197 + set_reg_field_value(value, params->windowa_x_start, 2198 + CRTC_CRC1_WINDOWA_X_CONTROL, 2199 + CRTC_CRC1_WINDOWA_X_START); 2200 + set_reg_field_value(value, params->windowa_x_end, 2201 + CRTC_CRC1_WINDOWA_X_CONTROL, 2202 + CRTC_CRC1_WINDOWA_X_END); 2203 + dm_write_reg(tg->ctx, addr, value); 2204 + 2205 + /* Window A y axis start and end. 
*/ 2206 + value = 0; 2207 + addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_Y_CONTROL); 2208 + set_reg_field_value(value, params->windowa_y_start, 2209 + CRTC_CRC1_WINDOWA_Y_CONTROL, 2210 + CRTC_CRC1_WINDOWA_Y_START); 2211 + set_reg_field_value(value, params->windowa_y_end, 2212 + CRTC_CRC1_WINDOWA_Y_CONTROL, 2213 + CRTC_CRC1_WINDOWA_Y_END); 2214 + dm_write_reg(tg->ctx, addr, value); 2215 + 2216 + /* Window B x axis start and end. */ 2217 + value = 0; 2218 + addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_X_CONTROL); 2219 + set_reg_field_value(value, params->windowb_x_start, 2220 + CRTC_CRC1_WINDOWB_X_CONTROL, 2221 + CRTC_CRC1_WINDOWB_X_START); 2222 + set_reg_field_value(value, params->windowb_x_end, 2223 + CRTC_CRC1_WINDOWB_X_CONTROL, 2224 + CRTC_CRC1_WINDOWB_X_END); 2225 + dm_write_reg(tg->ctx, addr, value); 2226 + 2227 + /* Window B y axis start and end. */ 2228 + value = 0; 2229 + addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_Y_CONTROL); 2230 + set_reg_field_value(value, params->windowb_y_start, 2231 + CRTC_CRC1_WINDOWB_Y_CONTROL, 2232 + CRTC_CRC1_WINDOWB_Y_START); 2233 + set_reg_field_value(value, params->windowb_y_end, 2234 + CRTC_CRC1_WINDOWB_Y_CONTROL, 2235 + CRTC_CRC1_WINDOWB_Y_END); 2236 + dm_write_reg(tg->ctx, addr, value); 2237 + 2238 + /* Set crc mode and selection, and enable.*/ 2239 + value = 0; 2240 + set_reg_field_value(value, params->continuous_mode ? 
1 : 0, 2241 + CRTC_CRC_CNTL, CRTC_CRC_CONT_EN); 2242 + set_reg_field_value(value, params->selection, 2243 + CRTC_CRC_CNTL, CRTC_CRC1_SELECT); 2244 + set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN); 2245 + dm_write_reg(tg->ctx, cntl_addr, value); 2246 + break; 2247 + default: 2248 + return false; 2249 + } 2189 2250 2190 2251 return true; 2191 2252 } 2192 2253 2193 - bool dce110_get_crc(struct timing_generator *tg, 2254 + bool dce110_get_crc(struct timing_generator *tg, uint8_t idx, 2194 2255 uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) 2195 2256 { 2196 2257 uint32_t addr = 0; ··· 2267 2206 if (!field) 2268 2207 return false; 2269 2208 2270 - addr = CRTC_REG(mmCRTC_CRC0_DATA_RG); 2271 - value = dm_read_reg(tg->ctx, addr); 2272 - *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR); 2273 - *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y); 2209 + switch (idx) { 2210 + case 0: 2211 + addr = CRTC_REG(mmCRTC_CRC0_DATA_RG); 2212 + value = dm_read_reg(tg->ctx, addr); 2213 + *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR); 2214 + *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y); 2274 2215 2275 - addr = CRTC_REG(mmCRTC_CRC0_DATA_B); 2276 - value = dm_read_reg(tg->ctx, addr); 2277 - *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB); 2216 + addr = CRTC_REG(mmCRTC_CRC0_DATA_B); 2217 + value = dm_read_reg(tg->ctx, addr); 2218 + *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB); 2219 + break; 2220 + case 1: 2221 + addr = CRTC_REG(mmCRTC_CRC1_DATA_RG); 2222 + value = dm_read_reg(tg->ctx, addr); 2223 + *r_cr = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_R_CR); 2224 + *g_y = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_G_Y); 2225 + 2226 + addr = CRTC_REG(mmCRTC_CRC1_DATA_B); 2227 + value = dm_read_reg(tg->ctx, addr); 2228 + *b_cb = get_reg_field_value(value, CRTC_CRC1_DATA_B, CRC1_B_CB); 2229 + break; 2230 + default: 2231 + return false; 2232 + } 2278 2233 2279 2234 return 
true; 2280 2235 }
+1 -1
drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
··· 286 286 bool dce110_configure_crc(struct timing_generator *tg, 287 287 const struct crc_params *params); 288 288 289 - bool dce110_get_crc(struct timing_generator *tg, 289 + bool dce110_get_crc(struct timing_generator *tg, uint8_t idx, 290 290 uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); 291 291 292 292 bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing);
+83 -33
drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
··· 1100 1100 if (!dce120_is_tg_enabled(tg)) 1101 1101 return false; 1102 1102 1103 - /* First, disable CRC before we configure it. */ 1104 - dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, 1105 - tg110->offsets.crtc, 0); 1103 + if (!params->enable || params->reset) 1104 + /* First, disable CRC before we configure it. */ 1105 + dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, 1106 + tg110->offsets.crtc, 0); 1106 1107 1107 1108 if (!params->enable) 1108 1109 return true; 1109 1110 1110 1111 /* Program frame boundaries */ 1111 - /* Window A x axis start and end. */ 1112 - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, 1113 - CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, 1114 - CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); 1112 + switch (params->crc_eng_inst) { 1113 + case 0: 1114 + /* Window A x axis start and end. */ 1115 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, 1116 + CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, 1117 + CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); 1115 1118 1116 - /* Window A y axis start and end. */ 1117 - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, 1118 - CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, 1119 - CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); 1119 + /* Window A y axis start and end. */ 1120 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, 1121 + CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, 1122 + CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); 1120 1123 1121 - /* Window B x axis start and end. */ 1122 - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, 1123 - CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, 1124 - CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); 1124 + /* Window B x axis start and end. */ 1125 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, 1126 + CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, 1127 + CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); 1125 1128 1126 - /* Window B y axis start and end. 
*/ 1127 - CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, 1128 - CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, 1129 - CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); 1129 + /* Window B y axis start and end. */ 1130 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, 1131 + CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, 1132 + CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); 1130 1133 1131 - /* Set crc mode and selection, and enable. Only using CRC0*/ 1132 - CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, 1133 - CRTC_CRC_EN, params->continuous_mode ? 1 : 0, 1134 - CRTC_CRC0_SELECT, params->selection, 1135 - CRTC_CRC_EN, 1); 1134 + /* Set crc mode and selection, and enable.*/ 1135 + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, 1136 + CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 1137 + CRTC_CRC0_SELECT, params->selection, 1138 + CRTC_CRC_EN, 1); 1139 + break; 1140 + case 1: 1141 + /* Window A x axis start and end. */ 1142 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_X_CONTROL, 1143 + CRTC_CRC1_WINDOWA_X_START, params->windowa_x_start, 1144 + CRTC_CRC1_WINDOWA_X_END, params->windowa_x_end); 1145 + 1146 + /* Window A y axis start and end. */ 1147 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_Y_CONTROL, 1148 + CRTC_CRC1_WINDOWA_Y_START, params->windowa_y_start, 1149 + CRTC_CRC1_WINDOWA_Y_END, params->windowa_y_end); 1150 + 1151 + /* Window B x axis start and end. */ 1152 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_X_CONTROL, 1153 + CRTC_CRC1_WINDOWB_X_START, params->windowb_x_start, 1154 + CRTC_CRC1_WINDOWB_X_END, params->windowb_x_end); 1155 + 1156 + /* Window B y axis start and end. */ 1157 + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_Y_CONTROL, 1158 + CRTC_CRC1_WINDOWB_Y_START, params->windowb_y_start, 1159 + CRTC_CRC1_WINDOWB_Y_END, params->windowb_y_end); 1160 + 1161 + /* Set crc mode and selection, and enable */ 1162 + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, 1163 + CRTC_CRC_CONT_EN, params->continuous_mode ? 
1 : 0, 1164 + CRTC_CRC1_SELECT, params->selection, 1165 + CRTC_CRC_EN, 1); 1166 + break; 1167 + default: 1168 + return false; 1169 + } 1136 1170 1137 1171 return true; 1138 1172 } 1139 1173 1140 - static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr, 1141 - uint32_t *g_y, uint32_t *b_cb) 1174 + static bool dce120_get_crc(struct timing_generator *tg, uint8_t idx, 1175 + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) 1142 1176 { 1143 1177 struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); 1144 1178 uint32_t value, field; ··· 1185 1151 if (!field) 1186 1152 return false; 1187 1153 1188 - value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, 1189 - tg110->offsets.crtc); 1190 - *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); 1191 - *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); 1154 + switch (idx) { 1155 + case 0: 1156 + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, 1157 + tg110->offsets.crtc); 1158 + *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); 1159 + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); 1192 1160 1193 - value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, 1194 - tg110->offsets.crtc); 1195 - *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); 1161 + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, 1162 + tg110->offsets.crtc); 1163 + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); 1164 + break; 1165 + case 1: 1166 + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_RG, 1167 + tg110->offsets.crtc); 1168 + *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_R_CR); 1169 + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_G_Y); 1170 + 1171 + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_B, 1172 + tg110->offsets.crtc); 1173 + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_B, CRC1_B_CB); 1174 + break; 
1175 + default: 1176 + return false; 1177 + } 1196 1178 1197 1179 return true; 1198 1180 }
+5
drivers/gpu/drm/amd/display/dc/dm_helpers.h
··· 158 158 const struct dc_stream_state *stream, 159 159 bool enable 160 160 ); 161 + 162 + bool dm_helpers_dp_write_hblank_reduction( 163 + struct dc_context *ctx, 164 + const struct dc_stream_state *stream); 165 + 161 166 bool dm_helpers_is_dp_sink_present( 162 167 struct dc_link *link); 163 168
+4
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
··· 1595 1595 unsigned int NonDSCBPP0; 1596 1596 unsigned int NonDSCBPP1; 1597 1597 unsigned int NonDSCBPP2; 1598 + unsigned int NonDSCBPP3 = BPP_INVALID; 1598 1599 1599 1600 if (Format == dm_420) { 1600 1601 NonDSCBPP0 = 12; ··· 1604 1603 MinDSCBPP = 6; 1605 1604 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16; 1606 1605 } else if (Format == dm_444) { 1606 + NonDSCBPP3 = 18; 1607 1607 NonDSCBPP0 = 24; 1608 1608 NonDSCBPP1 = 30; 1609 1609 NonDSCBPP2 = 36; ··· 1669 1667 return NonDSCBPP1; 1670 1668 else if (MaxLinkBPP >= NonDSCBPP0) 1671 1669 return 16.0; 1670 + else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID && MaxLinkBPP >= NonDSCBPP3) 1671 + return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections. 1672 1672 else 1673 1673 return BPP_INVALID; 1674 1674 }
+2 -2
drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
··· 195 195 .dcn_downspread_percent = 0.5, 196 196 .gpuvm_min_page_size_bytes = 4096, 197 197 .hostvm_min_page_size_bytes = 4096, 198 - .do_urgent_latency_adjustment = 1, 198 + .do_urgent_latency_adjustment = 0, 199 199 .urgent_latency_adjustment_fabric_clock_component_us = 0, 200 - .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, 200 + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, 201 201 }; 202 202 203 203 void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
+8
drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
··· 66 66 67 67 static inline double dml_ceil(double a, double granularity) 68 68 { 69 + if (granularity == 0) 70 + return 0; 69 71 return (double) dcn_bw_ceil2(a, granularity); 70 72 } 71 73 72 74 static inline double dml_floor(double a, double granularity) 73 75 { 76 + if (granularity == 0) 77 + return 0; 74 78 return (double) dcn_bw_floor2(a, granularity); 75 79 } 76 80 ··· 118 114 119 115 static inline double dml_ceil_ex(double x, double granularity) 120 116 { 117 + if (granularity == 0) 118 + return 0; 121 119 return (double) dcn_bw_ceil2(x, granularity); 122 120 } 123 121 124 122 static inline double dml_floor_ex(double x, double granularity) 125 123 { 124 + if (granularity == 0) 125 + return 0; 126 126 return (double) dcn_bw_floor2(x, granularity); 127 127 } 128 128
+4
drivers/gpu/drm/amd/display/dc/dml2/Makefile
··· 29 29 30 30 ifneq ($(CONFIG_FRAME_WARN),0) 31 31 ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) 32 + ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) 33 + frame_warn_flag := -Wframe-larger-than=4096 34 + else 32 35 frame_warn_flag := -Wframe-larger-than=3072 36 + endif 33 37 else 34 38 frame_warn_flag := -Wframe-larger-than=2048 35 39 endif
+20 -82
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
··· 10 10 #include "dml21_utils.h" 11 11 #include "dml21_translation_helper.h" 12 12 #include "bounding_boxes/dcn4_soc_bb.h" 13 - #include "bounding_boxes/dcn3_soc_bb.h" 14 13 15 14 static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_init, 16 15 const struct dml2_configuration_options *config, ··· 19 20 const struct dml2_soc_qos_parameters *qos_params; 20 21 21 22 switch (in_dc->ctx->dce_version) { 22 - case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. 23 - soc_bb = &dml2_socbb_dcn31; 24 - qos_params = &dml_dcn31_soc_qos_params; 25 - break; 26 23 case DCN_VERSION_4_01: 27 24 default: 28 25 if (config->bb_from_dmub) ··· 55 60 const struct dml2_ip_capabilities *ip_caps; 56 61 57 62 switch (in_dc->ctx->dce_version) { 58 - case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. 59 - ip_caps = &dml2_dcn31_max_ip_caps; 60 - break; 61 63 case DCN_VERSION_4_01: 62 64 default: 63 65 ip_caps = &dml2_dcn401_max_ip_caps; ··· 294 302 dml_soc_bb->power_management_parameters.stutter_exit_latency_us = 295 303 (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns + 9) / 10; 296 304 297 - if (in_dc->ctx->dc_bios->vram_info.num_chans) { 305 + if (dc_bw_params->num_channels) { 306 + dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels; 307 + dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; 308 + } else if (in_dc->ctx->dc_bios->vram_info.num_chans) { 298 309 dml_clk_table->dram_config.channel_count = in_dc->ctx->dc_bios->vram_info.num_chans; 299 310 dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; 300 311 } 301 312 302 - if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { 313 + if (dc_bw_params->dram_channel_width_bytes) { 314 + dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes; 315 + } else if 
(in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { 303 316 dml_clk_table->dram_config.channel_width_bytes = in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; 304 317 } 305 318 ··· 718 721 surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c; 719 722 surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch; 720 723 surface->dcc.plane1.pitch = plane_state->dcc.meta_pitch_c; 721 - if (in_dc->ctx->dce_version < DCN_VERSION_4_01) { 722 - /* needed for N-1 testing */ 724 + 725 + // Update swizzle / array mode based on the gfx_format 726 + switch (plane_state->tiling_info.gfxversion) { 727 + case DcGfxVersion7: 728 + case DcGfxVersion8: 729 + // Placeholder for programming the array_mode 730 + break; 731 + case DcGfxVersion9: 732 + case DcGfxVersion10: 733 + case DcGfxVersion11: 723 734 surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle); 724 - } else { 735 + break; 736 + case DcGfxAddr3: 725 737 surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle); 738 + break; 726 739 } 727 740 } 728 741 ··· 1088 1081 context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; 1089 1082 } 1090 1083 1091 - void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) 1092 - { 1093 - struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib; 1094 - double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; 1095 - 1096 - if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) { 1097 - /* invalid register set index */ 1098 - return; 1099 - } 1100 - 1101 - /* convert to legacy format (time in ns) */ 1102 - watermark->urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; 1103 - watermark->pte_meta_urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; 1104 - watermark->cstate_pstate.cstate_enter_plus_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_enter / refclk_freq_in_mhz) * 1000.0; 1105 - watermark->cstate_pstate.cstate_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_exit / refclk_freq_in_mhz) * 1000.0; 1106 - watermark->cstate_pstate.pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].uclk_pstate / refclk_freq_in_mhz) * 1000.0; 1107 - watermark->urgent_latency_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; 1108 - watermark->cstate_pstate.fclk_pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].fclk_pstate / refclk_freq_in_mhz) * 1000.0; 1109 - watermark->frac_urg_bw_flip = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_flip; 1110 - watermark->frac_urg_bw_nom = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_nom; 1111 - } 1112 - 1113 1084 static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index) 1114 1085 { 1115 1086 struct dml2_dchub_watermark_regs *wm_regs = NULL; 
··· 1129 1144 &programming->global_regs.wm_regs[wm_index], 1130 1145 sizeof(struct dml2_dchub_watermark_regs)); 1131 1146 } 1132 - } 1133 - 1134 - 1135 - void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming) 1136 - { 1137 - unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end; 1138 - struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; 1139 - union dml2_global_sync_programming *global_sync = &stream_programming->global_sync; 1140 - 1141 - hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right + pipe_ctx->hblank_borrow; 1142 - vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top; 1143 - hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch; 1144 - vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch; 1145 - 1146 - hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right - pipe_ctx->hblank_borrow; 1147 - vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; 1148 - 1149 - if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { 1150 - /* phantom has its own global sync */ 1151 - global_sync = &stream_programming->phantom_stream.global_sync; 1152 - } 1153 - 1154 - pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines; 1155 - pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels; 1156 - pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels; 1157 - pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels; 1158 - pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines; 1159 - 1160 - pipe_ctx->pipe_dlg_param.otg_inst = 
pipe_ctx->stream_res.tg->inst; 1161 - 1162 - pipe_ctx->pipe_dlg_param.hactive = hactive; 1163 - pipe_ctx->pipe_dlg_param.vactive = vactive; 1164 - pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total; 1165 - pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total; 1166 - pipe_ctx->pipe_dlg_param.hblank_end = hblank_end; 1167 - pipe_ctx->pipe_dlg_param.vblank_end = vblank_end; 1168 - pipe_ctx->pipe_dlg_param.hblank_start = hblank_start; 1169 - pipe_ctx->pipe_dlg_param.vblank_start = vblank_start; 1170 - pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch; 1171 - pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00; 1172 - pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total; 1173 - pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max; 1174 - pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min; 1175 - pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height; 1176 - pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width; 1177 - pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height; 1178 - pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width; 1179 1147 } 1180 1148 1181 1149 void dml21_map_hw_resources(struct dml2_context *dml_ctx)
-2
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
··· 21 21 void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); 22 22 bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); 23 23 void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context); 24 - void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming); 25 - void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx); 26 24 void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); 27 25 void dml21_map_hw_resources(struct dml2_context *dml_ctx); 28 26 void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config);
+13 -112
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
··· 142 142 return num_pipes; 143 143 } 144 144 145 - 146 - void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs, 147 - struct dml2_display_dlg_regs *disp_dlg_regs, 148 - struct dml2_display_ttu_regs *disp_ttu_regs, 149 - struct pipe_ctx *out) 145 + void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, 146 + struct dc_state *context, 147 + struct pipe_ctx *pipe_ctx, 148 + struct dml2_per_stream_programming *stream_programming) 150 149 { 151 - memset(&out->rq_regs, 0, sizeof(out->rq_regs)); 152 - out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size; 153 - out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size; 154 - //out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size; 155 - //out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size; 156 - out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size; 157 - out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size; 158 - out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height; 159 - out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear; 150 + union dml2_global_sync_programming *global_sync = &stream_programming->global_sync; 160 151 161 - out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size; 162 - out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size; 163 - //out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size; 164 - //out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size; 165 - out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size; 166 - out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size; 167 - out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height; 168 - out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear; 152 + if 
(dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { 153 + /* phantom has its own global sync */ 154 + global_sync = &stream_programming->phantom_stream.global_sync; 155 + } 169 156 170 - out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode; 171 - out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode; 172 - //out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode; 173 - out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode; 174 - out->rq_regs.plane1_base_address = rq_regs->plane1_base_address; 175 - out->unbounded_req = rq_regs->unbounded_request_enabled; 176 - 177 - memset(&out->dlg_regs, 0, sizeof(out->dlg_regs)); 178 - out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end; 179 - out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end; 180 - out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; 181 - out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal; 182 - out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler; 183 - out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler; 184 - out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch; 185 - out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank; 186 - out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank; 187 - out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip; 188 - out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip; 189 - out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq; 190 - out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch; 191 - out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c; 192 - out->dlg_regs.refcyc_per_tdlut_group = disp_dlg_regs->refcyc_per_tdlut_group; 193 - out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l; 194 - out->dlg_regs.refcyc_per_pte_group_vblank_c = 
disp_dlg_regs->refcyc_per_pte_group_vblank_c; 195 - //out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; 196 - //out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c; 197 - out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l; 198 - out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c; 199 - //out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l; 200 - //out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c; 201 - out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l; 202 - out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c; 203 - out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l; 204 - out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c; 205 - //out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l; 206 - //out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c; 207 - //out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l; 208 - //out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c; 209 - out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l; 210 - out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c; 211 - out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l; 212 - out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c; 213 - out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank; 214 - out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip; 215 - out->dlg_regs.refcyc_per_vm_req_vblank = 
disp_dlg_regs->refcyc_per_vm_req_vblank; 216 - out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip; 217 - out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0; 218 - out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0; 219 - //out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1; 220 - //out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1; 221 - out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0; 222 - out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit; 223 - out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata; 224 - out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta; 225 - 226 - memset(&out->ttu_regs, 0, sizeof(out->ttu_regs)); 227 - out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm; 228 - out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm; 229 - out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank; 230 - out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip; 231 - out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l; 232 - out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c; 233 - out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0; 234 - //out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1; 235 - out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l; 236 - out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c; 237 - out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0; 238 - //out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1; 239 - out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l; 240 - out->ttu_regs.qos_level_fixed_c = 
disp_ttu_regs->qos_level_fixed_c; 241 - out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0; 242 - //out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1; 243 - out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l; 244 - out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c; 245 - out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0; 246 - //out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1; 157 + memcpy(&pipe_ctx->global_sync, 158 + global_sync, 159 + sizeof(union dml2_global_sync_programming)); 247 160 } 248 161 249 162 void dml21_populate_mall_allocation_size(struct dc_state *context, ··· 214 301 { 215 302 unsigned int pipe_reg_index = 0; 216 303 217 - dml21_populate_pipe_ctx_dlg_params(dml_ctx, context, pipe_ctx, stream_prog); 304 + dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog); 218 305 find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index); 219 306 220 307 if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { 221 308 memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); 222 309 pipe_ctx->unbounded_req = false; 223 - 224 - /* legacy only, should be removed later */ 225 - dml21_update_pipe_ctx_dchub_regs(&pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->rq_regs, 226 - &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->dlg_regs, 227 - &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx); 228 - 229 310 pipe_ctx->det_buffer_size_kb = 0; 230 311 } else { 231 312 memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); 232 313 pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled; 233 - 234 - /* legacy only, should be removed later */ 235 - 
dml21_update_pipe_ctx_dchub_regs(&pln_prog->pipe_regs[pipe_reg_index]->rq_regs, 236 - &pln_prog->pipe_regs[pipe_reg_index]->dlg_regs, 237 - &pln_prog->pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx); 238 - 239 314 pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; 240 315 } 241 316
+4 -4
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
··· 18 18 int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); 19 19 int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id); 20 20 bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id); 21 - void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs, 22 - struct dml2_display_dlg_regs *disp_dlg_regs, 23 - struct dml2_display_ttu_regs *disp_ttu_regs, 24 - struct pipe_ctx *out); 21 + void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, 22 + struct dc_state *context, 23 + struct pipe_ctx *pipe_ctx, 24 + struct dml2_per_stream_programming *stream_programming); 25 25 void dml21_populate_mall_allocation_size(struct dc_state *context, 26 26 struct dml2_context *in_ctx, 27 27 struct dml2_per_plane_programming *pln_prog,
-8
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
··· 75 75 { 76 76 switch (in_dc->ctx->dce_version) { 77 77 case DCN_VERSION_4_01: 78 - case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. 79 78 (*dml_ctx)->v21.dml_init.options.project_id = dml2_project_dcn4x_stage2_auto_drr_svp; 80 79 break; 81 80 default: ··· 232 233 dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count); 233 234 dml21_copy_clocks_to_dc_state(dml_ctx, context); 234 235 dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx); 235 - if (in_dc->ctx->dce_version == DCN_VERSION_3_2) { 236 - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.a, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); 237 - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.b, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); 238 - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.c, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); 239 - dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.d, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); 240 - } 241 - 242 236 dml21_build_fams2_programming(in_dc, context, dml_ctx); 243 237 } 244 238
-401
drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
··· 1 - /* 2 - * Copyright 2022 Advanced Micro Devices, Inc. 3 - * 4 - * Permission is hereby granted, free of charge, to any person obtaining a 5 - * copy of this software and associated documentation files (the "Software"), 6 - * to deal in the Software without restriction, including without limitation 7 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 - * and/or sell copies of the Software, and to permit persons to whom the 9 - * Software is furnished to do so, subject to the following conditions: 10 - * 11 - * The above copyright notice and this permission notice shall be included in 12 - * all copies or substantial portions of the Software. 13 - * 14 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 - * OTHER DEALINGS IN THE SOFTWARE. 
21 - * 22 - * Authors: AMD 23 - * 24 - */ 25 - 26 - #ifndef __DML_DML_DCN3_SOC_BB__ 27 - #define __DML_DML_DCN3_SOC_BB__ 28 - 29 - #include "dml_top_soc_parameter_types.h" 30 - 31 - static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { 32 - .derate_table = { 33 - .system_active_urgent = { 34 - .dram_derate_percent_pixel = 22, 35 - .dram_derate_percent_vm = 0, 36 - .dram_derate_percent_pixel_and_vm = 0, 37 - .fclk_derate_percent = 76, 38 - .dcfclk_derate_percent = 100, 39 - }, 40 - .system_active_average = { 41 - .dram_derate_percent_pixel = 17, 42 - .dram_derate_percent_vm = 0, 43 - .dram_derate_percent_pixel_and_vm = 0, 44 - .fclk_derate_percent = 57, 45 - .dcfclk_derate_percent = 75, 46 - }, 47 - .dcn_mall_prefetch_urgent = { 48 - .dram_derate_percent_pixel = 22, 49 - .dram_derate_percent_vm = 0, 50 - .dram_derate_percent_pixel_and_vm = 0, 51 - .fclk_derate_percent = 76, 52 - .dcfclk_derate_percent = 100, 53 - }, 54 - .dcn_mall_prefetch_average = { 55 - .dram_derate_percent_pixel = 17, 56 - .dram_derate_percent_vm = 0, 57 - .dram_derate_percent_pixel_and_vm = 0, 58 - .fclk_derate_percent = 57, 59 - .dcfclk_derate_percent = 75, 60 - }, 61 - .system_idle_average = { 62 - .dram_derate_percent_pixel = 17, 63 - .dram_derate_percent_vm = 0, 64 - .dram_derate_percent_pixel_and_vm = 0, 65 - .fclk_derate_percent = 57, 66 - .dcfclk_derate_percent = 100, 67 - }, 68 - }, 69 - .writeback = { 70 - .base_latency_us = 12, 71 - .scaling_factor_us = 0, 72 - .scaling_factor_mhz = 0, 73 - }, 74 - .qos_params = { 75 - .dcn4x = { 76 - .df_qos_response_time_fclk_cycles = 300, 77 - .max_round_trip_to_furthest_cs_fclk_cycles = 350, 78 - .mall_overhead_fclk_cycles = 50, 79 - .meta_trip_adder_fclk_cycles = 36, 80 - .average_transport_distance_fclk_cycles = 257, 81 - .umc_urgent_ramp_latency_margin = 50, 82 - .umc_max_latency_margin = 30, 83 - .umc_average_latency_margin = 20, 84 - .fabric_max_transport_latency_margin = 20, 85 - .fabric_average_transport_latency_margin = 
10, 86 - 87 - .per_uclk_dpm_params = { 88 - { 89 - .minimum_uclk_khz = 97, 90 - .urgent_ramp_uclk_cycles = 472, 91 - .trip_to_memory_uclk_cycles = 827, 92 - .meta_trip_to_memory_uclk_cycles = 827, 93 - .maximum_latency_when_urgent_uclk_cycles = 72, 94 - .average_latency_when_urgent_uclk_cycles = 61, 95 - .maximum_latency_when_non_urgent_uclk_cycles = 827, 96 - .average_latency_when_non_urgent_uclk_cycles = 118, 97 - }, 98 - { 99 - .minimum_uclk_khz = 435, 100 - .urgent_ramp_uclk_cycles = 546, 101 - .trip_to_memory_uclk_cycles = 848, 102 - .meta_trip_to_memory_uclk_cycles = 848, 103 - .maximum_latency_when_urgent_uclk_cycles = 146, 104 - .average_latency_when_urgent_uclk_cycles = 90, 105 - .maximum_latency_when_non_urgent_uclk_cycles = 848, 106 - .average_latency_when_non_urgent_uclk_cycles = 135, 107 - }, 108 - { 109 - .minimum_uclk_khz = 731, 110 - .urgent_ramp_uclk_cycles = 632, 111 - .trip_to_memory_uclk_cycles = 874, 112 - .meta_trip_to_memory_uclk_cycles = 874, 113 - .maximum_latency_when_urgent_uclk_cycles = 232, 114 - .average_latency_when_urgent_uclk_cycles = 124, 115 - .maximum_latency_when_non_urgent_uclk_cycles = 874, 116 - .average_latency_when_non_urgent_uclk_cycles = 155, 117 - }, 118 - { 119 - .minimum_uclk_khz = 1187, 120 - .urgent_ramp_uclk_cycles = 716, 121 - .trip_to_memory_uclk_cycles = 902, 122 - .meta_trip_to_memory_uclk_cycles = 902, 123 - .maximum_latency_when_urgent_uclk_cycles = 316, 124 - .average_latency_when_urgent_uclk_cycles = 160, 125 - .maximum_latency_when_non_urgent_uclk_cycles = 902, 126 - .average_latency_when_non_urgent_uclk_cycles = 177, 127 - }, 128 - }, 129 - }, 130 - }, 131 - .qos_type = dml2_qos_param_type_dcn4x, 132 - }; 133 - 134 - static const struct dml2_soc_bb dml2_socbb_dcn31 = { 135 - .clk_table = { 136 - .uclk = { 137 - .clk_values_khz = {97000, 435000, 731000, 1187000}, 138 - .num_clk_values = 4, 139 - }, 140 - .fclk = { 141 - .clk_values_khz = {300000, 2500000}, 142 - .num_clk_values = 2, 143 - }, 144 - .dcfclk = 
{ 145 - .clk_values_khz = {200000, 1800000}, 146 - .num_clk_values = 2, 147 - }, 148 - .dispclk = { 149 - .clk_values_khz = {100000, 2000000}, 150 - .num_clk_values = 2, 151 - }, 152 - .dppclk = { 153 - .clk_values_khz = {100000, 2000000}, 154 - .num_clk_values = 2, 155 - }, 156 - .dtbclk = { 157 - .clk_values_khz = {100000, 2000000}, 158 - .num_clk_values = 2, 159 - }, 160 - .phyclk = { 161 - .clk_values_khz = {810000, 810000}, 162 - .num_clk_values = 2, 163 - }, 164 - .socclk = { 165 - .clk_values_khz = {300000, 1600000}, 166 - .num_clk_values = 2, 167 - }, 168 - .dscclk = { 169 - .clk_values_khz = {666667, 666667}, 170 - .num_clk_values = 2, 171 - }, 172 - .phyclk_d18 = { 173 - .clk_values_khz = {625000, 625000}, 174 - .num_clk_values = 2, 175 - }, 176 - .phyclk_d32 = { 177 - .clk_values_khz = {2000000, 2000000}, 178 - .num_clk_values = 2, 179 - }, 180 - .dram_config = { 181 - .channel_width_bytes = 2, 182 - .channel_count = 16, 183 - .transactions_per_clock = 16, 184 - }, 185 - }, 186 - 187 - .qos_parameters = { 188 - .derate_table = { 189 - .system_active_urgent = { 190 - .dram_derate_percent_pixel = 22, 191 - .dram_derate_percent_vm = 0, 192 - .dram_derate_percent_pixel_and_vm = 0, 193 - .fclk_derate_percent = 76, 194 - .dcfclk_derate_percent = 100, 195 - }, 196 - .system_active_average = { 197 - .dram_derate_percent_pixel = 17, 198 - .dram_derate_percent_vm = 0, 199 - .dram_derate_percent_pixel_and_vm = 0, 200 - .fclk_derate_percent = 57, 201 - .dcfclk_derate_percent = 75, 202 - }, 203 - .dcn_mall_prefetch_urgent = { 204 - .dram_derate_percent_pixel = 22, 205 - .dram_derate_percent_vm = 0, 206 - .dram_derate_percent_pixel_and_vm = 0, 207 - .fclk_derate_percent = 76, 208 - .dcfclk_derate_percent = 100, 209 - }, 210 - .dcn_mall_prefetch_average = { 211 - .dram_derate_percent_pixel = 17, 212 - .dram_derate_percent_vm = 0, 213 - .dram_derate_percent_pixel_and_vm = 0, 214 - .fclk_derate_percent = 57, 215 - .dcfclk_derate_percent = 75, 216 - }, 217 - 
.system_idle_average = { 218 - .dram_derate_percent_pixel = 17, 219 - .dram_derate_percent_vm = 0, 220 - .dram_derate_percent_pixel_and_vm = 0, 221 - .fclk_derate_percent = 57, 222 - .dcfclk_derate_percent = 100, 223 - }, 224 - }, 225 - .writeback = { 226 - .base_latency_us = 0, 227 - .scaling_factor_us = 0, 228 - .scaling_factor_mhz = 0, 229 - }, 230 - .qos_params = { 231 - .dcn4x = { 232 - .df_qos_response_time_fclk_cycles = 300, 233 - .max_round_trip_to_furthest_cs_fclk_cycles = 350, 234 - .mall_overhead_fclk_cycles = 50, 235 - .meta_trip_adder_fclk_cycles = 36, 236 - .average_transport_distance_fclk_cycles = 260, 237 - .umc_urgent_ramp_latency_margin = 50, 238 - .umc_max_latency_margin = 30, 239 - .umc_average_latency_margin = 20, 240 - .fabric_max_transport_latency_margin = 20, 241 - .fabric_average_transport_latency_margin = 10, 242 - 243 - .per_uclk_dpm_params = { 244 - { 245 - // State 1 246 - .minimum_uclk_khz = 0, 247 - .urgent_ramp_uclk_cycles = 472, 248 - .trip_to_memory_uclk_cycles = 827, 249 - .meta_trip_to_memory_uclk_cycles = 827, 250 - .maximum_latency_when_urgent_uclk_cycles = 72, 251 - .average_latency_when_urgent_uclk_cycles = 72, 252 - .maximum_latency_when_non_urgent_uclk_cycles = 827, 253 - .average_latency_when_non_urgent_uclk_cycles = 117, 254 - }, 255 - { 256 - // State 2 257 - .minimum_uclk_khz = 0, 258 - .urgent_ramp_uclk_cycles = 546, 259 - .trip_to_memory_uclk_cycles = 848, 260 - .meta_trip_to_memory_uclk_cycles = 848, 261 - .maximum_latency_when_urgent_uclk_cycles = 146, 262 - .average_latency_when_urgent_uclk_cycles = 146, 263 - .maximum_latency_when_non_urgent_uclk_cycles = 848, 264 - .average_latency_when_non_urgent_uclk_cycles = 133, 265 - }, 266 - { 267 - // State 3 268 - .minimum_uclk_khz = 0, 269 - .urgent_ramp_uclk_cycles = 564, 270 - .trip_to_memory_uclk_cycles = 853, 271 - .meta_trip_to_memory_uclk_cycles = 853, 272 - .maximum_latency_when_urgent_uclk_cycles = 164, 273 - .average_latency_when_urgent_uclk_cycles = 164, 274 - 
.maximum_latency_when_non_urgent_uclk_cycles = 853, 275 - .average_latency_when_non_urgent_uclk_cycles = 136, 276 - }, 277 - { 278 - // State 4 279 - .minimum_uclk_khz = 0, 280 - .urgent_ramp_uclk_cycles = 613, 281 - .trip_to_memory_uclk_cycles = 869, 282 - .meta_trip_to_memory_uclk_cycles = 869, 283 - .maximum_latency_when_urgent_uclk_cycles = 213, 284 - .average_latency_when_urgent_uclk_cycles = 213, 285 - .maximum_latency_when_non_urgent_uclk_cycles = 869, 286 - .average_latency_when_non_urgent_uclk_cycles = 149, 287 - }, 288 - { 289 - // State 5 290 - .minimum_uclk_khz = 0, 291 - .urgent_ramp_uclk_cycles = 632, 292 - .trip_to_memory_uclk_cycles = 874, 293 - .meta_trip_to_memory_uclk_cycles = 874, 294 - .maximum_latency_when_urgent_uclk_cycles = 232, 295 - .average_latency_when_urgent_uclk_cycles = 232, 296 - .maximum_latency_when_non_urgent_uclk_cycles = 874, 297 - .average_latency_when_non_urgent_uclk_cycles = 153, 298 - }, 299 - { 300 - // State 6 301 - .minimum_uclk_khz = 0, 302 - .urgent_ramp_uclk_cycles = 665, 303 - .trip_to_memory_uclk_cycles = 885, 304 - .meta_trip_to_memory_uclk_cycles = 885, 305 - .maximum_latency_when_urgent_uclk_cycles = 265, 306 - .average_latency_when_urgent_uclk_cycles = 265, 307 - .maximum_latency_when_non_urgent_uclk_cycles = 885, 308 - .average_latency_when_non_urgent_uclk_cycles = 161, 309 - }, 310 - { 311 - // State 7 312 - .minimum_uclk_khz = 0, 313 - .urgent_ramp_uclk_cycles = 689, 314 - .trip_to_memory_uclk_cycles = 895, 315 - .meta_trip_to_memory_uclk_cycles = 895, 316 - .maximum_latency_when_urgent_uclk_cycles = 289, 317 - .average_latency_when_urgent_uclk_cycles = 289, 318 - .maximum_latency_when_non_urgent_uclk_cycles = 895, 319 - .average_latency_when_non_urgent_uclk_cycles = 167, 320 - }, 321 - { 322 - // State 8 323 - .minimum_uclk_khz = 0, 324 - .urgent_ramp_uclk_cycles = 716, 325 - .trip_to_memory_uclk_cycles = 902, 326 - .meta_trip_to_memory_uclk_cycles = 902, 327 - .maximum_latency_when_urgent_uclk_cycles = 316, 
328 - .average_latency_when_urgent_uclk_cycles = 316, 329 - .maximum_latency_when_non_urgent_uclk_cycles = 902, 330 - .average_latency_when_non_urgent_uclk_cycles = 174, 331 - }, 332 - }, 333 - }, 334 - }, 335 - .qos_type = dml2_qos_param_type_dcn4x, 336 - }, 337 - 338 - .power_management_parameters = { 339 - .dram_clk_change_blackout_us = 400, 340 - .fclk_change_blackout_us = 0, 341 - .g7_ppt_blackout_us = 0, 342 - .stutter_enter_plus_exit_latency_us = 50, 343 - .stutter_exit_latency_us = 43, 344 - .z8_stutter_enter_plus_exit_latency_us = 0, 345 - .z8_stutter_exit_latency_us = 0, 346 - }, 347 - 348 - .vmin_limit = { 349 - .dispclk_khz = 600 * 1000, 350 - }, 351 - 352 - .dprefclk_mhz = 700, 353 - .xtalclk_mhz = 100, 354 - .pcie_refclk_mhz = 100, 355 - .dchub_refclk_mhz = 50, 356 - .mall_allocated_for_dcn_mbytes = 64, 357 - .max_outstanding_reqs = 512, 358 - .fabric_datapath_to_dcn_data_return_bytes = 64, 359 - .return_bus_width_bytes = 64, 360 - .hostvm_min_page_size_kbytes = 0, 361 - .gpuvm_min_page_size_kbytes = 256, 362 - .phy_downspread_percent = 0, 363 - .dcn_downspread_percent = 0, 364 - .dispclk_dppclk_vco_speed_mhz = 4500, 365 - .do_urgent_latency_adjustment = 0, 366 - .mem_word_bytes = 32, 367 - .num_dcc_mcaches = 8, 368 - .mcache_size_bytes = 2048, 369 - .mcache_line_size_bytes = 32, 370 - .max_fclk_for_uclk_dpm_khz = 1250 * 1000, 371 - }; 372 - 373 - static const struct dml2_ip_capabilities dml2_dcn31_max_ip_caps = { 374 - .pipe_count = 4, 375 - .otg_count = 4, 376 - .num_dsc = 4, 377 - .max_num_dp2p0_streams = 4, 378 - .max_num_hdmi_frl_outputs = 1, 379 - .max_num_dp2p0_outputs = 4, 380 - .rob_buffer_size_kbytes = 192, 381 - .config_return_buffer_size_in_kbytes = 1152, 382 - .meta_fifo_size_in_kentries = 22, 383 - .compressed_buffer_segment_size_in_kbytes = 64, 384 - .subvp_drr_scheduling_margin_us = 100, 385 - .subvp_prefetch_end_to_mall_start_us = 15, 386 - .subvp_fw_processing_delay = 15, 387 - 388 - .fams2 = { 389 - .max_allow_delay_us = 100 * 1000, 
390 - .scheduling_delay_us = 50, 391 - .vertical_interrupt_ack_delay_us = 18, 392 - .allow_programming_delay_us = 18, 393 - .min_allow_width_us = 20, 394 - .subvp_df_throttle_delay_us = 100, 395 - .subvp_programming_delay_us = 18, 396 - .subvp_prefetch_to_mall_delay_us = 18, 397 - .drr_programming_delay_us = 18, 398 - }, 399 - }; 400 - 401 - #endif /* __DML_DML_DCN3_SOC_BB__ */
+100 -11
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
··· 600 600 { 601 601 *BytePerPixelDETY = 0; 602 602 *BytePerPixelDETC = 0; 603 - *BytePerPixelY = 0; 604 - *BytePerPixelC = 0; 603 + *BytePerPixelY = 1; 604 + *BytePerPixelC = 1; 605 605 606 606 if (SourcePixelFormat == dml2_444_64) { 607 607 *BytePerPixelDETY = 8; ··· 3528 3528 dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); 3529 3529 dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); 3530 3530 #endif 3531 - 3532 3531 } 3533 3532 3534 - static void CalculateDCFCLKDeepSleep( 3533 + static void CalculateDCFCLKDeepSleepTdlut( 3535 3534 const struct dml2_display_cfg *display_cfg, 3536 3535 unsigned int NumberOfActiveSurfaces, 3537 3536 unsigned int BytePerPixelY[], ··· 3544 3545 double ReadBandwidthLuma[], 3545 3546 double ReadBandwidthChroma[], 3546 3547 unsigned int ReturnBusWidth, 3548 + 3549 + double dispclk, 3550 + unsigned int tdlut_bytes_to_deliver[], 3551 + double prefetch_swath_time_us[], 3547 3552 3548 3553 // Output 3549 3554 double *DCFClkDeepSleep) ··· 3583 3580 } 3584 3581 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); 3585 3582 3583 + // adjust for 3dlut delivery time 3584 + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { 3585 + double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; 3586 + 3587 + dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 3588 + dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); 3589 + dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); 3590 + dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); 3591 + 3592 + // increase the deepsleep dcfclk to match the original dispclk 
throughput rate 3593 + if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { 3594 + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk); 3595 + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0); 3596 + } 3597 + } 3598 + 3586 3599 #ifdef __DML_VBA_DEBUG__ 3587 3600 dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); 3588 3601 dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); ··· 3621 3602 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 3622 3603 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 3623 3604 } 3605 + 3624 3606 dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 3607 + } 3608 + 3609 + static void CalculateDCFCLKDeepSleep( 3610 + const struct dml2_display_cfg *display_cfg, 3611 + unsigned int NumberOfActiveSurfaces, 3612 + unsigned int BytePerPixelY[], 3613 + unsigned int BytePerPixelC[], 3614 + unsigned int SwathWidthY[], 3615 + unsigned int SwathWidthC[], 3616 + unsigned int DPPPerSurface[], 3617 + double PSCL_THROUGHPUT[], 3618 + double PSCL_THROUGHPUT_CHROMA[], 3619 + double Dppclk[], 3620 + double ReadBandwidthLuma[], 3621 + double ReadBandwidthChroma[], 3622 + unsigned int ReturnBusWidth, 3623 + 3624 + // Output 3625 + double *DCFClkDeepSleep) 3626 + { 3627 + double zero_double[DML2_MAX_PLANES]; 3628 + unsigned int zero_integer[DML2_MAX_PLANES]; 3629 + 3630 + memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double)); 3631 + memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int)); 3632 + 3633 + CalculateDCFCLKDeepSleepTdlut( 3634 + display_cfg, 3635 + NumberOfActiveSurfaces, 3636 + BytePerPixelY, 3637 + BytePerPixelC, 3638 + SwathWidthY, 3639 + SwathWidthC, 3640 + DPPPerSurface, 3641 + PSCL_THROUGHPUT, 3642 + PSCL_THROUGHPUT_CHROMA, 3643 + Dppclk, 3644 + ReadBandwidthLuma, 3645 + 
ReadBandwidthChroma, 3646 + ReturnBusWidth, 3647 + 0, 3648 + zero_integer, //tdlut_bytes_to_deliver, 3649 + zero_double, //prefetch_swath_time_us, 3650 + 3651 + // Output 3652 + DCFClkDeepSleep); 3625 3653 } 3626 3654 3627 3655 static double CalculateWriteBackDelay( ··· 4670 4604 *p->tdlut_groups_per_2row_ub = 0; 4671 4605 *p->tdlut_opt_time = 0; 4672 4606 *p->tdlut_drain_time = 0; 4607 + *p->tdlut_bytes_to_deliver = 0; 4673 4608 *p->tdlut_bytes_per_group = 0; 4674 4609 *p->tdlut_pte_bytes_per_frame = 0; 4675 4610 *p->tdlut_bytes_per_frame = 0; ··· 4739 4672 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); 4740 4673 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; 4741 4674 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; 4675 + *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); 4742 4676 } 4743 4677 4744 4678 #ifdef __DML_VBA_DEBUG__ ··· 4760 4692 dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles); 4761 4693 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); 4762 4694 dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); 4695 + dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); 4763 4696 dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); 4764 4697 #endif 4765 4698 } ··· 5769 5700 5770 5701 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); 5771 5702 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); 5703 + *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); 5772 5704 5773 5705 #ifdef __DML_VBA_DEBUG__ 5774 5706 dml2_printf("DML::%s: TimeForFetchingVM = %f\n", 
__func__, s->TimeForFetchingVM); ··· 5780 5710 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); 5781 5711 dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); 5782 5712 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); 5713 + dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); 5783 5714 5784 5715 dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); 5785 5716 dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); ··· 8888 8817 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; 8889 8818 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; 8890 8819 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; 8820 + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; 8891 8821 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; 8892 8822 8893 8823 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); ··· 9081 9009 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; 9082 9010 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; 9083 9011 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; 9012 + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; 9084 9013 9085 9014 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); 9086 9015 ··· 9089 9016 dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); 9090 9017 dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); 
9091 9018 } // for k num_planes 9019 + 9020 + CalculateDCFCLKDeepSleepTdlut( 9021 + display_cfg, 9022 + mode_lib->ms.num_active_planes, 9023 + mode_lib->ms.BytePerPixelY, 9024 + mode_lib->ms.BytePerPixelC, 9025 + mode_lib->ms.SwathWidthY, 9026 + mode_lib->ms.SwathWidthC, 9027 + mode_lib->ms.NoOfDPP, 9028 + mode_lib->ms.PSCL_FACTOR, 9029 + mode_lib->ms.PSCL_FACTOR_CHROMA, 9030 + mode_lib->ms.RequiredDPPCLK, 9031 + mode_lib->ms.vactive_sw_bw_l, 9032 + mode_lib->ms.vactive_sw_bw_c, 9033 + mode_lib->soc.return_bus_width_bytes, 9034 + mode_lib->ms.RequiredDISPCLK, 9035 + s->tdlut_bytes_to_deliver, 9036 + s->prefetch_swath_time_us, 9037 + 9038 + /* Output */ 9039 + &mode_lib->ms.dcfclk_deepsleep); 9092 9040 9093 9041 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 9094 9042 if (mode_lib->ms.dst_y_prefetch[k] < 2.0 ··· 10462 10368 dml2_assert(s->SOCCLK > 0); 10463 10369 10464 10370 #ifdef __DML_VBA_DEBUG__ 10465 - // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes); 10466 - // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes); 10467 - // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes); 10468 - // dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes); 10469 - // dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes); 10470 - 10471 10371 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); 10472 10372 dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); 10473 10373 dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); ··· 10920 10832 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; 10921 10833 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; 10922 10834 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; 10835 + 
calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; 10923 10836 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; 10924 - 10925 10837 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); 10926 10838 } 10927 10839 ··· 11307 11219 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; 11308 11220 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; 11309 11221 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; 11222 + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0]; 11310 11223 11311 11224 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); 11312 11225
+5 -1
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
··· 958 958 unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; 959 959 double tdlut_opt_time[DML2_MAX_PLANES]; 960 960 double tdlut_drain_time[DML2_MAX_PLANES]; 961 + unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES]; 961 962 unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; 962 963 963 964 unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; ··· 980 979 enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; 981 980 unsigned int lb_source_lines_l[DML2_MAX_PLANES]; 982 981 unsigned int lb_source_lines_c[DML2_MAX_PLANES]; 982 + double prefetch_swath_time_us[DML2_MAX_PLANES]; 983 983 }; 984 984 985 985 struct dml2_core_calcs_mode_programming_locals { ··· 1044 1042 unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; 1045 1043 double tdlut_opt_time[DML2_MAX_PLANES]; 1046 1044 double tdlut_drain_time[DML2_MAX_PLANES]; 1045 + unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES]; 1047 1046 unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; 1048 1047 1049 1048 unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; ··· 1812 1809 unsigned int *VReadyOffsetPix; 1813 1810 double *prefetch_cursor_bw; 1814 1811 double *prefetch_sw_bytes; 1812 + double *prefetch_swath_time_us; 1815 1813 }; 1816 1814 1817 1815 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params { ··· 1997 1993 unsigned int *tdlut_groups_per_2row_ub; 1998 1994 double *tdlut_opt_time; 1999 1995 double *tdlut_drain_time; 1996 + unsigned int *tdlut_bytes_to_deliver; 2000 1997 unsigned int *tdlut_bytes_per_group; 2001 1998 }; 2002 1999 ··· 2142 2137 const struct core_display_cfg_support_info *cfg_support_info; 2143 2138 int min_clk_index; 2144 2139 struct dml2_display_cfg_programming *programming; 2145 - 2146 2140 }; 2147 2141 2148 2142 #endif
+1 -1
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
··· 556 556 { 557 557 bool ret_val = 0; 558 558 559 - if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) 559 + if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha)) 560 560 ret_val = 1; 561 561 562 562 return ret_val;
+7 -7
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
··· 212 212 213 213 clock_khz *= 1.0 + margin; 214 214 215 - divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); 215 + divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); 216 216 217 217 /* we want to floor here to get higher clock than required rather than lower */ 218 218 if (divider < DFS_DIVIDER_RANGE_2_START) { ··· 417 417 418 418 static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask) 419 419 { 420 - unsigned char i; 420 + unsigned int i; 421 421 bool identical = true; 422 422 bool contains_drr = false; 423 - unsigned char remap_array[DML2_MAX_PLANES]; 424 - unsigned char remap_array_size = 0; 423 + unsigned int remap_array[DML2_MAX_PLANES]; 424 + unsigned int remap_array_size = 0; 425 425 426 426 // Create a remap array to enable simple iteration through only masked stream indicies 427 427 for (i = 0; i < display_config->num_streams; i++) { ··· 456 456 457 457 static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask) 458 458 { 459 - unsigned char i; 459 + unsigned int i; 460 460 int min_idle_us = 0; 461 - unsigned char remap_array[DML2_MAX_PLANES]; 462 - unsigned char remap_array_size = 0; 461 + unsigned int remap_array[DML2_MAX_PLANES]; 462 + unsigned int remap_array_size = 0; 463 463 const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; 464 464 465 465 // Create a remap array to enable simple iteration through only masked stream indicies
+8 -4
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
··· 195 195 196 196 static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask) 197 197 { 198 - unsigned char i; 198 + unsigned int i; 199 199 bool identical = true; 200 200 bool contains_drr = false; 201 - unsigned char remap_array[DML2_MAX_PLANES]; 202 - unsigned char remap_array_size = 0; 201 + unsigned int remap_array[DML2_MAX_PLANES]; 202 + unsigned int remap_array_size = 0; 203 203 204 204 // Create a remap array to enable simple iteration through only masked stream indicies 205 205 for (i = 0; i < display_config->display_config.num_streams; i++) { ··· 347 347 int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; 348 348 349 349 for (i = 0; i < display_config->num_streams; i++) { 350 - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz 350 + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) 351 + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz 351 352 / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; 353 + else 354 + odm_load = 0; 355 + 352 356 if (odm_load > highest_odm_load) { 353 357 highest_odm_load_index = i; 354 358 highest_odm_load = odm_load;
+22 -18
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
··· 813 813 int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; 814 814 815 815 for (i = 0; i < display_config->num_streams; i++) { 816 - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz 816 + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) 817 + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz 817 818 / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; 819 + else 820 + odm_load = 0; 821 + 818 822 if (odm_load > highest_odm_load) { 819 823 highest_odm_load_index = i; 820 824 highest_odm_load = odm_load; ··· 990 986 const struct display_configuation_with_meta *display_config, 991 987 unsigned int mask) 992 988 { 993 - unsigned char i; 989 + unsigned int i; 994 990 bool valid = true; 995 991 996 992 // Create a remap array to enable simple iteration through only masked stream indicies ··· 1039 1035 const struct display_configuation_with_meta *display_config, 1040 1036 unsigned int mask) 1041 1037 { 1042 - unsigned char i; 1038 + unsigned int i; 1043 1039 for (i = 0; i < DML2_MAX_PLANES; i++) { 1044 1040 const struct dml2_stream_parameters *stream_descriptor; 1045 1041 const struct dml2_fams2_meta *stream_fams2_meta; ··· 1081 1077 const struct dml2_plane_parameters *plane_descriptor; 1082 1078 const struct dml2_fams2_meta *stream_fams2_meta; 1083 1079 unsigned int microschedule_vlines; 1084 - unsigned char i; 1080 + unsigned int i; 1085 1081 1086 1082 unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 }; 1087 1083 ··· 1198 1194 1199 1195 static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method) 1200 1196 { 1201 - unsigned char i; 1197 + unsigned int i; 1202 1198 1203 1199 for (i = 0; i < DML2_MAX_PLANES; i++) { 1204 1200 if (is_bit_set_in_bitfield(plane_mask, i)) { ··· 1376 1372 if (j_disallow_us < jp1_disallow_us) { 1377 1373 /* swap as A < B */ 1378 1374 
swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j], 1379 - s->pmo_dcn4.sorted_group_gtl_disallow_index[j+1]); 1375 + s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]); 1380 1376 swapped = true; 1381 1377 } 1382 1378 } ··· 1435 1431 if (j_period_us < jp1_period_us) { 1436 1432 /* swap as A < B */ 1437 1433 swap(s->pmo_dcn4.sorted_group_gtl_period_index[j], 1438 - s->pmo_dcn4.sorted_group_gtl_period_index[j+1]); 1434 + s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]); 1439 1435 swapped = true; 1440 1436 } 1441 1437 } ··· 1549 1545 { 1550 1546 struct dml2_pmo_scratch *s = &pmo->scratch; 1551 1547 1552 - unsigned char stream_index = 0; 1548 + unsigned int stream_index = 0; 1553 1549 1554 1550 unsigned int svp_count = 0; 1555 1551 unsigned int svp_stream_mask = 0; ··· 1613 1609 1614 1610 static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) 1615 1611 { 1616 - unsigned char i; 1612 + unsigned int i; 1617 1613 int min_vactive_margin_us = 0xFFFFFFF; 1618 1614 1619 1615 for (i = 0; i < DML2_MAX_PLANES; i++) { ··· 1821 1817 const struct dml2_pmo_pstate_strategy *strategy_list = NULL; 1822 1818 struct dml2_pmo_pstate_strategy override_base_strategy = { 0 }; 1823 1819 unsigned int strategy_list_size = 0; 1824 - unsigned char plane_index, stream_index, i; 1820 + unsigned int plane_index, stream_index, i; 1825 1821 bool build_override_strategy = true; 1826 1822 1827 1823 state->performed = true; ··· 1944 1940 struct dml2_pmo_instance *pmo, 1945 1941 int plane_mask) 1946 1942 { 1947 - unsigned char plane_index; 1943 + unsigned int plane_index; 1948 1944 struct dml2_plane_parameters *plane; 1949 1945 1950 1946 for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { ··· 1965 1961 { 1966 1962 struct dml2_pmo_scratch *scratch = &pmo->scratch; 1967 1963 1968 - unsigned char plane_index; 1964 + unsigned int plane_index; 1969 1965 int stream_index = -1; 1970 1966 1971 1967 for 
(plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { ··· 1988 1984 { 1989 1985 struct dml2_pmo_scratch *scratch = &pmo->scratch; 1990 1986 1991 - unsigned char plane_index; 1987 + unsigned int plane_index; 1992 1988 int stream_index = -1; 1993 1989 1994 1990 for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { ··· 2009 2005 struct dml2_pmo_instance *pmo, 2010 2006 int plane_mask) 2011 2007 { 2012 - unsigned char plane_index; 2008 + unsigned int plane_index; 2013 2009 struct dml2_plane_parameters *plane; 2014 2010 2015 2011 for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { ··· 2029 2025 struct dml2_pmo_instance *pmo, 2030 2026 int plane_mask) 2031 2027 { 2032 - unsigned char plane_index; 2028 + unsigned int plane_index; 2033 2029 struct dml2_plane_parameters *plane; 2034 2030 2035 2031 for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { ··· 2046 2042 struct dml2_pmo_instance *pmo, 2047 2043 int plane_mask) 2048 2044 { 2049 - unsigned char plane_index; 2045 + unsigned int plane_index; 2050 2046 unsigned int stream_index; 2051 2047 2052 2048 for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { ··· 2067 2063 struct dml2_pmo_instance *pmo, 2068 2064 int plane_mask) 2069 2065 { 2070 - unsigned char plane_index; 2066 + unsigned int plane_index; 2071 2067 unsigned int stream_index; 2072 2068 2073 2069 for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { ··· 2135 2131 static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) 2136 2132 { 2137 2133 int min_time_us = 0xFFFFFF; 2138 - unsigned char plane_index = 0; 2134 + unsigned int plane_index = 0; 2139 2135 2140 2136 for (plane_index = 0; plane_index < display_config->display_config.num_planes; 
plane_index++) { 2141 2137 if (is_bit_set_in_bitfield(plane_mask, plane_index)) {
-1
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
··· 15 15 { 16 16 switch (in_out->options.project_id) { 17 17 case dml2_project_dcn4x_stage1: 18 - return false; 19 18 case dml2_project_dcn4x_stage2: 20 19 case dml2_project_dcn4x_stage2_auto_drr_svp: 21 20 return dml2_top_soc15_initialize_instance(in_out);
+6
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
··· 2 2 // 3 3 // Copyright 2024 Advanced Micro Devices, Inc. 4 4 5 + #include "dml2_top_legacy.h" 6 + #include "dml2_top_soc15.h" 7 + #include "dml2_core_factory.h" 8 + #include "dml2_pmo_factory.h" 9 + #include "display_mode_core_structs.h" 10 +
+1
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
··· 545 545 if (odm_combine_factor > 1) { 546 546 max_per_pipe_vp_p0 = plane->surface.plane0.width; 547 547 temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); 548 + 548 549 if (temp < max_per_pipe_vp_p0) 549 550 max_per_pipe_vp_p0 = temp; 550 551
+1
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
··· 1 1 // SPDX-License-Identifier: MIT 2 2 // 3 3 // Copyright 2024 Advanced Micro Devices, Inc. 4 + 4 5 #ifndef __DML2_TOP_SOC15_H__ 5 6 #define __DML2_TOP_SOC15_H__ 6 7 #include "dml2_internal_shared_types.h"
-2
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
··· 357 357 enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; 358 358 bool allow_state_increase; 359 359 }; 360 - 361 - 362 360 struct dml2_core_mode_support_in_out { 363 361 /* 364 362 * Inputs
+1 -1
drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
··· 813 813 { 814 814 int i, old_plane_count; 815 815 struct dc_stream_status *stream_status = NULL; 816 - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; 816 + struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; 817 817 818 818 for (i = 0; i < context->stream_count; i++) 819 819 if (context->streams[i] == stream) {
+5 -9
drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
··· 747 747 748 748 static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) 749 749 { 750 - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. 751 - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { 752 - dml21_reinit(in_dc, dml2, config); 750 + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { 751 + dml21_reinit(in_dc, dml2, config); 753 752 return; 754 - } 753 + } 755 754 756 755 // Store config options 757 756 (*dml2)->config = *config; ··· 785 786 786 787 bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) 787 788 { 788 - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. 789 - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { 789 + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) 790 790 return dml21_create(in_dc, dml2, config); 791 - } 792 791 793 792 // Allocate Mode Lib Ctx 794 793 *dml2 = dml2_allocate_memory(); ··· 854 857 const struct dml2_configuration_options *config, 855 858 struct dml2_context **dml2) 856 859 { 857 - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. 858 - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) { 860 + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { 859 861 dml21_reinit(in_dc, dml2, config); 860 862 return; 861 863 }
+45
drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
··· 30 30 #include "rc_calc.h" 31 31 #include "fixed31_32.h" 32 32 33 + #define DC_LOGGER \ 34 + dsc->ctx->logger 35 + 33 36 /* This module's internal functions */ 34 37 35 38 /* default DSC policy target bitrate limit is 16bpp */ ··· 481 478 config.num_slices_h, &dsc_common_caps, timing, link_encoding, range); 482 479 483 480 return is_dsc_possible; 481 + } 482 + 483 + void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc, 484 + const struct dc_crtc_timing *timing) 485 + { 486 + struct dsc_enc_caps dsc_enc_caps; 487 + 488 + get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); 489 + 490 + DC_LOG_DSC("dsc encoder caps:"); 491 + DC_LOG_DSC("\tdsc_version 0x%x", dsc_enc_caps.dsc_version); 492 + DC_LOG_DSC("\tslice_caps 0x%x", dsc_enc_caps.slice_caps.raw); 493 + DC_LOG_DSC("\tlb_bit_depth %d", dsc_enc_caps.lb_bit_depth); 494 + DC_LOG_DSC("\tis_block_pred_supported %d", dsc_enc_caps.is_block_pred_supported); 495 + DC_LOG_DSC("\tcolor_formats 0x%x", dsc_enc_caps.color_formats.raw); 496 + DC_LOG_DSC("\tcolor_depth 0x%x", dsc_enc_caps.color_depth.raw); 497 + DC_LOG_DSC("\tmax_total_throughput_mps %d", dsc_enc_caps.max_total_throughput_mps); 498 + DC_LOG_DSC("\tmax_slice_width %d", dsc_enc_caps.max_slice_width); 499 + DC_LOG_DSC("\tbpp_increment_div %d", dsc_enc_caps.bpp_increment_div); 500 + } 501 + 502 + void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, 503 + const struct dsc_dec_dpcd_caps *dsc_sink_caps) 504 + { 505 + DC_LOG_DSC("dsc decoder caps:"); 506 + DC_LOG_DSC("\tis_dsc_supported %d", dsc_sink_caps->is_dsc_supported); 507 + DC_LOG_DSC("\tdsc_version 0x%x", dsc_sink_caps->dsc_version); 508 + DC_LOG_DSC("\trc_buffer_size %d", dsc_sink_caps->rc_buffer_size); 509 + DC_LOG_DSC("\tslice_caps1 0x%x", dsc_sink_caps->slice_caps1.raw); 510 + DC_LOG_DSC("\tslice_caps2 0x%x", dsc_sink_caps->slice_caps2.raw); 511 + DC_LOG_DSC("\tlb_bit_depth %d", dsc_sink_caps->lb_bit_depth); 512 + DC_LOG_DSC("\tis_block_pred_supported %d", 
dsc_sink_caps->is_block_pred_supported); 513 + DC_LOG_DSC("\tedp_max_bits_per_pixel %d", dsc_sink_caps->edp_max_bits_per_pixel); 514 + DC_LOG_DSC("\tcolor_formats 0x%x", dsc_sink_caps->color_formats.raw); 515 + DC_LOG_DSC("\tthroughput_mode_0_mps %d", dsc_sink_caps->throughput_mode_0_mps); 516 + DC_LOG_DSC("\tthroughput_mode_1_mps %d", dsc_sink_caps->throughput_mode_1_mps); 517 + DC_LOG_DSC("\tmax_slice_width %d", dsc_sink_caps->max_slice_width); 518 + DC_LOG_DSC("\tbpp_increment_div %d", dsc_sink_caps->bpp_increment_div); 519 + DC_LOG_DSC("\tbranch_overall_throughput_0_mps %d", dsc_sink_caps->branch_overall_throughput_0_mps); 520 + DC_LOG_DSC("\tbranch_overall_throughput_1_mps %d", dsc_sink_caps->branch_overall_throughput_1_mps); 521 + DC_LOG_DSC("\tbranch_max_line_width %d", dsc_sink_caps->branch_max_line_width); 522 + DC_LOG_DSC("\tis_dp %d", dsc_sink_caps->is_dp); 484 523 } 485 524 486 525 static void get_dsc_enc_caps(
+2 -2
drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c
··· 140 140 141 141 void hubp1_program_tiling( 142 142 struct hubp *hubp, 143 - const union dc_tiling_info *info, 143 + const struct dc_tiling_info *info, 144 144 const enum surface_pixel_format pixel_format) 145 145 { 146 146 struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); ··· 549 549 void hubp1_program_surface_config( 550 550 struct hubp *hubp, 551 551 enum surface_pixel_format format, 552 - union dc_tiling_info *tiling_info, 552 + struct dc_tiling_info *tiling_info, 553 553 struct plane_size *plane_size, 554 554 enum dc_rotation_angle rotation, 555 555 struct dc_plane_dcc_param *dcc,
+2 -2
drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
··· 706 706 void hubp1_program_surface_config( 707 707 struct hubp *hubp, 708 708 enum surface_pixel_format format, 709 - union dc_tiling_info *tiling_info, 709 + struct dc_tiling_info *tiling_info, 710 710 struct plane_size *plane_size, 711 711 enum dc_rotation_angle rotation, 712 712 struct dc_plane_dcc_param *dcc, ··· 739 739 740 740 void hubp1_program_tiling( 741 741 struct hubp *hubp, 742 - const union dc_tiling_info *info, 742 + const struct dc_tiling_info *info, 743 743 const enum surface_pixel_format pixel_format); 744 744 745 745 void hubp1_dcc_control(struct hubp *hubp,
+2 -2
drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c
··· 310 310 */ 311 311 static void hubp2_program_tiling( 312 312 struct dcn20_hubp *hubp2, 313 - const union dc_tiling_info *info, 313 + const struct dc_tiling_info *info, 314 314 const enum surface_pixel_format pixel_format) 315 315 { 316 316 REG_UPDATE_3(DCSURF_ADDR_CONFIG, ··· 550 550 void hubp2_program_surface_config( 551 551 struct hubp *hubp, 552 552 enum surface_pixel_format format, 553 - union dc_tiling_info *tiling_info, 553 + struct dc_tiling_info *tiling_info, 554 554 struct plane_size *plane_size, 555 555 enum dc_rotation_angle rotation, 556 556 struct dc_plane_dcc_param *dcc,
+1 -1
drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
··· 382 382 void hubp2_program_surface_config( 383 383 struct hubp *hubp, 384 384 enum surface_pixel_format format, 385 - union dc_tiling_info *tiling_info, 385 + struct dc_tiling_info *tiling_info, 386 386 struct plane_size *plane_size, 387 387 enum dc_rotation_angle rotation, 388 388 struct dc_plane_dcc_param *dcc,
+1 -1
drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c
··· 42 42 static void hubp201_program_surface_config( 43 43 struct hubp *hubp, 44 44 enum surface_pixel_format format, 45 - union dc_tiling_info *tiling_info, 45 + struct dc_tiling_info *tiling_info, 46 46 struct plane_size *plane_size, 47 47 enum dc_rotation_angle rotation, 48 48 struct dc_plane_dcc_param *dcc,
+2 -2
drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
··· 318 318 319 319 void hubp3_program_tiling( 320 320 struct dcn20_hubp *hubp2, 321 - const union dc_tiling_info *info, 321 + const struct dc_tiling_info *info, 322 322 const enum surface_pixel_format pixel_format) 323 323 { 324 324 REG_UPDATE_4(DCSURF_ADDR_CONFIG, ··· 411 411 void hubp3_program_surface_config( 412 412 struct hubp *hubp, 413 413 enum surface_pixel_format format, 414 - union dc_tiling_info *tiling_info, 414 + struct dc_tiling_info *tiling_info, 415 415 struct plane_size *plane_size, 416 416 enum dc_rotation_angle rotation, 417 417 struct dc_plane_dcc_param *dcc,
+2 -2
drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
··· 264 264 void hubp3_program_surface_config( 265 265 struct hubp *hubp, 266 266 enum surface_pixel_format format, 267 - union dc_tiling_info *tiling_info, 267 + struct dc_tiling_info *tiling_info, 268 268 struct plane_size *plane_size, 269 269 enum dc_rotation_angle rotation, 270 270 struct dc_plane_dcc_param *dcc, ··· 280 280 281 281 void hubp3_program_tiling( 282 282 struct dcn20_hubp *hubp2, 283 - const union dc_tiling_info *info, 283 + const struct dc_tiling_info *info, 284 284 const enum surface_pixel_format pixel_format); 285 285 286 286 void hubp3_dcc_control(struct hubp *hubp, bool enable,
+1 -1
drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
··· 172 172 void hubp35_program_surface_config( 173 173 struct hubp *hubp, 174 174 enum surface_pixel_format format, 175 - union dc_tiling_info *tiling_info, 175 + struct dc_tiling_info *tiling_info, 176 176 struct plane_size *plane_size, 177 177 enum dc_rotation_angle rotation, 178 178 struct dc_plane_dcc_param *dcc,
+1 -1
drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h
··· 65 65 void hubp35_program_surface_config( 66 66 struct hubp *hubp, 67 67 enum surface_pixel_format format, 68 - union dc_tiling_info *tiling_info, 68 + struct dc_tiling_info *tiling_info, 69 69 struct plane_size *plane_size, 70 70 enum dc_rotation_angle rotation, 71 71 struct dc_plane_dcc_param *dcc,
+54 -42
drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
··· 145 145 } 146 146 147 147 void hubp401_vready_at_or_After_vsync(struct hubp *hubp, 148 - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) 148 + union dml2_global_sync_programming *pipe_global_sync, 149 + struct dc_crtc_timing *timing) 149 150 { 150 - uint32_t value = 0; 151 + unsigned int vstartup_lines = pipe_global_sync->dcn4x.vstartup_lines; 152 + unsigned int vupdate_offset_pixels = pipe_global_sync->dcn4x.vupdate_offset_pixels; 153 + unsigned int vupdate_width_pixels = pipe_global_sync->dcn4x.vupdate_vupdate_width_pixels; 154 + unsigned int vready_offset_pixels = pipe_global_sync->dcn4x.vready_offset_pixels; 155 + unsigned int htotal = timing->h_total; 156 + unsigned int vblank_start = 0; 157 + unsigned int vblank_end = 0; 158 + unsigned int pixel_width = 0; 159 + uint32_t reg_value = 0; 160 + bool is_vready_at_or_after_vsync = false; 151 161 struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); 162 + 152 163 /* 153 164 * if (VSTARTUP_START - (VREADY_OFFSET+VUPDATE_WIDTH+VUPDATE_OFFSET)/htotal) <= OTG_V_BLANK_END 154 165 * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 1 155 166 * else 156 167 * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 0 157 168 */ 158 - if (pipe_dest->htotal != 0) { 159 - if ((pipe_dest->vstartup_start - (pipe_dest->vready_offset+pipe_dest->vupdate_width 160 - + pipe_dest->vupdate_offset) / pipe_dest->htotal) <= pipe_dest->vblank_end) { 161 - value = 1; 162 - } else 163 - value = 0; 169 + if (htotal != 0) { 170 + vblank_start = timing->v_total - timing->v_front_porch; 171 + vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; 172 + pixel_width = vready_offset_pixels + vupdate_width_pixels + vupdate_offset_pixels; 173 + 174 + is_vready_at_or_after_vsync = (vstartup_lines - pixel_width / htotal) <= vblank_end; 175 + 176 + if (is_vready_at_or_after_vsync) 177 + reg_value = 1; 164 178 } 165 179 166 - REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value); 180 + REG_UPDATE(DCHUBP_CNTL, 
HUBP_VREADY_AT_OR_AFTER_VSYNC, reg_value); 167 181 } 168 182 169 183 void hubp401_program_requestor( 170 184 struct hubp *hubp, 171 - struct _vcs_dpi_display_rq_regs_st *rq_regs) 185 + struct dml2_display_rq_regs *rq_regs) 172 186 { 173 187 struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); 174 188 ··· 210 196 211 197 void hubp401_program_deadline( 212 198 struct hubp *hubp, 213 - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, 214 - struct _vcs_dpi_display_ttu_regs_st *ttu_attr) 199 + struct dml2_display_dlg_regs *dlg_attr, 200 + struct dml2_display_ttu_regs *ttu_attr) 215 201 { 216 202 struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); 217 203 ··· 308 294 309 295 void hubp401_setup( 310 296 struct hubp *hubp, 311 - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, 312 - struct _vcs_dpi_display_ttu_regs_st *ttu_attr, 313 - struct _vcs_dpi_display_rq_regs_st *rq_regs, 314 - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) 297 + struct dml2_dchub_per_pipe_register_set *pipe_regs, 298 + union dml2_global_sync_programming *pipe_global_sync, 299 + struct dc_crtc_timing *timing) 315 300 { 316 301 /* otg is locked when this func is called. Register are double buffered. 
317 302 * disable the requestors is not needed 318 303 */ 319 - hubp401_vready_at_or_After_vsync(hubp, pipe_dest); 320 - hubp401_program_requestor(hubp, rq_regs); 321 - hubp401_program_deadline(hubp, dlg_attr, ttu_attr); 304 + hubp401_vready_at_or_After_vsync(hubp, pipe_global_sync, timing); 305 + hubp401_program_requestor(hubp, &pipe_regs->rq_regs); 306 + hubp401_program_deadline(hubp, &pipe_regs->dlg_regs, &pipe_regs->ttu_regs); 322 307 } 323 308 324 309 void hubp401_setup_interdependent( 325 310 struct hubp *hubp, 326 - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, 327 - struct _vcs_dpi_display_ttu_regs_st *ttu_attr) 311 + struct dml2_dchub_per_pipe_register_set *pipe_regs) 328 312 { 329 313 struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); 330 314 331 315 REG_SET_2(PREFETCH_SETTINGS, 0, 332 - DST_Y_PREFETCH, dlg_attr->dst_y_prefetch, 333 - VRATIO_PREFETCH, dlg_attr->vratio_prefetch); 316 + DST_Y_PREFETCH, pipe_regs->dlg_regs.dst_y_prefetch, 317 + VRATIO_PREFETCH, pipe_regs->dlg_regs.vratio_prefetch); 334 318 335 319 REG_SET(PREFETCH_SETTINGS_C, 0, 336 - VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c); 320 + VRATIO_PREFETCH_C, pipe_regs->dlg_regs.vratio_prefetch_c); 337 321 338 322 REG_SET_2(VBLANK_PARAMETERS_0, 0, 339 - DST_Y_PER_VM_VBLANK, dlg_attr->dst_y_per_vm_vblank, 340 - DST_Y_PER_ROW_VBLANK, dlg_attr->dst_y_per_row_vblank); 323 + DST_Y_PER_VM_VBLANK, pipe_regs->dlg_regs.dst_y_per_vm_vblank, 324 + DST_Y_PER_ROW_VBLANK, pipe_regs->dlg_regs.dst_y_per_row_vblank); 341 325 342 326 REG_SET_2(FLIP_PARAMETERS_0, 0, 343 - DST_Y_PER_VM_FLIP, dlg_attr->dst_y_per_vm_flip, 344 - DST_Y_PER_ROW_FLIP, dlg_attr->dst_y_per_row_flip); 327 + DST_Y_PER_VM_FLIP, pipe_regs->dlg_regs.dst_y_per_vm_flip, 328 + DST_Y_PER_ROW_FLIP, pipe_regs->dlg_regs.dst_y_per_row_flip); 345 329 346 330 REG_SET(VBLANK_PARAMETERS_3, 0, 347 - REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); 331 + REFCYC_PER_META_CHUNK_VBLANK_L, 
pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_l); 348 332 349 333 REG_SET(VBLANK_PARAMETERS_4, 0, 350 - REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); 334 + REFCYC_PER_META_CHUNK_VBLANK_C, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_c); 351 335 352 336 REG_SET(FLIP_PARAMETERS_2, 0, 353 - REFCYC_PER_META_CHUNK_FLIP_L, dlg_attr->refcyc_per_meta_chunk_flip_l); 337 + REFCYC_PER_META_CHUNK_FLIP_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_flip_l); 354 338 355 339 REG_SET_2(PER_LINE_DELIVERY_PRE, 0, 356 - REFCYC_PER_LINE_DELIVERY_PRE_L, dlg_attr->refcyc_per_line_delivery_pre_l, 357 - REFCYC_PER_LINE_DELIVERY_PRE_C, dlg_attr->refcyc_per_line_delivery_pre_c); 340 + REFCYC_PER_LINE_DELIVERY_PRE_L, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_l, 341 + REFCYC_PER_LINE_DELIVERY_PRE_C, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_c); 358 342 359 343 REG_SET(DCN_SURF0_TTU_CNTL1, 0, 360 344 REFCYC_PER_REQ_DELIVERY_PRE, 361 - ttu_attr->refcyc_per_req_delivery_pre_l); 345 + pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_l); 362 346 REG_SET(DCN_SURF1_TTU_CNTL1, 0, 363 347 REFCYC_PER_REQ_DELIVERY_PRE, 364 - ttu_attr->refcyc_per_req_delivery_pre_c); 348 + pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_c); 365 349 REG_SET(DCN_CUR0_TTU_CNTL1, 0, 366 - REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); 350 + REFCYC_PER_REQ_DELIVERY_PRE, pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_cur0); 367 351 368 352 REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0, 369 - MIN_TTU_VBLANK, ttu_attr->min_ttu_vblank, 370 - QoS_LEVEL_FLIP, ttu_attr->qos_level_flip); 353 + MIN_TTU_VBLANK, pipe_regs->ttu_regs.min_ttu_vblank, 354 + QoS_LEVEL_FLIP, pipe_regs->ttu_regs.qos_level_flip); 371 355 } 372 356 373 357 ··· 544 532 545 533 void hubp401_program_tiling( 546 534 struct dcn20_hubp *hubp2, 547 - const union dc_tiling_info *info, 535 + const struct dc_tiling_info *info, 548 536 const enum surface_pixel_format pixel_format) 549 537 { 550 538 /* 
DCSURF_ADDR_CONFIG still shows up in reg spec, but does not need to be programmed for DCN4x ··· 592 580 void hubp401_program_surface_config( 593 581 struct hubp *hubp, 594 582 enum surface_pixel_format format, 595 - union dc_tiling_info *tiling_info, 583 + struct dc_tiling_info *tiling_info, 596 584 struct plane_size *plane_size, 597 585 enum dc_rotation_angle rotation, 598 586 struct dc_plane_dcc_param *dcc, ··· 993 981 .hubp_program_surface_flip_and_addr = hubp401_program_surface_flip_and_addr, 994 982 .hubp_program_surface_config = hubp401_program_surface_config, 995 983 .hubp_is_flip_pending = hubp2_is_flip_pending, 996 - .hubp_setup = hubp401_setup, 997 - .hubp_setup_interdependent = hubp401_setup_interdependent, 984 + .hubp_setup2 = hubp401_setup, 985 + .hubp_setup_interdependent2 = hubp401_setup_interdependent, 998 986 .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, 999 987 .set_blank = hubp2_set_blank, 1000 988 .set_blank_regs = hubp2_set_blank_regs,
+19 -20
drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
··· 256 256 257 257 void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); 258 258 259 - void hubp401_vready_at_or_After_vsync(struct hubp *hubp, 260 - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); 261 - 262 - void hubp401_program_requestor( 263 - struct hubp *hubp, 264 - struct _vcs_dpi_display_rq_regs_st *rq_regs); 265 - 266 - void hubp401_program_deadline( 267 - struct hubp *hubp, 268 - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, 269 - struct _vcs_dpi_display_ttu_regs_st *ttu_attr); 270 - 271 259 void hubp401_setup( 272 260 struct hubp *hubp, 273 - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, 274 - struct _vcs_dpi_display_ttu_regs_st *ttu_attr, 275 - struct _vcs_dpi_display_rq_regs_st *rq_regs, 276 - struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); 261 + struct dml2_dchub_per_pipe_register_set *pipe_regs, 262 + union dml2_global_sync_programming *pipe_global_sync, 263 + struct dc_crtc_timing *timing); 277 264 278 265 void hubp401_setup_interdependent( 279 266 struct hubp *hubp, 280 - struct _vcs_dpi_display_dlg_regs_st *dlg_attr, 281 - struct _vcs_dpi_display_ttu_regs_st *ttu_attr); 267 + struct dml2_dchub_per_pipe_register_set *pipe_regs); 282 268 283 269 bool hubp401_program_surface_flip_and_addr( 284 270 struct hubp *hubp, ··· 276 290 277 291 void hubp401_program_tiling( 278 292 struct dcn20_hubp *hubp2, 279 - const union dc_tiling_info *info, 293 + const struct dc_tiling_info *info, 280 294 const enum surface_pixel_format pixel_format); 281 295 282 296 void hubp401_program_size( ··· 288 302 void hubp401_program_surface_config( 289 303 struct hubp *hubp, 290 304 enum surface_pixel_format format, 291 - union dc_tiling_info *tiling_info, 305 + struct dc_tiling_info *tiling_info, 292 306 struct plane_size *plane_size, 293 307 enum dc_rotation_angle rotation, 294 308 struct dc_plane_dcc_param *dcc, ··· 350 364 void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode); 351 365 352 366 
void hubp401_clear_tiling(struct hubp *hubp); 367 + 368 + void hubp401_vready_at_or_After_vsync(struct hubp *hubp, 369 + union dml2_global_sync_programming *pipe_global_sync, 370 + struct dc_crtc_timing *timing); 371 + 372 + void hubp401_program_requestor( 373 + struct hubp *hubp, 374 + struct dml2_display_rq_regs *rq_regs); 375 + 376 + void hubp401_program_deadline( 377 + struct hubp *hubp, 378 + struct dml2_display_dlg_regs *dlg_attr, 379 + struct dml2_display_ttu_regs *ttu_attr); 353 380 354 381 #endif /* __DC_HUBP_DCN401_H__ */
+145 -112
drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
··· 1288 1288 } 1289 1289 } 1290 1290 1291 - static void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, 1291 + void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, 1292 1292 struct dc_state *context) 1293 1293 { 1294 1294 //if (dc->debug.sanity_checks) { ··· 1467 1467 } 1468 1468 } 1469 1469 1470 - static void dcn20_detect_pipe_changes(struct dc_state *old_state, 1470 + void dcn20_detect_pipe_changes(struct dc_state *old_state, 1471 1471 struct dc_state *new_state, 1472 1472 struct pipe_ctx *old_pipe, 1473 1473 struct pipe_ctx *new_pipe) ··· 1655 1655 } 1656 1656 } 1657 1657 1658 - static void dcn20_update_dchubp_dpp( 1658 + void dcn20_update_dchubp_dpp( 1659 1659 struct dc *dc, 1660 1660 struct pipe_ctx *pipe_ctx, 1661 1661 struct dc_state *context) ··· 1678 1678 * VTG is within DCHUBBUB which is commond block share by each pipe HUBP. 1679 1679 * VTG is 1:1 mapping with OTG. Each pipe HUBP will select which VTG 1680 1680 */ 1681 + 1681 1682 if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) { 1682 1683 hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst); 1683 1684 1684 - hubp->funcs->hubp_setup( 1685 - hubp, 1686 - &pipe_ctx->dlg_regs, 1687 - &pipe_ctx->ttu_regs, 1688 - &pipe_ctx->rq_regs, 1689 - &pipe_ctx->pipe_dlg_param); 1685 + if (hubp->funcs->hubp_setup2) { 1686 + hubp->funcs->hubp_setup2( 1687 + hubp, 1688 + &pipe_ctx->hubp_regs, 1689 + &pipe_ctx->global_sync, 1690 + &pipe_ctx->stream->timing); 1691 + } else { 1692 + hubp->funcs->hubp_setup( 1693 + hubp, 1694 + &pipe_ctx->dlg_regs, 1695 + &pipe_ctx->ttu_regs, 1696 + &pipe_ctx->rq_regs, 1697 + &pipe_ctx->pipe_dlg_param); 1698 + } 1690 1699 } 1691 1700 1692 1701 if (pipe_ctx->update_flags.bits.unbounded_req && hubp->funcs->set_unbounded_requesting) 1693 1702 hubp->funcs->set_unbounded_requesting(hubp, pipe_ctx->unbounded_req); 1694 1703 1695 - if (pipe_ctx->update_flags.bits.hubp_interdependent) 1696 - hubp->funcs->hubp_setup_interdependent( 1697 - hubp, 1698 - 
&pipe_ctx->dlg_regs, 1699 - &pipe_ctx->ttu_regs); 1704 + if (pipe_ctx->update_flags.bits.hubp_interdependent) { 1705 + if (hubp->funcs->hubp_setup_interdependent2) { 1706 + hubp->funcs->hubp_setup_interdependent2( 1707 + hubp, 1708 + &pipe_ctx->hubp_regs); 1709 + } else { 1710 + hubp->funcs->hubp_setup_interdependent( 1711 + hubp, 1712 + &pipe_ctx->dlg_regs, 1713 + &pipe_ctx->ttu_regs); 1714 + } 1715 + } 1700 1716 1701 1717 if (pipe_ctx->update_flags.bits.enable || 1702 1718 pipe_ctx->update_flags.bits.plane_changed || ··· 1772 1756 &pipe_ctx->plane_res.scl_data.viewport_c); 1773 1757 viewport_changed = true; 1774 1758 } 1775 - if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) 1776 - hubp->funcs->hubp_program_mcache_id_and_split_coordinate( 1777 - hubp, 1778 - &pipe_ctx->mcache_regs); 1759 + 1760 + if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) 1761 + hubp->funcs->hubp_program_mcache_id_and_split_coordinate(hubp, &pipe_ctx->mcache_regs); 1779 1762 1780 1763 /* Any updates are handled in dc interface, just need to apply existing for plane enable */ 1781 1764 if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || ··· 1853 1838 hubp->funcs->phantom_hubp_post_enable(hubp); 1854 1839 } 1855 1840 1856 - static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) 1841 + static int dcn20_calculate_vready_offset_for_group(struct pipe_ctx *pipe) 1857 1842 { 1858 1843 struct pipe_ctx *other_pipe; 1859 1844 int vready_offset = pipe->pipe_dlg_param.vready_offset; ··· 1879 1864 return vready_offset; 1880 1865 } 1881 1866 1867 + static void dcn20_program_tg( 1868 + struct dc *dc, 1869 + struct pipe_ctx *pipe_ctx, 1870 + struct dc_state *context, 1871 + struct dce_hwseq *hws) 1872 + { 1873 + pipe_ctx->stream_res.tg->funcs->program_global_sync( 1874 + pipe_ctx->stream_res.tg, 1875 + dcn20_calculate_vready_offset_for_group(pipe_ctx), 1876 + pipe_ctx->pipe_dlg_param.vstartup_start, 1877 + 
pipe_ctx->pipe_dlg_param.vupdate_offset, 1878 + pipe_ctx->pipe_dlg_param.vupdate_width, 1879 + pipe_ctx->pipe_dlg_param.pstate_keepout); 1880 + 1881 + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) 1882 + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); 1883 + 1884 + pipe_ctx->stream_res.tg->funcs->set_vtg_params( 1885 + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); 1886 + 1887 + if (hws->funcs.setup_vupdate_interrupt) 1888 + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); 1889 + } 1890 + 1882 1891 static void dcn20_program_pipe( 1883 1892 struct dc *dc, 1884 1893 struct pipe_ctx *pipe_ctx, ··· 1913 1874 /* Only need to unblank on top pipe */ 1914 1875 if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) { 1915 1876 if (pipe_ctx->update_flags.bits.enable || 1916 - pipe_ctx->update_flags.bits.odm || 1917 - pipe_ctx->stream->update_flags.bits.abm_level) 1877 + pipe_ctx->update_flags.bits.odm || 1878 + pipe_ctx->stream->update_flags.bits.abm_level) 1918 1879 hws->funcs.blank_pixel_data(dc, pipe_ctx, 1919 - !pipe_ctx->plane_state || 1920 - !pipe_ctx->plane_state->visible); 1880 + !pipe_ctx->plane_state || 1881 + !pipe_ctx->plane_state->visible); 1921 1882 } 1922 1883 1923 1884 /* Only update TG on top pipe */ 1924 1885 if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe 1925 - && !pipe_ctx->prev_odm_pipe) { 1926 - pipe_ctx->stream_res.tg->funcs->program_global_sync( 1927 - pipe_ctx->stream_res.tg, 1928 - calculate_vready_offset_for_group(pipe_ctx), 1929 - pipe_ctx->pipe_dlg_param.vstartup_start, 1930 - pipe_ctx->pipe_dlg_param.vupdate_offset, 1931 - pipe_ctx->pipe_dlg_param.vupdate_width, 1932 - pipe_ctx->pipe_dlg_param.pstate_keepout); 1933 - 1934 - if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) 1935 - pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); 1936 - 1937 - pipe_ctx->stream_res.tg->funcs->set_vtg_params( 
1938 - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); 1939 - 1940 - if (hws->funcs.setup_vupdate_interrupt) 1941 - hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); 1942 - } 1886 + && !pipe_ctx->prev_odm_pipe) 1887 + dcn20_program_tg(dc, pipe_ctx, context, hws); 1943 1888 1944 1889 if (pipe_ctx->update_flags.bits.odm) 1945 1890 hws->funcs.update_odm(dc, context, pipe_ctx); ··· 1954 1931 dcn20_update_dchubp_dpp(dc, pipe_ctx, context); 1955 1932 1956 1933 if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || 1957 - pipe_ctx->plane_state->update_flags.bits.hdr_mult)) 1934 + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) 1958 1935 hws->funcs.set_hdr_multiplier(pipe_ctx); 1959 1936 1960 1937 if (hws->funcs.populate_mcm_luts) { 1961 1938 if (pipe_ctx->plane_state) { 1962 1939 hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, 1963 - pipe_ctx->plane_state->lut_bank_a); 1940 + pipe_ctx->plane_state->lut_bank_a); 1964 1941 pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; 1965 1942 } 1966 1943 } 1967 1944 1968 1945 if (pipe_ctx->plane_state && 1969 - (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || 1970 - pipe_ctx->plane_state->update_flags.bits.gamma_change || 1971 - pipe_ctx->plane_state->update_flags.bits.lut_3d || 1972 - pipe_ctx->update_flags.bits.enable)) 1946 + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || 1947 + pipe_ctx->plane_state->update_flags.bits.gamma_change || 1948 + pipe_ctx->plane_state->update_flags.bits.lut_3d || 1949 + pipe_ctx->update_flags.bits.enable)) 1973 1950 hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); 1974 1951 1975 1952 /* dcn10_translate_regamma_to_hw_format takes 750us to finish ··· 1977 1954 * updating on slave planes 1978 1955 */ 1979 1956 if (pipe_ctx->update_flags.bits.enable || 1980 - pipe_ctx->update_flags.bits.plane_changed || 1981 - pipe_ctx->stream->update_flags.bits.out_tf || 1982 - 
(pipe_ctx->plane_state && 1983 - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) 1957 + pipe_ctx->update_flags.bits.plane_changed || 1958 + pipe_ctx->stream->update_flags.bits.out_tf || 1959 + (pipe_ctx->plane_state && 1960 + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) 1984 1961 hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); 1985 1962 1986 1963 /* If the pipe has been enabled or has a different opp, we ··· 1989 1966 * causes a different pipe to be chosen to odm combine with. 1990 1967 */ 1991 1968 if (pipe_ctx->update_flags.bits.enable 1992 - || pipe_ctx->update_flags.bits.opp_changed) { 1969 + || pipe_ctx->update_flags.bits.opp_changed) { 1993 1970 1994 1971 pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( 1995 1972 pipe_ctx->stream_res.opp, ··· 2019 1996 memset(&params, 0, sizeof(params)); 2020 1997 odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params); 2021 1998 dc->hwss.set_disp_pattern_generator(dc, 2022 - pipe_ctx, 2023 - pipe_ctx->stream_res.test_pattern_params.test_pattern, 2024 - pipe_ctx->stream_res.test_pattern_params.color_space, 2025 - pipe_ctx->stream_res.test_pattern_params.color_depth, 2026 - NULL, 2027 - pipe_ctx->stream_res.test_pattern_params.width, 2028 - pipe_ctx->stream_res.test_pattern_params.height, 2029 - pipe_ctx->stream_res.test_pattern_params.offset); 1999 + pipe_ctx, 2000 + pipe_ctx->stream_res.test_pattern_params.test_pattern, 2001 + pipe_ctx->stream_res.test_pattern_params.color_space, 2002 + pipe_ctx->stream_res.test_pattern_params.color_depth, 2003 + NULL, 2004 + pipe_ctx->stream_res.test_pattern_params.width, 2005 + pipe_ctx->stream_res.test_pattern_params.height, 2006 + pipe_ctx->stream_res.test_pattern_params.offset); 2030 2007 } 2031 2008 } 2032 2009 ··· 2035 2012 struct dc_state *context) 2036 2013 { 2037 2014 int i; 2038 - struct dce_hwseq *hws = dc->hwseq; 2039 - DC_LOGGER_INIT(dc->ctx->logger); 2040 2015 unsigned int prev_hubp_count = 0; 2041 2016 unsigned 
int hubp_count = 0; 2042 - struct pipe_ctx *pipe; 2017 + struct dce_hwseq *hws = dc->hwseq; 2018 + struct pipe_ctx *pipe = NULL; 2019 + 2020 + DC_LOGGER_INIT(dc->ctx->logger); 2043 2021 2044 2022 if (resource_is_pipe_topology_changed(dc->current_state, context)) 2045 2023 resource_log_pipe_topology_update(dc, context); ··· 2053 2029 ASSERT(!pipe->plane_state->triplebuffer_flips); 2054 2030 /*turn off triple buffer for full update*/ 2055 2031 dc->hwss.program_triplebuffer( 2056 - dc, pipe, pipe->plane_state->triplebuffer_flips); 2032 + dc, pipe, pipe->plane_state->triplebuffer_flips); 2057 2033 } 2058 2034 } 2059 2035 } ··· 2068 2044 if (prev_hubp_count == 0 && hubp_count > 0) { 2069 2045 if (dc->res_pool->hubbub->funcs->force_pstate_change_control) 2070 2046 dc->res_pool->hubbub->funcs->force_pstate_change_control( 2071 - dc->res_pool->hubbub, true, false); 2047 + dc->res_pool->hubbub, true, false); 2072 2048 udelay(500); 2073 2049 } 2074 2050 2075 2051 /* Set pipe update flags and lock pipes */ 2076 2052 for (i = 0; i < dc->res_pool->pipe_count; i++) 2077 2053 dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], 2078 - &context->res_ctx.pipe_ctx[i]); 2054 + &context->res_ctx.pipe_ctx[i]); 2079 2055 2080 2056 /* When disabling phantom pipes, turn on phantom OTG first (so we can get double 2081 2057 * buffer updates properly) 2082 2058 */ 2083 2059 for (i = 0; i < dc->res_pool->pipe_count; i++) { 2084 2060 struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; 2061 + 2085 2062 pipe = &dc->current_state->res_ctx.pipe_ctx[i]; 2086 2063 2087 2064 if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && 2088 - dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { 2065 + dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { 2089 2066 struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; 2090 2067 2091 2068 if 
(tg->funcs->enable_crtc) { 2092 - if (dc->hwseq->funcs.blank_pixel_data) { 2069 + if (dc->hwseq->funcs.blank_pixel_data) 2093 2070 dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); 2094 - } 2071 + 2095 2072 tg->funcs->enable_crtc(tg); 2096 2073 } 2097 2074 } ··· 2100 2075 /* OTG blank before disabling all front ends */ 2101 2076 for (i = 0; i < dc->res_pool->pipe_count; i++) 2102 2077 if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable 2103 - && !context->res_ctx.pipe_ctx[i].top_pipe 2104 - && !context->res_ctx.pipe_ctx[i].prev_odm_pipe 2105 - && context->res_ctx.pipe_ctx[i].stream) 2078 + && !context->res_ctx.pipe_ctx[i].top_pipe 2079 + && !context->res_ctx.pipe_ctx[i].prev_odm_pipe 2080 + && context->res_ctx.pipe_ctx[i].stream) 2106 2081 hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); 2107 2082 2108 2083 /* Disconnect mpcc */ 2109 2084 for (i = 0; i < dc->res_pool->pipe_count; i++) 2110 2085 if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable 2111 - || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { 2086 + || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { 2112 2087 struct hubbub *hubbub = dc->res_pool->hubbub; 2113 2088 2114 2089 /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom ··· 2118 2093 * DET allocation. 
2119 2094 */ 2120 2095 if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable || 2121 - (context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM))) { 2096 + (context->res_ctx.pipe_ctx[i].plane_state && 2097 + dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) 2098 + == SUBVP_PHANTOM))) { 2122 2099 if (hubbub->funcs->program_det_size) 2123 - hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); 2100 + hubbub->funcs->program_det_size(hubbub, 2101 + dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); 2124 2102 if (dc->res_pool->hubbub->funcs->program_det_segments) 2125 - dc->res_pool->hubbub->funcs->program_det_segments(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); 2103 + dc->res_pool->hubbub->funcs->program_det_segments( 2104 + hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); 2126 2105 } 2127 - hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); 2106 + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, 2107 + &dc->current_state->res_ctx.pipe_ctx[i]); 2128 2108 DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); 2129 2109 } 2130 2110 ··· 2137 2107 for (i = 0; i < dc->res_pool->pipe_count; i++) { 2138 2108 pipe = &context->res_ctx.pipe_ctx[i]; 2139 2109 if (resource_is_pipe_type(pipe, OTG_MASTER) && 2140 - !resource_is_pipe_type(pipe, DPP_PIPE) && 2141 - pipe->update_flags.bits.odm && 2142 - hws->funcs.update_odm) 2110 + !resource_is_pipe_type(pipe, DPP_PIPE) && 2111 + pipe->update_flags.bits.odm && 2112 + hws->funcs.update_odm) 2143 2113 hws->funcs.update_odm(dc, context, pipe); 2144 2114 } 2145 2115 ··· 2157 2127 else { 2158 2128 /* Don't program phantom pipes in the regular front end programming sequence. 
2159 2129 * There is an MPO transition case where a pipe being used by a video plane is 2160 - * transitioned directly to be a phantom pipe when closing the MPO video. However 2161 - * the phantom pipe will program a new HUBP_VTG_SEL (update takes place right away), 2162 - * but the MPO still exists until the double buffered update of the main pipe so we 2163 - * will get a frame of underflow if the phantom pipe is programmed here. 2130 + * transitioned directly to be a phantom pipe when closing the MPO video. 2131 + * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place 2132 + * right away) but the MPO still exists until the double buffered update of the 2133 + * main pipe so we will get a frame of underflow if the phantom pipe is 2134 + * programmed here. 2164 2135 */ 2165 - if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) 2136 + if (pipe->stream && 2137 + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) 2166 2138 dcn20_program_pipe(dc, pipe, context); 2167 2139 } 2168 2140 2169 2141 pipe = pipe->bottom_pipe; 2170 2142 } 2171 2143 } 2144 + 2172 2145 /* Program secondary blending tree and writeback pipes */ 2173 2146 pipe = &context->res_ctx.pipe_ctx[i]; 2174 2147 if (!pipe->top_pipe && !pipe->prev_odm_pipe 2175 - && pipe->stream && pipe->stream->num_wb_info > 0 2176 - && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) 2177 - || pipe->stream->update_flags.raw) 2178 - && hws->funcs.program_all_writeback_pipes_in_tree) 2148 + && pipe->stream && pipe->stream->num_wb_info > 0 2149 + && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) 2150 + || pipe->stream->update_flags.raw) 2151 + && hws->funcs.program_all_writeback_pipes_in_tree) 2179 2152 hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); 2180 2153 2181 2154 /* Avoid underflow by check of pipe line read when adding 2nd plane. 
*/ ··· 2197 2164 * buffered pending status clear and reset opp head pipe's none double buffered 2198 2165 * registers to their initial state. 2199 2166 */ 2200 - static void post_unlock_reset_opp(struct dc *dc, 2167 + void dcn20_post_unlock_reset_opp(struct dc *dc, 2201 2168 struct pipe_ctx *opp_head) 2202 2169 { 2203 2170 struct display_stream_compressor *dsc = opp_head->stream_res.dsc; ··· 2234 2201 struct dc *dc, 2235 2202 struct dc_state *context) 2236 2203 { 2237 - int i; 2238 - const unsigned int TIMEOUT_FOR_PIPE_ENABLE_US = 100000; 2204 + // Timeout for pipe enable 2205 + unsigned int timeout_us = 100000; 2239 2206 unsigned int polling_interval_us = 1; 2240 2207 struct dce_hwseq *hwseq = dc->hwseq; 2208 + int i; 2241 2209 2242 2210 for (i = 0; i < dc->res_pool->pipe_count; i++) 2243 2211 if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) && 2244 - !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) 2245 - post_unlock_reset_opp(dc, 2246 - &dc->current_state->res_ctx.pipe_ctx[i]); 2212 + !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) 2213 + dcn20_post_unlock_reset_opp(dc, 2214 + &dc->current_state->res_ctx.pipe_ctx[i]); 2247 2215 2248 2216 for (i = 0; i < dc->res_pool->pipe_count; i++) 2249 2217 if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) ··· 2260 2226 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 2261 2227 // Don't check flip pending on phantom pipes 2262 2228 if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && 2263 - dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { 2229 + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { 2264 2230 struct hubp *hubp = pipe->plane_res.hubp; 2265 2231 int j = 0; 2266 - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us 2267 - && hubp->funcs->hubp_is_flip_pending(hubp); j++) 2232 + 2233 + for (j = 0; j < timeout_us / polling_interval_us 2234 + && 
hubp->funcs->hubp_is_flip_pending(hubp); j++) 2268 2235 udelay(polling_interval_us); 2269 2236 } 2270 2237 } ··· 2279 2244 * before we've transitioned to 2:1 or 4:1 2280 2245 */ 2281 2246 if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) && 2282 - resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && 2283 - dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { 2247 + resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && 2248 + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { 2284 2249 int j = 0; 2285 2250 struct timing_generator *tg = pipe->stream_res.tg; 2286 2251 2287 - 2288 2252 if (tg->funcs->get_optc_double_buffer_pending) { 2289 - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us 2290 - && tg->funcs->get_optc_double_buffer_pending(tg); j++) 2253 + for (j = 0; j < timeout_us / polling_interval_us 2254 + && tg->funcs->get_optc_double_buffer_pending(tg); j++) 2291 2255 udelay(polling_interval_us); 2292 2256 } 2293 2257 } ··· 2294 2260 2295 2261 if (dc->res_pool->hubbub->funcs->force_pstate_change_control) 2296 2262 dc->res_pool->hubbub->funcs->force_pstate_change_control( 2297 - dc->res_pool->hubbub, false, false); 2263 + dc->res_pool->hubbub, false, false); 2298 2264 2299 2265 for (i = 0; i < dc->res_pool->pipe_count; i++) { 2300 2266 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; ··· 2325 2291 return; 2326 2292 2327 2293 /* P-State support transitions: 2328 - * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe 2329 - * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) 2330 - * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe 2331 - * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe 2332 - * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes 2294 + * Natural -> FPO: P-State 
disabled in prepare, force disallow anytime is safe 2295 + * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) 2296 + * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe 2297 + * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe 2298 + * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes 2333 2299 */ 2334 2300 if (hwseq->funcs.update_force_pstate) 2335 2301 dc->hwseq->funcs.update_force_pstate(dc, context); ··· 2344 2310 if (hwseq->wa.DEGVIDCN21) 2345 2311 dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub); 2346 2312 2347 - 2348 2313 /* WA for stutter underflow during MPO transitions when adding 2nd plane */ 2349 2314 if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) { 2350 2315 2351 2316 if (dc->current_state->stream_status[0].plane_count == 1 && 2352 - context->stream_status[0].plane_count > 1) { 2317 + context->stream_status[0].plane_count > 1) { 2353 2318 2354 2319 struct timing_generator *tg = dc->res_pool->timing_generators[0]; 2355 2320 ··· 2496 2463 2497 2464 pipe_ctx->stream_res.tg->funcs->program_global_sync( 2498 2465 pipe_ctx->stream_res.tg, 2499 - calculate_vready_offset_for_group(pipe_ctx), 2466 + dcn20_calculate_vready_offset_for_group(pipe_ctx), 2500 2467 pipe_ctx->pipe_dlg_param.vstartup_start, 2501 2468 pipe_ctx->pipe_dlg_param.vupdate_offset, 2502 2469 pipe_ctx->pipe_dlg_param.vupdate_width,
+16 -1
drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
··· 154 154 const struct dc *dc, 155 155 struct pipe_ctx *pipe_ctx, 156 156 bool enable); 157 - 157 + void dcn20_detect_pipe_changes( 158 + struct dc_state *old_state, 159 + struct dc_state *new_state, 160 + struct pipe_ctx *old_pipe, 161 + struct pipe_ctx *new_pipe); 162 + void dcn20_enable_plane( 163 + struct dc *dc, 164 + struct pipe_ctx *pipe_ctx, 165 + struct dc_state *context); 166 + void dcn20_update_dchubp_dpp( 167 + struct dc *dc, 168 + struct pipe_ctx *pipe_ctx, 169 + struct dc_state *context); 170 + void dcn20_post_unlock_reset_opp( 171 + struct dc *dc, 172 + struct pipe_ctx *opp_head); 158 173 #endif /* __DC_HWSS_DCN20_H__ */ 159 174
+6 -6
drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
··· 1399 1399 1400 1400 link_hwss->disable_link_output(link, link_res, signal); 1401 1401 link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; 1402 - 1403 - if (signal == SIGNAL_TYPE_EDP && 1404 - link->dc->hwss.edp_power_control && 1405 - !link->skip_implict_edp_power_control) 1406 - link->dc->hwss.edp_power_control(link, false); 1407 - else if (dmcu != NULL && dmcu->funcs->unlock_phy) 1402 + /* 1403 + * Add the logic to extract BOTH power up and power down sequences 1404 + * from enable/disable link output and only call edp panel control 1405 + * in enable_link_dp and disable_link_dp once. 1406 + */ 1407 + if (dmcu != NULL && dmcu->funcs->unlock_phy) 1408 1408 dmcu->funcs->unlock_phy(dmcu); 1409 1409 1410 1410 dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
+40 -1
drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
··· 1032 1032 if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) 1033 1033 update_state->pg_pipe_res_update[PG_MPCC][pipe_ctx->plane_res.mpcc_inst] = false; 1034 1034 1035 - if (pipe_ctx->stream_res.dsc) 1035 + if (pipe_ctx->stream_res.dsc) { 1036 1036 update_state->pg_pipe_res_update[PG_DSC][pipe_ctx->stream_res.dsc->inst] = false; 1037 + if (dc->caps.sequential_ono) { 1038 + update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; 1039 + update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; 1040 + } 1041 + } 1037 1042 1038 1043 if (pipe_ctx->stream_res.opp) 1039 1044 update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false; ··· 1595 1590 return true; 1596 1591 1597 1592 return false; 1593 + } 1594 + 1595 + /* 1596 + * Set powerup to true for every pipe to match pre-OS configuration. 1597 + */ 1598 + static void dcn35_calc_blocks_to_ungate_for_hw_release(struct dc *dc, struct pg_block_update *update_state) 1599 + { 1600 + int i = 0, j = 0; 1601 + 1602 + memset(update_state, 0, sizeof(struct pg_block_update)); 1603 + 1604 + for (i = 0; i < dc->res_pool->pipe_count; i++) 1605 + for (j = 0; j < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; j++) 1606 + update_state->pg_pipe_res_update[j][i] = true; 1607 + 1608 + update_state->pg_res_update[PG_HPO] = true; 1609 + update_state->pg_res_update[PG_DWB] = true; 1610 + } 1611 + 1612 + /* 1613 + * The purpose is to power up all gatings to restore optimization to pre-OS env. 1614 + * Re-use hwss func and existing PG&RCG flags to decide powerup sequence. 
1615 + */ 1616 + void dcn35_hardware_release(struct dc *dc) 1617 + { 1618 + struct pg_block_update pg_update_state; 1619 + 1620 + dcn35_calc_blocks_to_ungate_for_hw_release(dc, &pg_update_state); 1621 + 1622 + if (dc->hwss.root_clock_control) 1623 + dc->hwss.root_clock_control(dc, &pg_update_state, true); 1624 + /*power up required HW block*/ 1625 + if (dc->hwss.hw_block_power_up) 1626 + dc->hwss.hw_block_power_up(dc, &pg_update_state); 1598 1627 }
+2
drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
··· 99 99 100 100 bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); 101 101 102 + void dcn35_hardware_release(struct dc *dc); 103 + 102 104 #endif /* __DC_HWSS_DCN35_H__ */
+5
drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
··· 122 122 .root_clock_control = dcn35_root_clock_control, 123 123 .set_long_vtotal = dcn35_set_long_vblank, 124 124 .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, 125 + .hardware_release = dcn35_hardware_release, 126 + .detect_pipe_changes = dcn20_detect_pipe_changes, 127 + .enable_plane = dcn20_enable_plane, 128 + .update_dchubp_dpp = dcn20_update_dchubp_dpp, 129 + .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, 125 130 }; 126 131 127 132 static const struct hwseq_private_funcs dcn35_private_funcs = {
+737 -94
drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
··· 3 3 // Copyright 2024 Advanced Micro Devices, Inc. 4 4 5 5 #include "dm_services.h" 6 + #include "basics/dc_common.h" 6 7 #include "dm_helpers.h" 7 8 #include "core_types.h" 8 9 #include "resource.h" ··· 125 124 } 126 125 127 126 mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); 128 - } 129 - 130 - struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, uint8_t region) 131 - { 132 - struct dce_hwseq *hws = dc->hwseq; 133 - struct ips_ono_region_state state = {0, 0}; 134 - 135 - switch (region) { 136 - case 0: 137 - /* dccg, dio, dcio */ 138 - REG_GET_2(DOMAIN22_PG_STATUS, 139 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 140 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 141 - break; 142 - case 1: 143 - /* dchubbub, dchvm, dchubbubmem */ 144 - REG_GET_2(DOMAIN23_PG_STATUS, 145 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 146 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 147 - break; 148 - case 2: 149 - /* mpc, opp, optc, dwb */ 150 - REG_GET_2(DOMAIN24_PG_STATUS, 151 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 152 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 153 - break; 154 - case 3: 155 - /* hpo */ 156 - REG_GET_2(DOMAIN25_PG_STATUS, 157 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 158 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 159 - break; 160 - case 4: 161 - /* dchubp0, dpp0 */ 162 - REG_GET_2(DOMAIN0_PG_STATUS, 163 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 164 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 165 - break; 166 - case 5: 167 - /* dsc0 */ 168 - REG_GET_2(DOMAIN16_PG_STATUS, 169 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 170 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 171 - break; 172 - case 6: 173 - /* dchubp1, dpp1 */ 174 - REG_GET_2(DOMAIN1_PG_STATUS, 175 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 176 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 177 - break; 178 - case 7: 179 - /* dsc1 */ 180 - 
REG_GET_2(DOMAIN17_PG_STATUS, 181 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 182 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 183 - break; 184 - case 8: 185 - /* dchubp2, dpp2 */ 186 - REG_GET_2(DOMAIN2_PG_STATUS, 187 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 188 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 189 - break; 190 - case 9: 191 - /* dsc2 */ 192 - REG_GET_2(DOMAIN18_PG_STATUS, 193 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 194 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 195 - break; 196 - case 10: 197 - /* dchubp3, dpp3 */ 198 - REG_GET_2(DOMAIN3_PG_STATUS, 199 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 200 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 201 - break; 202 - case 11: 203 - /* dsc3 */ 204 - REG_GET_2(DOMAIN19_PG_STATUS, 205 - DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, 206 - DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); 207 - break; 208 - default: 209 - break; 210 - } 211 - 212 - return state; 213 127 } 214 128 215 129 void dcn401_init_hw(struct dc *dc) ··· 797 881 patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow; 798 882 799 883 pipe_ctx->stream_res.tg->funcs->program_timing( 800 - pipe_ctx->stream_res.tg, 801 - &patched_crtc_timing, 802 - pipe_ctx->pipe_dlg_param.vready_offset, 803 - pipe_ctx->pipe_dlg_param.vstartup_start, 804 - pipe_ctx->pipe_dlg_param.vupdate_offset, 805 - pipe_ctx->pipe_dlg_param.vupdate_width, 806 - pipe_ctx->pipe_dlg_param.pstate_keepout, 807 - pipe_ctx->stream->signal, 808 - true); 884 + pipe_ctx->stream_res.tg, 885 + &patched_crtc_timing, 886 + (unsigned int)pipe_ctx->global_sync.dcn4x.vready_offset_pixels, 887 + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, 888 + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, 889 + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, 890 + (unsigned 
int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines, 891 + pipe_ctx->stream->signal, 892 + true); 809 893 810 894 for (i = 0; i < opp_cnt; i++) { 811 895 opp_heads[i]->stream_res.opp->funcs->opp_pipe_clock_control( ··· 1927 2011 if (old_clk) 1928 2012 old_clk->funcs->cs_power_down(old_clk); 1929 2013 } 2014 + } 2015 + } 2016 + 2017 + static unsigned int dcn401_calculate_vready_offset_for_group(struct pipe_ctx *pipe) 2018 + { 2019 + struct pipe_ctx *other_pipe; 2020 + unsigned int vready_offset = pipe->global_sync.dcn4x.vready_offset_pixels; 2021 + 2022 + /* Always use the largest vready_offset of all connected pipes */ 2023 + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { 2024 + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) 2025 + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; 2026 + } 2027 + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { 2028 + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) 2029 + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; 2030 + } 2031 + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { 2032 + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) 2033 + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; 2034 + } 2035 + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { 2036 + if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset) 2037 + vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels; 2038 + } 2039 + 2040 + return vready_offset; 2041 + } 2042 + 2043 + static void dcn401_program_tg( 2044 + struct dc *dc, 2045 + struct pipe_ctx *pipe_ctx, 2046 + struct dc_state *context, 2047 + struct dce_hwseq *hws) 2048 + { 2049 + pipe_ctx->stream_res.tg->funcs->program_global_sync( 2050 + 
pipe_ctx->stream_res.tg, 2051 + dcn401_calculate_vready_offset_for_group(pipe_ctx), 2052 + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, 2053 + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, 2054 + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, 2055 + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); 2056 + 2057 + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) 2058 + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); 2059 + 2060 + pipe_ctx->stream_res.tg->funcs->set_vtg_params( 2061 + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); 2062 + 2063 + if (hws->funcs.setup_vupdate_interrupt) 2064 + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); 2065 + } 2066 + 2067 + static void dcn401_program_pipe( 2068 + struct dc *dc, 2069 + struct pipe_ctx *pipe_ctx, 2070 + struct dc_state *context) 2071 + { 2072 + struct dce_hwseq *hws = dc->hwseq; 2073 + 2074 + /* Only need to unblank on top pipe */ 2075 + if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) { 2076 + if (pipe_ctx->update_flags.bits.enable || 2077 + pipe_ctx->update_flags.bits.odm || 2078 + pipe_ctx->stream->update_flags.bits.abm_level) 2079 + hws->funcs.blank_pixel_data(dc, pipe_ctx, 2080 + !pipe_ctx->plane_state || 2081 + !pipe_ctx->plane_state->visible); 2082 + } 2083 + 2084 + /* Only update TG on top pipe */ 2085 + if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe 2086 + && !pipe_ctx->prev_odm_pipe) 2087 + dcn401_program_tg(dc, pipe_ctx, context, hws); 2088 + 2089 + if (pipe_ctx->update_flags.bits.odm) 2090 + hws->funcs.update_odm(dc, context, pipe_ctx); 2091 + 2092 + if (pipe_ctx->update_flags.bits.enable) { 2093 + if (hws->funcs.enable_plane) 2094 + hws->funcs.enable_plane(dc, pipe_ctx, context); 2095 + else 2096 + dc->hwss.enable_plane(dc, pipe_ctx, context); 2097 + 2098 + if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes) 2099 + 
dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub); 2100 + } 2101 + 2102 + if (pipe_ctx->update_flags.bits.det_size) { 2103 + if (dc->res_pool->hubbub->funcs->program_det_size) 2104 + dc->res_pool->hubbub->funcs->program_det_size( 2105 + dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb); 2106 + if (dc->res_pool->hubbub->funcs->program_det_segments) 2107 + dc->res_pool->hubbub->funcs->program_det_segments( 2108 + dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); 2109 + } 2110 + 2111 + if (pipe_ctx->update_flags.raw || 2112 + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || 2113 + pipe_ctx->stream->update_flags.raw) 2114 + dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); 2115 + 2116 + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || 2117 + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) 2118 + hws->funcs.set_hdr_multiplier(pipe_ctx); 2119 + 2120 + if (hws->funcs.populate_mcm_luts) { 2121 + if (pipe_ctx->plane_state) { 2122 + hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, 2123 + pipe_ctx->plane_state->lut_bank_a); 2124 + pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; 2125 + } 2126 + } 2127 + 2128 + if (pipe_ctx->plane_state && 2129 + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || 2130 + pipe_ctx->plane_state->update_flags.bits.gamma_change || 2131 + pipe_ctx->plane_state->update_flags.bits.lut_3d || 2132 + pipe_ctx->update_flags.bits.enable)) 2133 + hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); 2134 + 2135 + /* dcn10_translate_regamma_to_hw_format takes 750us to finish 2136 + * only do gamma programming for powering on, internal memcmp to avoid 2137 + * updating on slave planes 2138 + */ 2139 + if (pipe_ctx->update_flags.bits.enable || 2140 + pipe_ctx->update_flags.bits.plane_changed || 2141 + 
pipe_ctx->stream->update_flags.bits.out_tf || 2142 + (pipe_ctx->plane_state && 2143 + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) 2144 + hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); 2145 + 2146 + /* If the pipe has been enabled or has a different opp, we 2147 + * should reprogram the fmt. This deals with cases where 2148 + * interaction between mpc and odm combine on different streams 2149 + * causes a different pipe to be chosen to odm combine with. 2150 + */ 2151 + if (pipe_ctx->update_flags.bits.enable 2152 + || pipe_ctx->update_flags.bits.opp_changed) { 2153 + 2154 + pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( 2155 + pipe_ctx->stream_res.opp, 2156 + COLOR_SPACE_YCBCR601, 2157 + pipe_ctx->stream->timing.display_color_depth, 2158 + pipe_ctx->stream->signal); 2159 + 2160 + pipe_ctx->stream_res.opp->funcs->opp_program_fmt( 2161 + pipe_ctx->stream_res.opp, 2162 + &pipe_ctx->stream->bit_depth_params, 2163 + &pipe_ctx->stream->clamping); 2164 + } 2165 + 2166 + /* Set ABM pipe after other pipe configurations done */ 2167 + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) { 2168 + if (pipe_ctx->stream_res.abm) { 2169 + dc->hwss.set_pipe(pipe_ctx); 2170 + pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm, 2171 + pipe_ctx->stream->abm_level); 2172 + } 2173 + } 2174 + 2175 + if (pipe_ctx->update_flags.bits.test_pattern_changed) { 2176 + struct output_pixel_processor *odm_opp = pipe_ctx->stream_res.opp; 2177 + struct bit_depth_reduction_params params; 2178 + 2179 + memset(&params, 0, sizeof(params)); 2180 + odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params); 2181 + dc->hwss.set_disp_pattern_generator(dc, 2182 + pipe_ctx, 2183 + pipe_ctx->stream_res.test_pattern_params.test_pattern, 2184 + pipe_ctx->stream_res.test_pattern_params.color_space, 2185 + pipe_ctx->stream_res.test_pattern_params.color_depth, 2186 + NULL, 2187 + pipe_ctx->stream_res.test_pattern_params.width, 2188 
+ pipe_ctx->stream_res.test_pattern_params.height, 2189 + pipe_ctx->stream_res.test_pattern_params.offset); 2190 + } 2191 + } 2192 + 2193 + void dcn401_program_front_end_for_ctx( 2194 + struct dc *dc, 2195 + struct dc_state *context) 2196 + { 2197 + int i; 2198 + unsigned int prev_hubp_count = 0; 2199 + unsigned int hubp_count = 0; 2200 + struct dce_hwseq *hws = dc->hwseq; 2201 + struct pipe_ctx *pipe = NULL; 2202 + 2203 + DC_LOGGER_INIT(dc->ctx->logger); 2204 + 2205 + if (resource_is_pipe_topology_changed(dc->current_state, context)) 2206 + resource_log_pipe_topology_update(dc, context); 2207 + 2208 + if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { 2209 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2210 + pipe = &context->res_ctx.pipe_ctx[i]; 2211 + 2212 + if (!pipe->top_pipe && !pipe->prev_odm_pipe && pipe->plane_state) { 2213 + if (pipe->plane_state->triplebuffer_flips) 2214 + BREAK_TO_DEBUGGER(); 2215 + 2216 + /*turn off triple buffer for full update*/ 2217 + dc->hwss.program_triplebuffer( 2218 + dc, pipe, pipe->plane_state->triplebuffer_flips); 2219 + } 2220 + } 2221 + } 2222 + 2223 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2224 + if (dc->current_state->res_ctx.pipe_ctx[i].plane_state) 2225 + prev_hubp_count++; 2226 + if (context->res_ctx.pipe_ctx[i].plane_state) 2227 + hubp_count++; 2228 + } 2229 + 2230 + if (prev_hubp_count == 0 && hubp_count > 0) { 2231 + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) 2232 + dc->res_pool->hubbub->funcs->force_pstate_change_control( 2233 + dc->res_pool->hubbub, true, false); 2234 + udelay(500); 2235 + } 2236 + 2237 + /* Set pipe update flags and lock pipes */ 2238 + for (i = 0; i < dc->res_pool->pipe_count; i++) 2239 + dc->hwss.detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], 2240 + &context->res_ctx.pipe_ctx[i]); 2241 + 2242 + /* When disabling phantom pipes, turn on phantom OTG first (so we can get double 2243 + * buffer updates 
properly) 2244 + */ 2245 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2246 + struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; 2247 + 2248 + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; 2249 + 2250 + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && 2251 + dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { 2252 + struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; 2253 + 2254 + if (tg->funcs->enable_crtc) { 2255 + if (dc->hwseq->funcs.blank_pixel_data) 2256 + dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); 2257 + 2258 + tg->funcs->enable_crtc(tg); 2259 + } 2260 + } 2261 + } 2262 + /* OTG blank before disabling all front ends */ 2263 + for (i = 0; i < dc->res_pool->pipe_count; i++) 2264 + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable 2265 + && !context->res_ctx.pipe_ctx[i].top_pipe 2266 + && !context->res_ctx.pipe_ctx[i].prev_odm_pipe 2267 + && context->res_ctx.pipe_ctx[i].stream) 2268 + hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); 2269 + 2270 + 2271 + /* Disconnect mpcc */ 2272 + for (i = 0; i < dc->res_pool->pipe_count; i++) 2273 + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable 2274 + || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { 2275 + struct hubbub *hubbub = dc->res_pool->hubbub; 2276 + 2277 + /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom 2278 + * then we want to do the programming here (effectively it's being disabled). If we do 2279 + * the programming later the DET won't be updated until the OTG for the phantom pipe is 2280 + * turned on (i.e. in an MCLK switch) which can come in too late and cause issues with 2281 + * DET allocation. 
2282 + */ 2283 + if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable || 2284 + (context->res_ctx.pipe_ctx[i].plane_state && 2285 + dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == 2286 + SUBVP_PHANTOM))) { 2287 + if (hubbub->funcs->program_det_size) 2288 + hubbub->funcs->program_det_size(hubbub, 2289 + dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); 2290 + if (dc->res_pool->hubbub->funcs->program_det_segments) 2291 + dc->res_pool->hubbub->funcs->program_det_segments( 2292 + hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); 2293 + } 2294 + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, 2295 + &dc->current_state->res_ctx.pipe_ctx[i]); 2296 + DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); 2297 + } 2298 + 2299 + /* update ODM for blanked OTG master pipes */ 2300 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2301 + pipe = &context->res_ctx.pipe_ctx[i]; 2302 + if (resource_is_pipe_type(pipe, OTG_MASTER) && 2303 + !resource_is_pipe_type(pipe, DPP_PIPE) && 2304 + pipe->update_flags.bits.odm && 2305 + hws->funcs.update_odm) 2306 + hws->funcs.update_odm(dc, context, pipe); 2307 + } 2308 + 2309 + /* 2310 + * Program all updated pipes, order matters for mpcc setup. Start with 2311 + * top pipe and program all pipes that follow in order 2312 + */ 2313 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2314 + pipe = &context->res_ctx.pipe_ctx[i]; 2315 + 2316 + if (pipe->plane_state && !pipe->top_pipe) { 2317 + while (pipe) { 2318 + if (hws->funcs.program_pipe) 2319 + hws->funcs.program_pipe(dc, pipe, context); 2320 + else { 2321 + /* Don't program phantom pipes in the regular front end programming sequence. 2322 + * There is an MPO transition case where a pipe being used by a video plane is 2323 + * transitioned directly to be a phantom pipe when closing the MPO video. 
2324 + * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place 2325 + * right away) but the MPO still exists until the double buffered update of the 2326 + * main pipe so we will get a frame of underflow if the phantom pipe is 2327 + * programmed here. 2328 + */ 2329 + if (pipe->stream && 2330 + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) 2331 + dcn401_program_pipe(dc, pipe, context); 2332 + } 2333 + 2334 + pipe = pipe->bottom_pipe; 2335 + } 2336 + } 2337 + 2338 + /* Program secondary blending tree and writeback pipes */ 2339 + pipe = &context->res_ctx.pipe_ctx[i]; 2340 + if (!pipe->top_pipe && !pipe->prev_odm_pipe 2341 + && pipe->stream && pipe->stream->num_wb_info > 0 2342 + && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) 2343 + || pipe->stream->update_flags.raw) 2344 + && hws->funcs.program_all_writeback_pipes_in_tree) 2345 + hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); 2346 + 2347 + /* Avoid underflow by check of pipe line read when adding 2nd plane. 
*/ 2348 + if (hws->wa.wait_hubpret_read_start_during_mpo_transition && 2349 + !pipe->top_pipe && 2350 + pipe->stream && 2351 + pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start && 2352 + dc->current_state->stream_status[0].plane_count == 1 && 2353 + context->stream_status[0].plane_count > 1) { 2354 + pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp); 2355 + } 2356 + } 2357 + } 2358 + 2359 + void dcn401_post_unlock_program_front_end( 2360 + struct dc *dc, 2361 + struct dc_state *context) 2362 + { 2363 + // Timeout for pipe enable 2364 + unsigned int timeout_us = 100000; 2365 + unsigned int polling_interval_us = 1; 2366 + struct dce_hwseq *hwseq = dc->hwseq; 2367 + int i; 2368 + 2369 + DC_LOGGER_INIT(dc->ctx->logger); 2370 + 2371 + for (i = 0; i < dc->res_pool->pipe_count; i++) 2372 + if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) && 2373 + !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD)) 2374 + dc->hwss.post_unlock_reset_opp(dc, 2375 + &dc->current_state->res_ctx.pipe_ctx[i]); 2376 + 2377 + for (i = 0; i < dc->res_pool->pipe_count; i++) 2378 + if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) 2379 + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); 2380 + 2381 + /* 2382 + * If we are enabling a pipe, we need to wait for pending clear as this is a critical 2383 + * part of the enable operation otherwise, DM may request an immediate flip which 2384 + * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which 2385 + * is unsupported on DCN. 
2386 + */ 2387 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2388 + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 2389 + // Don't check flip pending on phantom pipes 2390 + if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && 2391 + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { 2392 + struct hubp *hubp = pipe->plane_res.hubp; 2393 + int j = 0; 2394 + 2395 + for (j = 0; j < timeout_us / polling_interval_us 2396 + && hubp->funcs->hubp_is_flip_pending(hubp); j++) 2397 + udelay(polling_interval_us); 2398 + } 2399 + } 2400 + 2401 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2402 + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 2403 + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; 2404 + 2405 + /* When going from a smaller ODM slice count to larger, we must ensure double 2406 + * buffer update completes before we return to ensure we don't reduce DISPCLK 2407 + * before we've transitioned to 2:1 or 4:1 2408 + */ 2409 + if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) && 2410 + resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) && 2411 + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { 2412 + int j = 0; 2413 + struct timing_generator *tg = pipe->stream_res.tg; 2414 + 2415 + if (tg->funcs->get_optc_double_buffer_pending) { 2416 + for (j = 0; j < timeout_us / polling_interval_us 2417 + && tg->funcs->get_optc_double_buffer_pending(tg); j++) 2418 + udelay(polling_interval_us); 2419 + } 2420 + } 2421 + } 2422 + 2423 + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) 2424 + dc->res_pool->hubbub->funcs->force_pstate_change_control( 2425 + dc->res_pool->hubbub, false, false); 2426 + 2427 + 2428 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2429 + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 2430 + 2431 + if (pipe->plane_state && !pipe->top_pipe) { 2432 + /* Program phantom pipe 
here to prevent a frame of underflow in the MPO transition 2433 + * case (if a pipe being used for a video plane transitions to a phantom pipe, it 2434 + * can underflow due to HUBP_VTG_SEL programming if done in the regular front end 2435 + * programming sequence). 2436 + */ 2437 + while (pipe) { 2438 + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { 2439 + /* When turning on the phantom pipe we want to run through the 2440 + * entire enable sequence, so apply all the "enable" flags. 2441 + */ 2442 + if (dc->hwss.apply_update_flags_for_phantom) 2443 + dc->hwss.apply_update_flags_for_phantom(pipe); 2444 + if (dc->hwss.update_phantom_vp_position) 2445 + dc->hwss.update_phantom_vp_position(dc, context, pipe); 2446 + dcn401_program_pipe(dc, pipe, context); 2447 + } 2448 + pipe = pipe->bottom_pipe; 2449 + } 2450 + } 2451 + } 2452 + 2453 + if (!hwseq) 2454 + return; 2455 + 2456 + /* P-State support transitions: 2457 + * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe 2458 + * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) 2459 + * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe 2460 + * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe 2461 + * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes 2462 + */ 2463 + if (hwseq->funcs.update_force_pstate) 2464 + dc->hwseq->funcs.update_force_pstate(dc, context); 2465 + 2466 + /* Only program the MALL registers after all the main and phantom pipes 2467 + * are done programming. 
2468 + */ 2469 + if (hwseq->funcs.program_mall_pipe_config) 2470 + hwseq->funcs.program_mall_pipe_config(dc, context); 2471 + 2472 + /* WA to apply WM setting*/ 2473 + if (hwseq->wa.DEGVIDCN21) 2474 + dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub); 2475 + 2476 + 2477 + /* WA for stutter underflow during MPO transitions when adding 2nd plane */ 2478 + if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) { 2479 + 2480 + if (dc->current_state->stream_status[0].plane_count == 1 && 2481 + context->stream_status[0].plane_count > 1) { 2482 + 2483 + struct timing_generator *tg = dc->res_pool->timing_generators[0]; 2484 + 2485 + dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, false); 2486 + 2487 + hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied = true; 2488 + hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied_on_frame = 2489 + tg->funcs->get_frame_count(tg); 2490 + } 2491 + } 2492 + } 2493 + 2494 + bool dcn401_update_bandwidth( 2495 + struct dc *dc, 2496 + struct dc_state *context) 2497 + { 2498 + int i; 2499 + struct dce_hwseq *hws = dc->hwseq; 2500 + 2501 + /* recalculate DML parameters */ 2502 + if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) 2503 + return false; 2504 + 2505 + /* apply updated bandwidth parameters */ 2506 + dc->hwss.prepare_bandwidth(dc, context); 2507 + 2508 + /* update hubp configs for all pipes */ 2509 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 2510 + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; 2511 + 2512 + if (pipe_ctx->plane_state == NULL) 2513 + continue; 2514 + 2515 + if (pipe_ctx->top_pipe == NULL) { 2516 + bool blank = !is_pipe_tree_visible(pipe_ctx); 2517 + 2518 + pipe_ctx->stream_res.tg->funcs->program_global_sync( 2519 + pipe_ctx->stream_res.tg, 2520 + dcn401_calculate_vready_offset_for_group(pipe_ctx), 2521 + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, 2522 + 
(unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, 2523 + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, 2524 + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); 2525 + 2526 + pipe_ctx->stream_res.tg->funcs->set_vtg_params( 2527 + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); 2528 + 2529 + if (pipe_ctx->prev_odm_pipe == NULL) 2530 + hws->funcs.blank_pixel_data(dc, pipe_ctx, blank); 2531 + 2532 + if (hws->funcs.setup_vupdate_interrupt) 2533 + hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); 2534 + } 2535 + 2536 + if (pipe_ctx->plane_res.hubp->funcs->hubp_setup2) 2537 + pipe_ctx->plane_res.hubp->funcs->hubp_setup2( 2538 + pipe_ctx->plane_res.hubp, 2539 + &pipe_ctx->hubp_regs, 2540 + &pipe_ctx->global_sync, 2541 + &pipe_ctx->stream->timing); 2542 + } 2543 + 2544 + return true; 2545 + } 2546 + 2547 + void dcn401_detect_pipe_changes(struct dc_state *old_state, 2548 + struct dc_state *new_state, 2549 + struct pipe_ctx *old_pipe, 2550 + struct pipe_ctx *new_pipe) 2551 + { 2552 + bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM; 2553 + bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM; 2554 + 2555 + unsigned int old_pipe_vready_offset_pixels = old_pipe->global_sync.dcn4x.vready_offset_pixels; 2556 + unsigned int new_pipe_vready_offset_pixels = new_pipe->global_sync.dcn4x.vready_offset_pixels; 2557 + unsigned int old_pipe_vstartup_lines = old_pipe->global_sync.dcn4x.vstartup_lines; 2558 + unsigned int new_pipe_vstartup_lines = new_pipe->global_sync.dcn4x.vstartup_lines; 2559 + unsigned int old_pipe_vupdate_offset_pixels = old_pipe->global_sync.dcn4x.vupdate_offset_pixels; 2560 + unsigned int new_pipe_vupdate_offset_pixels = new_pipe->global_sync.dcn4x.vupdate_offset_pixels; 2561 + unsigned int old_pipe_vupdate_width_pixels = old_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels; 2562 + unsigned int 
new_pipe_vupdate_width_pixels = new_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels; 2563 + 2564 + new_pipe->update_flags.raw = 0; 2565 + 2566 + /* If non-phantom pipe is being transitioned to a phantom pipe, 2567 + * set disable and return immediately. This is because the pipe 2568 + * that was previously in use must be fully disabled before we 2569 + * can "enable" it as a phantom pipe (since the OTG will certainly 2570 + * be different). The post_unlock sequence will set the correct 2571 + * update flags to enable the phantom pipe. 2572 + */ 2573 + if (old_pipe->plane_state && !old_is_phantom && 2574 + new_pipe->plane_state && new_is_phantom) { 2575 + new_pipe->update_flags.bits.disable = 1; 2576 + return; 2577 + } 2578 + 2579 + if (resource_is_pipe_type(new_pipe, OTG_MASTER) && 2580 + resource_is_odm_topology_changed(new_pipe, old_pipe)) 2581 + /* Detect odm changes */ 2582 + new_pipe->update_flags.bits.odm = 1; 2583 + 2584 + /* Exit on unchanged, unused pipe */ 2585 + if (!old_pipe->plane_state && !new_pipe->plane_state) 2586 + return; 2587 + /* Detect pipe enable/disable */ 2588 + if (!old_pipe->plane_state && new_pipe->plane_state) { 2589 + new_pipe->update_flags.bits.enable = 1; 2590 + new_pipe->update_flags.bits.mpcc = 1; 2591 + new_pipe->update_flags.bits.dppclk = 1; 2592 + new_pipe->update_flags.bits.hubp_interdependent = 1; 2593 + new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; 2594 + new_pipe->update_flags.bits.unbounded_req = 1; 2595 + new_pipe->update_flags.bits.gamut_remap = 1; 2596 + new_pipe->update_flags.bits.scaler = 1; 2597 + new_pipe->update_flags.bits.viewport = 1; 2598 + new_pipe->update_flags.bits.det_size = 1; 2599 + if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE && 2600 + new_pipe->stream_res.test_pattern_params.width != 0 && 2601 + new_pipe->stream_res.test_pattern_params.height != 0) 2602 + new_pipe->update_flags.bits.test_pattern_changed = 1; 2603 + if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) { 
2604 + new_pipe->update_flags.bits.odm = 1; 2605 + new_pipe->update_flags.bits.global_sync = 1; 2606 + } 2607 + return; 2608 + } 2609 + 2610 + /* For SubVP we need to unconditionally enable because any phantom pipes are 2611 + * always removed then newly added for every full update whenever SubVP is in use. 2612 + * The remove-add sequence of the phantom pipe always results in the pipe 2613 + * being blanked in enable_stream_timing (DPG). 2614 + */ 2615 + if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM) 2616 + new_pipe->update_flags.bits.enable = 1; 2617 + 2618 + /* Phantom pipes are effectively disabled, if the pipe was previously phantom 2619 + * we have to enable 2620 + */ 2621 + if (old_pipe->plane_state && old_is_phantom && 2622 + new_pipe->plane_state && !new_is_phantom) 2623 + new_pipe->update_flags.bits.enable = 1; 2624 + 2625 + if (old_pipe->plane_state && !new_pipe->plane_state) { 2626 + new_pipe->update_flags.bits.disable = 1; 2627 + return; 2628 + } 2629 + 2630 + /* Detect plane change */ 2631 + if (old_pipe->plane_state != new_pipe->plane_state) 2632 + new_pipe->update_flags.bits.plane_changed = true; 2633 + 2634 + /* Detect top pipe only changes */ 2635 + if (resource_is_pipe_type(new_pipe, OTG_MASTER)) { 2636 + /* Detect global sync changes */ 2637 + if ((old_pipe_vready_offset_pixels != new_pipe_vready_offset_pixels) 2638 + || (old_pipe_vstartup_lines != new_pipe_vstartup_lines) 2639 + || (old_pipe_vupdate_offset_pixels != new_pipe_vupdate_offset_pixels) 2640 + || (old_pipe_vupdate_width_pixels != new_pipe_vupdate_width_pixels)) 2641 + new_pipe->update_flags.bits.global_sync = 1; 2642 + } 2643 + 2644 + if (old_pipe->det_buffer_size_kb != new_pipe->det_buffer_size_kb) 2645 + new_pipe->update_flags.bits.det_size = 1; 2646 + 2647 + /* 2648 + * Detect opp / tg change, only set on change, not on enable 2649 + * Assume mpcc inst = pipe index, if not this code needs to be updated 2650 + * since mpcc is what is 
affected by these. In fact all of our sequence 2651 + * makes this assumption at the moment with how hubp reset is matched to 2652 + * same index mpcc reset. 2653 + */ 2654 + if (old_pipe->stream_res.opp != new_pipe->stream_res.opp) 2655 + new_pipe->update_flags.bits.opp_changed = 1; 2656 + if (old_pipe->stream_res.tg != new_pipe->stream_res.tg) 2657 + new_pipe->update_flags.bits.tg_changed = 1; 2658 + 2659 + /* 2660 + * Detect mpcc blending changes, only dpp inst and opp matter here, 2661 + * mpccs getting removed/inserted update connected ones during their own 2662 + * programming 2663 + */ 2664 + if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp 2665 + || old_pipe->stream_res.opp != new_pipe->stream_res.opp) 2666 + new_pipe->update_flags.bits.mpcc = 1; 2667 + 2668 + /* Detect dppclk change */ 2669 + if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz) 2670 + new_pipe->update_flags.bits.dppclk = 1; 2671 + 2672 + /* Check for scl update */ 2673 + if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data))) 2674 + new_pipe->update_flags.bits.scaler = 1; 2675 + /* Check for vp update */ 2676 + if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect)) 2677 + || memcmp(&old_pipe->plane_res.scl_data.viewport_c, 2678 + &new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect))) 2679 + new_pipe->update_flags.bits.viewport = 1; 2680 + 2681 + /* Detect dlg/ttu/rq updates */ 2682 + { 2683 + struct dml2_display_dlg_regs old_dlg_regs = old_pipe->hubp_regs.dlg_regs; 2684 + struct dml2_display_ttu_regs old_ttu_regs = old_pipe->hubp_regs.ttu_regs; 2685 + struct dml2_display_rq_regs old_rq_regs = old_pipe->hubp_regs.rq_regs; 2686 + struct dml2_display_dlg_regs *new_dlg_regs = &new_pipe->hubp_regs.dlg_regs; 2687 + struct dml2_display_ttu_regs *new_ttu_regs = &new_pipe->hubp_regs.ttu_regs; 2688 + struct dml2_display_rq_regs *new_rq_regs = 
&new_pipe->hubp_regs.rq_regs; 2689 + 2690 + /* Detect pipe interdependent updates */ 2691 + if ((old_dlg_regs.dst_y_prefetch != new_dlg_regs->dst_y_prefetch) 2692 + || (old_dlg_regs.vratio_prefetch != new_dlg_regs->vratio_prefetch) 2693 + || (old_dlg_regs.vratio_prefetch_c != new_dlg_regs->vratio_prefetch_c) 2694 + || (old_dlg_regs.dst_y_per_vm_vblank != new_dlg_regs->dst_y_per_vm_vblank) 2695 + || (old_dlg_regs.dst_y_per_row_vblank != new_dlg_regs->dst_y_per_row_vblank) 2696 + || (old_dlg_regs.dst_y_per_vm_flip != new_dlg_regs->dst_y_per_vm_flip) 2697 + || (old_dlg_regs.dst_y_per_row_flip != new_dlg_regs->dst_y_per_row_flip) 2698 + || (old_dlg_regs.refcyc_per_meta_chunk_vblank_l != new_dlg_regs->refcyc_per_meta_chunk_vblank_l) 2699 + || (old_dlg_regs.refcyc_per_meta_chunk_vblank_c != new_dlg_regs->refcyc_per_meta_chunk_vblank_c) 2700 + || (old_dlg_regs.refcyc_per_meta_chunk_flip_l != new_dlg_regs->refcyc_per_meta_chunk_flip_l) 2701 + || (old_dlg_regs.refcyc_per_line_delivery_pre_l != new_dlg_regs->refcyc_per_line_delivery_pre_l) 2702 + || (old_dlg_regs.refcyc_per_line_delivery_pre_c != new_dlg_regs->refcyc_per_line_delivery_pre_c) 2703 + || (old_ttu_regs.refcyc_per_req_delivery_pre_l != new_ttu_regs->refcyc_per_req_delivery_pre_l) 2704 + || (old_ttu_regs.refcyc_per_req_delivery_pre_c != new_ttu_regs->refcyc_per_req_delivery_pre_c) 2705 + || (old_ttu_regs.refcyc_per_req_delivery_pre_cur0 != 2706 + new_ttu_regs->refcyc_per_req_delivery_pre_cur0) 2707 + || (old_ttu_regs.min_ttu_vblank != new_ttu_regs->min_ttu_vblank) 2708 + || (old_ttu_regs.qos_level_flip != new_ttu_regs->qos_level_flip)) { 2709 + old_dlg_regs.dst_y_prefetch = new_dlg_regs->dst_y_prefetch; 2710 + old_dlg_regs.vratio_prefetch = new_dlg_regs->vratio_prefetch; 2711 + old_dlg_regs.vratio_prefetch_c = new_dlg_regs->vratio_prefetch_c; 2712 + old_dlg_regs.dst_y_per_vm_vblank = new_dlg_regs->dst_y_per_vm_vblank; 2713 + old_dlg_regs.dst_y_per_row_vblank = new_dlg_regs->dst_y_per_row_vblank; 2714 + 
old_dlg_regs.dst_y_per_vm_flip = new_dlg_regs->dst_y_per_vm_flip; 2715 + old_dlg_regs.dst_y_per_row_flip = new_dlg_regs->dst_y_per_row_flip; 2716 + old_dlg_regs.refcyc_per_meta_chunk_vblank_l = new_dlg_regs->refcyc_per_meta_chunk_vblank_l; 2717 + old_dlg_regs.refcyc_per_meta_chunk_vblank_c = new_dlg_regs->refcyc_per_meta_chunk_vblank_c; 2718 + old_dlg_regs.refcyc_per_meta_chunk_flip_l = new_dlg_regs->refcyc_per_meta_chunk_flip_l; 2719 + old_dlg_regs.refcyc_per_line_delivery_pre_l = new_dlg_regs->refcyc_per_line_delivery_pre_l; 2720 + old_dlg_regs.refcyc_per_line_delivery_pre_c = new_dlg_regs->refcyc_per_line_delivery_pre_c; 2721 + old_ttu_regs.refcyc_per_req_delivery_pre_l = new_ttu_regs->refcyc_per_req_delivery_pre_l; 2722 + old_ttu_regs.refcyc_per_req_delivery_pre_c = new_ttu_regs->refcyc_per_req_delivery_pre_c; 2723 + old_ttu_regs.refcyc_per_req_delivery_pre_cur0 = new_ttu_regs->refcyc_per_req_delivery_pre_cur0; 2724 + old_ttu_regs.min_ttu_vblank = new_ttu_regs->min_ttu_vblank; 2725 + old_ttu_regs.qos_level_flip = new_ttu_regs->qos_level_flip; 2726 + new_pipe->update_flags.bits.hubp_interdependent = 1; 2727 + } 2728 + /* Detect any other updates to ttu/rq/dlg */ 2729 + if (memcmp(&old_dlg_regs, new_dlg_regs, sizeof(old_dlg_regs)) || 2730 + memcmp(&old_ttu_regs, new_ttu_regs, sizeof(old_ttu_regs)) || 2731 + memcmp(&old_rq_regs, new_rq_regs, sizeof(old_rq_regs))) 2732 + new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; 2733 + } 2734 + 2735 + if (old_pipe->unbounded_req != new_pipe->unbounded_req) 2736 + new_pipe->update_flags.bits.unbounded_req = 1; 2737 + 2738 + if (memcmp(&old_pipe->stream_res.test_pattern_params, 2739 + &new_pipe->stream_res.test_pattern_params, sizeof(struct test_pattern_params))) { 2740 + new_pipe->update_flags.bits.test_pattern_changed = 1; 1930 2741 } 1931 2742 }
+8 -3
drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
··· 63 63 64 64 bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable); 65 65 66 - struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, 67 - uint8_t region); 68 66 void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc, 69 67 const struct pipe_ctx *top_pipe_to_program); 70 68 ··· 94 96 struct dc *dc, 95 97 struct dc_state *context); 96 98 void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx); 97 - 99 + void dcn401_program_front_end_for_ctx(struct dc *dc, struct dc_state *context); 100 + void dcn401_post_unlock_program_front_end(struct dc *dc, struct dc_state *context); 101 + bool dcn401_update_bandwidth(struct dc *dc, struct dc_state *context); 102 + void dcn401_detect_pipe_changes( 103 + struct dc_state *old_state, 104 + struct dc_state *new_state, 105 + struct pipe_ctx *old_pipe, 106 + struct pipe_ctx *new_pipe); 98 107 #endif /* __DC_HWSS_DCN401_H__ */
+7 -3
drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
··· 17 17 .init_hw = dcn401_init_hw, 18 18 .apply_ctx_to_hw = dce110_apply_ctx_to_hw, 19 19 .apply_ctx_for_surface = NULL, 20 - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, 20 + .program_front_end_for_ctx = dcn401_program_front_end_for_ctx, 21 21 .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, 22 - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, 22 + .post_unlock_program_front_end = dcn401_post_unlock_program_front_end, 23 23 .update_plane_addr = dcn20_update_plane_addr, 24 24 .update_dchub = dcn10_update_dchub, 25 25 .update_pending_status = dcn10_update_pending_status, ··· 42 42 .cursor_lock = dcn10_cursor_lock, 43 43 .prepare_bandwidth = dcn401_prepare_bandwidth, 44 44 .optimize_bandwidth = dcn401_optimize_bandwidth, 45 - .update_bandwidth = dcn20_update_bandwidth, 45 + .update_bandwidth = dcn401_update_bandwidth, 46 46 .set_drr = dcn10_set_drr, 47 47 .get_position = dcn10_get_position, 48 48 .set_static_screen_control = dcn31_set_static_screen_control, ··· 99 99 .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, 100 100 .program_outstanding_updates = dcn401_program_outstanding_updates, 101 101 .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, 102 + .detect_pipe_changes = dcn401_detect_pipe_changes, 103 + .enable_plane = dcn20_enable_plane, 104 + .update_dchubp_dpp = dcn20_update_dchubp_dpp, 105 + .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, 102 106 }; 103 107 104 108 static const struct hwseq_private_funcs dcn401_private_funcs = {
+15 -3
drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
··· 194 194 DMUB_SUBVP_SAVE_SURF_ADDR, 195 195 HUBP_WAIT_FOR_DCC_META_PROP, 196 196 DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, 197 - 198 197 }; 199 198 200 199 struct block_sequence { ··· 457 458 struct dc_state *context); 458 459 void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); 459 460 void (*wait_for_all_pending_updates)(const struct pipe_ctx *pipe_ctx); 461 + void (*detect_pipe_changes)(struct dc_state *old_state, 462 + struct dc_state *new_state, 463 + struct pipe_ctx *old_pipe, 464 + struct pipe_ctx *new_pipe); 465 + void (*enable_plane)(struct dc *dc, 466 + struct pipe_ctx *pipe_ctx, 467 + struct dc_state *context); 468 + void (*update_dchubp_dpp)(struct dc *dc, 469 + struct pipe_ctx *pipe_ctx, 470 + struct dc_state *context); 471 + void (*post_unlock_reset_opp)(struct dc *dc, 472 + struct pipe_ctx *opp_head); 460 473 }; 461 474 462 475 void color_space_to_black_color( ··· 496 485 void get_mpctree_visual_confirm_color( 497 486 struct pipe_ctx *pipe_ctx, 498 487 struct tg_color *color); 499 - 488 + void get_vabc_visual_confirm_color( 489 + struct pipe_ctx *pipe_ctx, 490 + struct tg_color *color); 500 491 void get_subvp_visual_confirm_color( 501 492 struct pipe_ctx *pipe_ctx, 502 493 struct tg_color *color); 503 - 504 494 void get_fams2_visual_confirm_color( 505 495 struct dc *dc, 506 496 struct dc_state *context,
+2
drivers/gpu/drm/amd/display/dc/inc/core_types.h
··· 217 217 */ 218 218 int (*get_power_profile)(const struct dc_state *context); 219 219 unsigned int (*get_det_buffer_size)(const struct dc_state *context); 220 + unsigned int (*get_vstartup_for_pipe)(struct pipe_ctx *pipe_ctx); 220 221 }; 221 222 222 223 struct audio_support{ ··· 466 465 unsigned int surface_size_in_mall_bytes; 467 466 struct dml2_dchub_per_pipe_register_set hubp_regs; 468 467 struct dml2_hubp_pipe_mcache_regs mcache_regs; 468 + union dml2_global_sync_programming global_sync; 469 469 470 470 struct dwbc *dwbc; 471 471 struct mcif_wb *mcif_wb;
+3
drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
··· 306 306 */ 307 307 void (*set_hard_min_memclk)(struct clk_mgr *clk_mgr, bool current_mode); 308 308 309 + int (*get_hard_min_memclk)(struct clk_mgr *clk_mgr); 310 + int (*get_hard_min_fclk)(struct clk_mgr *clk_mgr); 311 + 309 312 /* Send message to PMFW to set hard max memclk frequency to highest DPM */ 310 313 void (*set_hard_max_memclk)(struct clk_mgr *clk_mgr); 311 314
+13 -2
drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
··· 42 42 #include "cursor_reg_cache.h" 43 43 44 44 #include "dml2/dml21/inc/dml_top_dchub_registers.h" 45 + #include "dml2/dml21/inc/dml_top_types.h" 45 46 46 47 #define OPP_ID_INVALID 0xf 47 48 #define MAX_TTU 0xffffff ··· 145 144 struct _vcs_dpi_display_rq_regs_st *rq_regs, 146 145 struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); 147 146 147 + void (*hubp_setup2)( 148 + struct hubp *hubp, 149 + struct dml2_dchub_per_pipe_register_set *pipe_regs, 150 + union dml2_global_sync_programming *pipe_global_sync, 151 + struct dc_crtc_timing *timing); 152 + 148 153 void (*hubp_setup_interdependent)( 149 154 struct hubp *hubp, 150 155 struct _vcs_dpi_display_dlg_regs_st *dlg_regs, 151 156 struct _vcs_dpi_display_ttu_regs_st *ttu_regs); 157 + 158 + void (*hubp_setup_interdependent2)( 159 + struct hubp *hubp, 160 + struct dml2_dchub_per_pipe_register_set *pipe_regs); 152 161 153 162 void (*dcc_control)(struct hubp *hubp, bool enable, 154 163 enum hubp_ind_block_size blk_size); ··· 176 165 void (*hubp_program_pte_vm)( 177 166 struct hubp *hubp, 178 167 enum surface_pixel_format format, 179 - union dc_tiling_info *tiling_info, 168 + struct dc_tiling_info *tiling_info, 180 169 enum dc_rotation_angle rotation); 181 170 182 171 void (*hubp_set_vm_system_aperture_settings)( ··· 190 179 void (*hubp_program_surface_config)( 191 180 struct hubp *hubp, 192 181 enum surface_pixel_format format, 193 - union dc_tiling_info *tiling_info, 182 + struct dc_tiling_info *tiling_info, 194 183 struct plane_size *plane_size, 195 184 enum dc_rotation_angle rotation, 196 185 struct dc_plane_dcc_param *dcc,
+2 -2
drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
··· 150 150 void (*mem_input_program_pte_vm)( 151 151 struct mem_input *mem_input, 152 152 enum surface_pixel_format format, 153 - union dc_tiling_info *tiling_info, 153 + struct dc_tiling_info *tiling_info, 154 154 enum dc_rotation_angle rotation); 155 155 156 156 void (*mem_input_set_vm_system_aperture_settings)( ··· 164 164 void (*mem_input_program_surface_config)( 165 165 struct mem_input *mem_input, 166 166 enum surface_pixel_format format, 167 - union dc_tiling_info *tiling_info, 167 + struct dc_tiling_info *tiling_info, 168 168 struct plane_size *plane_size, 169 169 enum dc_rotation_angle rotation, 170 170 struct dc_plane_dcc_param *dcc,
+1 -1
drivers/gpu/drm/amd/display/dc/inc/hw/optc.h
··· 210 210 211 211 bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params); 212 212 213 - bool optc1_get_crc(struct timing_generator *optc, 213 + bool optc1_get_crc(struct timing_generator *optc, uint8_t idx, 214 214 uint32_t *r_cr, 215 215 uint32_t *g_y, 216 216 uint32_t *b_cb);
+4 -1
drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
··· 141 141 142 142 bool continuous_mode; 143 143 bool enable; 144 + 145 + uint8_t crc_eng_inst; 146 + bool reset; 144 147 }; 145 148 146 149 /** ··· 294 291 * @get_crc: Get CRCs for the given timing generator. Return false if 295 292 * CRCs are not enabled (via configure_crc). 296 293 */ 297 - bool (*get_crc)(struct timing_generator *tg, 294 + bool (*get_crc)(struct timing_generator *tg, uint8_t idx, 298 295 uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); 299 296 300 297 void (*program_manual_trigger)(struct timing_generator *optc);
+4
drivers/gpu/drm/amd/display/dc/inc/link.h
··· 148 148 const struct dc_stream_state *stream, 149 149 const unsigned int num_streams); 150 150 151 + uint32_t (*dp_required_hblank_size_bytes)( 152 + const struct dc_link *link, 153 + struct dp_audio_bandwidth_params *audio_params); 154 + 151 155 152 156 /*************************** DPMS *************************************/ 153 157 void (*set_dpms_on)(struct dc_state *state, struct pipe_ctx *pipe_ctx);
+81 -31
drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
··· 302 302 /* Enable stereo - only when we need to pack 3D frame. Other types 303 303 * of stereo handled in explicit call 304 304 */ 305 - 306 305 if (optc->funcs->is_two_pixels_per_container(&patched_crtc_timing) || optc1->opp_count == 2) 307 306 h_div = H_TIMING_DIV_BY2; 308 307 ··· 1470 1471 if (!optc1_is_tg_enabled(optc)) 1471 1472 return false; 1472 1473 1473 - REG_WRITE(OTG_CRC_CNTL, 0); 1474 + if (!params->enable || params->reset) 1475 + REG_WRITE(OTG_CRC_CNTL, 0); 1474 1476 1475 1477 if (!params->enable) 1476 1478 return true; 1477 1479 1478 1480 /* Program frame boundaries */ 1479 - /* Window A x axis start and end. */ 1480 - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, 1481 - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, 1482 - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); 1481 + switch (params->crc_eng_inst) { 1482 + case 0: 1483 + /* Window A x axis start and end. */ 1484 + REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, 1485 + OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, 1486 + OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); 1483 1487 1484 - /* Window A y axis start and end. */ 1485 - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, 1486 - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, 1487 - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); 1488 + /* Window A y axis start and end. */ 1489 + REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, 1490 + OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, 1491 + OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); 1488 1492 1489 - /* Window B x axis start and end. */ 1490 - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, 1491 - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, 1492 - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); 1493 + /* Window B x axis start and end. */ 1494 + REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, 1495 + OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, 1496 + OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); 1493 1497 1494 - /* Window B y axis start and end. 
*/ 1495 - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, 1496 - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, 1497 - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); 1498 + /* Window B y axis start and end. */ 1499 + REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, 1500 + OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, 1501 + OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); 1498 1502 1499 - /* Set crc mode and selection, and enable. Only using CRC0*/ 1500 - REG_UPDATE_3(OTG_CRC_CNTL, 1501 - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 1502 - OTG_CRC0_SELECT, params->selection, 1503 - OTG_CRC_EN, 1); 1503 + /* Set crc mode and selection, and enable.*/ 1504 + REG_UPDATE_3(OTG_CRC_CNTL, 1505 + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 1506 + OTG_CRC0_SELECT, params->selection, 1507 + OTG_CRC_EN, 1); 1508 + break; 1509 + case 1: 1510 + /* Window A x axis start and end. */ 1511 + REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL, 1512 + OTG_CRC1_WINDOWA_X_START, params->windowa_x_start, 1513 + OTG_CRC1_WINDOWA_X_END, params->windowa_x_end); 1514 + 1515 + /* Window A y axis start and end. */ 1516 + REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL, 1517 + OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start, 1518 + OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end); 1519 + 1520 + /* Window B x axis start and end. */ 1521 + REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL, 1522 + OTG_CRC1_WINDOWB_X_START, params->windowb_x_start, 1523 + OTG_CRC1_WINDOWB_X_END, params->windowb_x_end); 1524 + 1525 + /* Window B y axis start and end. */ 1526 + REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL, 1527 + OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start, 1528 + OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end); 1529 + 1530 + /* Set crc mode and selection, and enable.*/ 1531 + REG_UPDATE_3(OTG_CRC_CNTL, 1532 + OTG_CRC_CONT_EN, params->continuous_mode ? 
1 : 0, 1533 + OTG_CRC1_SELECT, params->selection, 1534 + OTG_CRC_EN, 1); 1535 + break; 1536 + default: 1537 + return false; 1538 + } 1504 1539 1505 1540 return true; 1506 1541 } ··· 1543 1510 * optc1_get_crc - Capture CRC result per component 1544 1511 * 1545 1512 * @optc: timing_generator instance. 1513 + * @idx: index of crc engine to get CRC from 1546 1514 * @r_cr: 16-bit primary CRC signature for red data. 1547 1515 * @g_y: 16-bit primary CRC signature for green data. 1548 1516 * @b_cb: 16-bit primary CRC signature for blue data. ··· 1555 1521 * If CRC is disabled, return false; otherwise, return true, and the CRC 1556 1522 * results in the parameters. 1557 1523 */ 1558 - bool optc1_get_crc(struct timing_generator *optc, 1524 + bool optc1_get_crc(struct timing_generator *optc, uint8_t idx, 1559 1525 uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) 1560 1526 { 1561 1527 uint32_t field = 0; ··· 1567 1533 if (!field) 1568 1534 return false; 1569 1535 1570 - /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ 1571 - REG_GET_2(OTG_CRC0_DATA_RG, 1572 - CRC0_R_CR, r_cr, 1573 - CRC0_G_Y, g_y); 1536 + switch (idx) { 1537 + case 0: 1538 + /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ 1539 + REG_GET_2(OTG_CRC0_DATA_RG, 1540 + CRC0_R_CR, r_cr, 1541 + CRC0_G_Y, g_y); 1574 1542 1575 - /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ 1576 - REG_GET(OTG_CRC0_DATA_B, 1577 - CRC0_B_CB, b_cb); 1543 + /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ 1544 + REG_GET(OTG_CRC0_DATA_B, 1545 + CRC0_B_CB, b_cb); 1546 + break; 1547 + case 1: 1548 + /* OTG_CRC1_DATA_RG has the CRC16 results for the red and green component */ 1549 + REG_GET_2(OTG_CRC1_DATA_RG, 1550 + CRC1_R_CR, r_cr, 1551 + CRC1_G_Y, g_y); 1552 + 1553 + /* OTG_CRC1_DATA_B has the CRC16 results for the blue component */ 1554 + REG_GET(OTG_CRC1_DATA_B, 1555 + CRC1_B_CB, b_cb); 1556 + break; 1557 + default: 1558 + return false; 1559 + } 
1578 1560 1579 1561 return true; 1580 1562 }
+19
drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
··· 86 86 SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ 87 87 SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ 88 88 SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ 89 + SRI(OTG_CRC1_DATA_RG, OTG, inst),\ 90 + SRI(OTG_CRC1_DATA_B, OTG, inst),\ 91 + SRI(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\ 92 + SRI(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\ 93 + SRI(OTG_CRC1_WINDOWB_X_CONTROL, OTG, inst),\ 94 + SRI(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\ 89 95 SR(GSL_SOURCE_SELECT),\ 90 96 SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ 91 97 SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst) ··· 321 315 SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ 322 316 SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ 323 317 SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ 318 + SF(OTG0_OTG_CRC_CNTL, OTG_CRC1_SELECT, mask_sh),\ 324 319 SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ 325 320 SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ 326 321 SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ ··· 334 327 SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ 335 328 SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ 336 329 SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ 330 + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\ 331 + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\ 332 + SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\ 333 + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\ 334 + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\ 335 + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\ 336 + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\ 337 + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\ 338 + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\ 339 + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\ 340 + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\ 337 341 
SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ 338 342 SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ 339 343 SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ ··· 500 482 type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\ 501 483 type OTG_CRC_CONT_EN;\ 502 484 type OTG_CRC0_SELECT;\ 485 + type OTG_CRC1_SELECT;\ 503 486 type OTG_CRC_EN;\ 504 487 type CRC0_R_CR;\ 505 488 type CRC0_G_Y;\
+77 -24
drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
··· 183 183 { 184 184 struct optc *optc1 = DCN10TG_FROM_TG(optc); 185 185 186 + /* Cannot configure crc on a CRTC that is disabled */ 186 187 if (!optc1_is_tg_enabled(optc)) 187 188 return false; 188 - REG_WRITE(OTG_CRC_CNTL, 0); 189 + 190 + if (!params->enable || params->reset) 191 + REG_WRITE(OTG_CRC_CNTL, 0); 192 + 189 193 if (!params->enable) 190 194 return true; 191 - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, 192 - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, 193 - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); 194 - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, 195 - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, 196 - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); 197 - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, 198 - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, 199 - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); 200 - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, 201 - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, 202 - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); 203 - if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) { 204 - REG_UPDATE_4(OTG_CRC_CNTL, 205 - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 206 - OTG_CRC0_SELECT, params->selection, 207 - OTG_CRC_EN, 1, 208 - OTG_CRC_WINDOW_DB_EN, 1); 209 - } else 210 - REG_UPDATE_3(OTG_CRC_CNTL, 211 - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 212 - OTG_CRC0_SELECT, params->selection, 213 - OTG_CRC_EN, 1); 195 + 196 + /* Program frame boundaries */ 197 + switch (params->crc_eng_inst) { 198 + case 0: 199 + /* Window A x axis start and end. */ 200 + REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, 201 + OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, 202 + OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); 203 + 204 + /* Window A y axis start and end. */ 205 + REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, 206 + OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, 207 + OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); 208 + 209 + /* Window B x axis start and end. 
*/ 210 + REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, 211 + OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, 212 + OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); 213 + 214 + /* Window B y axis start and end. */ 215 + REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, 216 + OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, 217 + OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); 218 + 219 + if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) 220 + REG_UPDATE_4(OTG_CRC_CNTL, 221 + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 222 + OTG_CRC0_SELECT, params->selection, 223 + OTG_CRC_EN, 1, 224 + OTG_CRC_WINDOW_DB_EN, 1); 225 + else 226 + REG_UPDATE_3(OTG_CRC_CNTL, 227 + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 228 + OTG_CRC0_SELECT, params->selection, 229 + OTG_CRC_EN, 1); 230 + break; 231 + case 1: 232 + /* Window A x axis start and end. */ 233 + REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL, 234 + OTG_CRC1_WINDOWA_X_START, params->windowa_x_start, 235 + OTG_CRC1_WINDOWA_X_END, params->windowa_x_end); 236 + 237 + /* Window A y axis start and end. */ 238 + REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL, 239 + OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start, 240 + OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end); 241 + 242 + /* Window B x axis start and end. */ 243 + REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL, 244 + OTG_CRC1_WINDOWB_X_START, params->windowb_x_start, 245 + OTG_CRC1_WINDOWB_X_END, params->windowb_x_end); 246 + 247 + /* Window B y axis start and end. */ 248 + REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL, 249 + OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start, 250 + OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end); 251 + 252 + if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) 253 + REG_UPDATE_4(OTG_CRC_CNTL, 254 + OTG_CRC_CONT_EN, params->continuous_mode ? 
1 : 0, 255 + OTG_CRC1_SELECT, params->selection, 256 + OTG_CRC_EN, 1, 257 + OTG_CRC_WINDOW_DB_EN, 1); 258 + else 259 + REG_UPDATE_3(OTG_CRC_CNTL, 260 + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, 261 + OTG_CRC1_SELECT, params->selection, 262 + OTG_CRC_EN, 1); 263 + break; 264 + default: 265 + return false; 266 + } 214 267 return true; 215 268 } 216 269
+7 -1
drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
··· 1258 1258 return NULL; 1259 1259 } 1260 1260 1261 + unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx) 1262 + { 1263 + return pipe_ctx->pipe_dlg_param.vstartup_start; 1264 + } 1265 + 1261 1266 static const struct dc_cap_funcs cap_funcs = { 1262 1267 .get_dcc_compression_cap = dcn10_get_dcc_compression_cap 1263 1268 }; ··· 1277 1272 .validate_global = dcn10_validate_global, 1278 1273 .add_stream_to_ctx = dcn10_add_stream_to_ctx, 1279 1274 .patch_unknown_plane_state = dcn10_patch_unknown_plane_state, 1280 - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link 1275 + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 1276 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1281 1277 }; 1282 1278 1283 1279 static uint32_t read_pipe_fuses(struct dc_context *ctx)
+1
drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
··· 51 51 const struct resource_pool *pool, 52 52 struct dc_stream_state *stream); 53 53 54 + unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx); 54 55 55 56 #endif /* __DC_RESOURCE_DCN10_H__ */ 56 57
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
··· 2229 2229 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 2230 2230 .set_mcif_arb_params = dcn20_set_mcif_arb_params, 2231 2231 .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, 2232 - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link 2232 + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 2233 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 2233 2234 }; 2234 2235 2235 2236 bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
··· 1079 1079 .populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context, 1080 1080 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1081 1081 .set_mcif_arb_params = dcn20_set_mcif_arb_params, 1082 - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link 1082 + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 1083 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1083 1084 }; 1084 1085 1085 1086 static bool dcn201_resource_construct(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
··· 1378 1378 .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 1379 1379 .update_bw_bounding_box = dcn21_update_bw_bounding_box, 1380 1380 .get_panel_config_defaults = dcn21_get_panel_config_defaults, 1381 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1381 1382 }; 1382 1383 1383 1384 static bool dcn21_resource_construct(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
··· 2250 2250 .update_bw_bounding_box = dcn30_update_bw_bounding_box, 2251 2251 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 2252 2252 .get_panel_config_defaults = dcn30_get_panel_config_defaults, 2253 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 2253 2254 }; 2254 2255 2255 2256 #define CTX ctx
+6 -5
drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
··· 671 671 672 672 /* 6:1 downscaling ratio: 1000/6 = 166.666 */ 673 673 .max_downscale_factor = { 674 - .argb8888 = 167, 675 - .nv12 = 167, 676 - .fp16 = 167 674 + .argb8888 = 358, 675 + .nv12 = 358, 676 + .fp16 = 358 677 677 }, 678 678 64, 679 679 64 ··· 693 693 .disable_dcc = DCC_ENABLE, 694 694 .vsr_support = true, 695 695 .performance_trace = false, 696 - .max_downscale_src_width = 7680,/*upto 8K*/ 696 + .max_downscale_src_width = 4096,/*upto true 4k*/ 697 697 .scl_reset_length10 = true, 698 698 .sanity_checks = false, 699 699 .underflow_assert_delay_us = 0xFFFFFFFF, ··· 1400 1400 .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, 1401 1401 .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, 1402 1402 .update_bw_bounding_box = dcn301_update_bw_bounding_box, 1403 - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state 1403 + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1404 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1404 1405 }; 1405 1406 1406 1407 static bool dcn301_resource_construct(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
··· 1151 1151 .update_bw_bounding_box = dcn302_update_bw_bounding_box, 1152 1152 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1153 1153 .get_panel_config_defaults = dcn302_get_panel_config_defaults, 1154 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1154 1155 }; 1155 1156 1156 1157 static struct dc_cap_funcs cap_funcs = {
+1
drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
··· 1096 1096 .update_bw_bounding_box = dcn303_update_bw_bounding_box, 1097 1097 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1098 1098 .get_panel_config_defaults = dcn303_get_panel_config_defaults, 1099 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1099 1100 }; 1100 1101 1101 1102 static struct dc_cap_funcs cap_funcs = {
+1
drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
··· 1849 1849 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1850 1850 .get_panel_config_defaults = dcn31_get_panel_config_defaults, 1851 1851 .get_det_buffer_size = dcn31_get_det_buffer_size, 1852 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1852 1853 }; 1853 1854 1854 1855 static struct clock_source *dcn30_clock_source_create(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
··· 1778 1778 .get_panel_config_defaults = dcn314_get_panel_config_defaults, 1779 1779 .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, 1780 1780 .get_det_buffer_size = dcn31_get_det_buffer_size, 1781 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1781 1782 }; 1782 1783 1783 1784 static struct clock_source *dcn30_clock_source_create(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
··· 1846 1846 .get_panel_config_defaults = dcn315_get_panel_config_defaults, 1847 1847 .get_power_profile = dcn315_get_power_profile, 1848 1848 .get_det_buffer_size = dcn31_get_det_buffer_size, 1849 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1849 1850 }; 1850 1851 1851 1852 static bool dcn315_resource_construct(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
··· 1720 1720 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1721 1721 .get_panel_config_defaults = dcn316_get_panel_config_defaults, 1722 1722 .get_det_buffer_size = dcn31_get_det_buffer_size, 1723 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1723 1724 }; 1724 1725 1725 1726 static bool dcn316_resource_construct(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
··· 2066 2066 .add_phantom_pipes = dcn32_add_phantom_pipes, 2067 2067 .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 2068 2068 .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, 2069 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 2069 2070 }; 2070 2071 2071 2072 static uint32_t read_pipe_fuses(struct dc_context *ctx)
+1
drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
··· 1624 1624 .add_phantom_pipes = dcn32_add_phantom_pipes, 1625 1625 .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 1626 1626 .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, 1627 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1627 1628 }; 1628 1629 1629 1630 static uint32_t read_pipe_fuses(struct dc_context *ctx)
+9 -1
drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
··· 1752 1752 return out; 1753 1753 } 1754 1754 1755 + enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state) 1756 + { 1757 + plane_state->tiling_info.gfxversion = DcGfxVersion9; 1758 + dcn20_patch_unknown_plane_state(plane_state); 1759 + return DC_OK; 1760 + } 1761 + 1755 1762 1756 1763 static struct resource_funcs dcn35_res_pool_funcs = { 1757 1764 .destroy = dcn35_destroy_resource_pool, ··· 1782 1775 .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, 1783 1776 .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, 1784 1777 .update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu, 1785 - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1778 + .patch_unknown_plane_state = dcn35_patch_unknown_plane_state, 1786 1779 .get_panel_config_defaults = dcn35_get_panel_config_defaults, 1787 1780 .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia, 1788 1781 .get_det_buffer_size = dcn31_get_det_buffer_size, 1782 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1789 1783 }; 1790 1784 1791 1785 static bool dcn35_resource_construct(
+1
drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
··· 35 35 36 36 extern struct _vcs_dpi_ip_params_st dcn3_5_ip; 37 37 extern struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc; 38 + enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state); 38 39 39 40 struct dcn35_resource_pool { 40 41 struct resource_pool base;
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
··· 1754 1754 .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, 1755 1755 .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, 1756 1756 .update_bw_bounding_box = dcn351_update_bw_bounding_box_fpu, 1757 - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1757 + .patch_unknown_plane_state = dcn35_patch_unknown_plane_state, 1758 1758 .get_panel_config_defaults = dcn35_get_panel_config_defaults, 1759 1759 .get_preferred_eng_id_dpia = dcn351_get_preferred_eng_id_dpia, 1760 1760 .get_det_buffer_size = dcn31_get_det_buffer_size, 1761 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1761 1762 }; 1762 1763 1763 1764 static bool dcn351_resource_construct(
+36 -21
drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
··· 737 737 .enable_stall_recovery = true, 738 738 } 739 739 }, 740 - .force_cositing = CHROMA_COSITING_TOPLEFT + 1, 740 + .force_cositing = CHROMA_COSITING_NONE + 1, 741 741 }; 742 742 743 743 static struct dce_aux *dcn401_aux_engine_create( ··· 1297 1297 return &hpo_dp_enc31->base; 1298 1298 } 1299 1299 1300 + static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans) 1301 + { 1302 + unsigned int num_available_chans = 1; 1303 + 1304 + /* channels for MALL must be a power of 2 */ 1305 + while (num_chans > 1) { 1306 + num_available_chans = (num_available_chans << 1); 1307 + num_chans = (num_chans >> 1); 1308 + } 1309 + 1310 + /* cannot be odd */ 1311 + num_available_chans &= ~1; 1312 + 1313 + /* clamp to max available channels for MALL per ASIC */ 1314 + if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) { 1315 + num_available_chans = num_available_chans > 16 ? 16 : num_available_chans; 1316 + } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) { 1317 + num_available_chans = num_available_chans > 8 ? 
8 : num_available_chans; 1318 + } 1319 + 1320 + return num_available_chans; 1321 + } 1322 + 1300 1323 static struct dce_hwseq *dcn401_hwseq_create( 1301 1324 struct dc_context *ctx) 1302 1325 { ··· 1615 1592 1616 1593 memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options)); 1617 1594 1595 + /* re-calculate the available MALL size if required */ 1596 + if (bw_params->num_channels > 0) { 1597 + dc->caps.max_cab_allocation_bytes = dcn401_calc_num_avail_chans_for_mall( 1598 + dc, bw_params->num_channels) * 1599 + dc->caps.mall_size_per_mem_channel * 1024 * 1024; 1600 + dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; 1601 + } 1602 + 1618 1603 DC_FP_START(); 1619 1604 1620 1605 dcn401_update_bw_bounding_box_fpu(dc, bw_params); ··· 1640 1609 1641 1610 enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state) 1642 1611 { 1612 + plane_state->tiling_info.gfxversion = DcGfxAddr3; 1643 1613 plane_state->tiling_info.gfx_addr3.swizzle = DC_ADDR3_SW_64KB_2D; 1644 1614 return DC_OK; 1645 1615 } ··· 1740 1708 return dpm_level; 1741 1709 } 1742 1710 1743 - static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans) 1711 + static unsigned int dcn401_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx) 1744 1712 { 1745 - unsigned int num_available_chans = 1; 1746 - 1747 - /* channels for MALL must be a power of 2 */ 1748 - while (num_chans > 1) { 1749 - num_available_chans = (num_available_chans << 1); 1750 - num_chans = (num_chans >> 1); 1751 - } 1752 - 1753 - /* cannot be odd */ 1754 - num_available_chans &= ~1; 1755 - 1756 - /* clamp to max available channels for MALL per ASIC */ 1757 - if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) { 1758 - num_available_chans = num_available_chans > 16 ? 16 : num_available_chans; 1759 - } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) { 1760 - num_available_chans = num_available_chans > 8 ? 
8 : num_available_chans; 1761 - } 1762 - 1763 - return num_available_chans; 1713 + return pipe_ctx->global_sync.dcn4x.vstartup_lines; 1764 1714 } 1765 1715 1766 1716 static struct resource_funcs dcn401_res_pool_funcs = { ··· 1772 1758 .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, 1773 1759 .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, 1774 1760 .get_power_profile = dcn401_get_power_profile, 1761 + .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe 1775 1762 }; 1776 1763 1777 1764 static uint32_t read_pipe_fuses(struct dc_context *ctx)
+50 -51
drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
··· 11 11 #define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) 12 12 #define MIN_VIEWPORT_SIZE 12 13 13 14 + static bool spl_is_yuv420(enum spl_pixel_format format) 15 + { 16 + if ((format >= SPL_PIXEL_FORMAT_420BPP8) && 17 + (format <= SPL_PIXEL_FORMAT_420BPP10)) 18 + return true; 19 + 20 + return false; 21 + } 22 + 23 + static bool spl_is_rgb8(enum spl_pixel_format format) 24 + { 25 + if (format == SPL_PIXEL_FORMAT_ARGB8888) 26 + return true; 27 + 28 + return false; 29 + } 30 + 31 + static bool spl_is_video_format(enum spl_pixel_format format) 32 + { 33 + if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN 34 + && format <= SPL_PIXEL_FORMAT_VIDEO_END) 35 + return true; 36 + else 37 + return false; 38 + } 39 + 40 + static bool spl_is_subsampled_format(enum spl_pixel_format format) 41 + { 42 + if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN 43 + && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END) 44 + return true; 45 + else 46 + return false; 47 + } 48 + 14 49 static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) 15 50 { 16 51 struct spl_rect rec; ··· 443 408 spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; 444 409 spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; 445 410 446 - if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 447 - || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { 411 + if (spl_is_yuv420(spl_in->basic_in.format)) { 448 412 spl_scratch->scl_data.ratios.horz_c.value /= 2; 449 413 spl_scratch->scl_data.ratios.vert_c.value /= 2; 450 414 } ··· 580 546 *vp_offset = src_size - *vp_offset - *vp_size; 581 547 } 582 548 583 - static bool spl_is_yuv420(enum spl_pixel_format format) 584 - { 585 - if ((format >= SPL_PIXEL_FORMAT_420BPP8) && 586 - (format <= SPL_PIXEL_FORMAT_420BPP10)) 587 - return true; 588 - 589 - return false; 590 - } 591 - 592 - static bool spl_is_rgb8(enum spl_pixel_format format) 593 - { 594 - if (format == SPL_PIXEL_FORMAT_ARGB8888) 595 - 
return true; 596 - 597 - return false; 598 - } 599 - 600 - static bool spl_is_video_format(enum spl_pixel_format format) 601 - { 602 - if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN 603 - && format <= SPL_PIXEL_FORMAT_VIDEO_END) 604 - return true; 605 - else 606 - return false; 607 - } 608 - 609 - static bool spl_is_subsampled_format(enum spl_pixel_format format) 610 - { 611 - if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN 612 - && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END) 613 - return true; 614 - else 615 - return false; 616 - } 617 - 618 549 /*Calculate inits and viewport */ 619 550 static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, 620 551 struct spl_scratch *spl_scratch) ··· 590 591 struct spl_rect recout_clip_in_recout_dst; 591 592 struct spl_rect overlap_in_active_timing; 592 593 struct spl_rect odm_slice = calculate_odm_slice_in_timing_active(spl_in); 593 - int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 594 - || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1; 594 + int vpc_div = spl_is_subsampled_format(spl_in->basic_in.format) ? 
2 : 1; 595 595 bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; 596 596 struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; 597 597 struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; ··· 618 620 &flip_vert_scan_dir, 619 621 &flip_horz_scan_dir); 620 622 621 - if (orthogonal_rotation) { 622 - spl_swap(src.width, src.height); 623 - spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); 624 - } 625 - 626 623 if (spl_is_subsampled_format(spl_in->basic_in.format)) { 627 624 /* this gives the direction of the cositing (negative will move 628 625 * left, right otherwise) ··· 626 633 627 634 switch (spl_in->basic_in.cositing) { 628 635 629 - case CHROMA_COSITING_LEFT: 630 - init_adj_h = spl_fixpt_zero; 631 - init_adj_v = spl_fixpt_from_fraction(sign, 4); 632 - break; 633 - case CHROMA_COSITING_NONE: 636 + case CHROMA_COSITING_TOPLEFT: 634 637 init_adj_h = spl_fixpt_from_fraction(sign, 4); 635 638 init_adj_v = spl_fixpt_from_fraction(sign, 4); 636 639 break; 637 - case CHROMA_COSITING_TOPLEFT: 640 + case CHROMA_COSITING_LEFT: 641 + init_adj_h = spl_fixpt_from_fraction(sign, 4); 642 + init_adj_v = spl_fixpt_zero; 643 + break; 644 + case CHROMA_COSITING_NONE: 638 645 default: 639 646 init_adj_h = spl_fixpt_zero; 640 647 init_adj_v = spl_fixpt_zero; 641 648 break; 642 649 } 650 + } 651 + 652 + if (orthogonal_rotation) { 653 + spl_swap(src.width, src.height); 654 + spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); 655 + spl_swap(init_adj_h, init_adj_v); 643 656 } 644 657 645 658 spl_calculate_init_and_vp( ··· 1598 1599 0x0; // fp1.5.10, C3 coefficient 1599 1600 } 1600 1601 1601 - if (spl_is_video_format(format)) { /* TODO: 0 = RGB, 1 = YUV */ 1602 + if (spl_is_subsampled_format(format)) { /* TODO: 0 = RGB, 1 = YUV */ 1602 1603 dscl_prog_data->easf_matrix_mode = 1; 1603 1604 /* 1604 1605 * 2-bit, BF3 chroma mode correction calculation mode
+64 -1
drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
··· 431 431 */ 432 432 uint32_t enable_ips_residency_profiling : 1; 433 433 434 - uint32_t reserved : 20; 434 + /** 435 + * 0x1000 (bit 12) 436 + * @enable_coasting_vtotal_check: Enable Coasting_vtotal_check 437 + */ 438 + uint32_t enable_coasting_vtotal_check : 1; 439 + /** 440 + * 0x2000 (bit 13) 441 + * @enable_visual_confirm_debug: Enable Visual Confirm Debug 442 + */ 443 + uint32_t enable_visual_confirm_debug : 1; 444 + 445 + uint32_t reserved : 18; 446 + } bitfields; 447 + 448 + uint32_t u32All; 449 + }; 450 + 451 + /** 452 + * Flags record error state. 453 + */ 454 + union replay_visual_confirm_error_state_flags { 455 + struct { 456 + /** 457 + * 0x1 (bit 0) - Desync Error flag. 458 + */ 459 + uint32_t desync_error : 1; 460 + 461 + /** 462 + * 0x2 (bit 1) - State Transition Error flag. 463 + */ 464 + uint32_t state_transition_error : 1; 465 + 466 + /** 467 + * 0x4 (bit 2) - Crc Error flag 468 + */ 469 + uint32_t crc_error : 1; 470 + 471 + /** 472 + * 0x8 (bit 3) - Reserved 473 + */ 474 + uint32_t reserved_3 : 1; 475 + 476 + /** 477 + * 0x10 (bit 4) - Incorrect Coasting vtotal checking --> use debug flag to control DPCD write. 478 + * Added new debug flag to control DPCD. 479 + */ 480 + uint32_t incorrect_vtotal_in_static_screen : 1; 481 + 482 + /** 483 + * 0x20 (bit 5) - No doubled Refresh Rate. 484 + */ 485 + uint32_t no_double_rr : 1; 486 + 487 + /** 488 + * Reserved bit 6-7 489 + */ 490 + uint32_t reserved_6_7 : 2; 491 + 492 + /** 493 + * Reserved bit 9-31 494 + */ 495 + uint32_t reserved_9_31 : 24; 435 496 } bitfields; 436 497 437 498 uint32_t u32All; ··· 3705 3644 */ 3706 3645 REPLAY_GENERAL_CMD_DISABLED_ADAPTIVE_SYNC_SDP, 3707 3646 REPLAY_GENERAL_CMD_DISABLED_DESYNC_ERROR_DETECTION, 3647 + REPLAY_GENERAL_CMD_UPDATE_ERROR_STATUS, 3648 + REPLAY_GENERAL_CMD_SET_LOW_RR_ACTIVATE, 3708 3649 }; 3709 3650 3710 3651 /**
+7 -2
drivers/gpu/drm/amd/display/modules/power/power_helpers.c
··· 996 996 link->replay_settings.coasting_vtotal_table[type] = vtotal; 997 997 } 998 998 999 - void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) 999 + void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) 1000 1000 { 1001 - link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal; 1001 + link->replay_settings.low_rr_full_screen_video_pseudo_vtotal = vtotal; 1002 1002 } 1003 1003 1004 1004 void calculate_replay_link_off_frame_count(struct dc_link *link, ··· 1038 1038 caps->num_data_points = custom_backlight_profiles[config_no].num_data_points; 1039 1039 memcpy(caps->data_points, custom_backlight_profiles[config_no].data_points, data_points_size); 1040 1040 return true; 1041 + } 1042 + 1043 + void reset_replay_dsync_error_count(struct dc_link *link) 1044 + { 1045 + link->replay_settings.replay_desync_error_fail_count = 0; 1041 1046 }
+2 -1
drivers/gpu/drm/amd/display/modules/power/power_helpers.h
··· 62 62 uint32_t vtotal); 63 63 void update_replay_coasting_vtotal_from_defer(struct dc_link *link, 64 64 enum replay_coasting_vtotal_type type); 65 - void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); 65 + void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); 66 66 void calculate_replay_link_off_frame_count(struct dc_link *link, 67 67 uint16_t vtotal, uint16_t htotal); 68 68 ··· 78 78 79 79 bool fill_custom_backlight_caps(unsigned int config_no, 80 80 struct dm_acpi_atif_backlight_caps *caps); 81 + void reset_replay_dsync_error_count(struct dc_link *link); 81 82 #endif /* MODULES_POWER_POWER_HELPERS_H_ */
+5
drivers/gpu/drm/amd/include/amd_shared.h
··· 344 344 * eDP display from ACPI _DDC method. 345 345 */ 346 346 DC_DISABLE_ACPI_EDID = 0x8000, 347 + 348 + /* 349 + * @DC_DISABLE_HDMI_CEC: If set, disable HDMI-CEC feature in amdgpu driver. 350 + */ 351 + DC_DISABLE_HDMI_CEC = 0x10000, 347 352 }; 348 353 349 354 enum amd_dpm_forced_level;
+11 -3
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
··· 34 34 #define NUM_PCIE_BITRATES 4 35 35 #define NUM_XGMI_BITRATES 4 36 36 #define NUM_XGMI_WIDTHS 3 37 + #define NUM_SOC_P2S_TABLES 3 38 + #define NUM_TDP_GROUPS 4 37 39 38 40 typedef enum { 39 41 /*0*/ FEATURE_DATA_CALCULATION = 0, ··· 82 80 /*41*/ FEATURE_CXL_QOS = 41, 83 81 /*42*/ FEATURE_SOC_DC_RTC = 42, 84 82 /*43*/ FEATURE_GFX_DC_RTC = 43, 83 + /*44*/ FEATURE_DVM_MIN_PSM = 44, 84 + /*45*/ FEATURE_PRC = 45, 85 85 86 - /*44*/ NUM_FEATURES = 44 86 + /*46*/ NUM_FEATURES = 46 87 87 } FEATURE_LIST_e; 88 88 89 89 //enum for MPIO PCIe gen speed msgs ··· 127 123 VOLTAGE_GUARDBAND_COUNT 128 124 } GFX_GUARDBAND_e; 129 125 130 - #define SMU_METRICS_TABLE_VERSION 0xE 126 + #define SMU_METRICS_TABLE_VERSION 0xF 131 127 132 128 typedef struct __attribute__((packed, aligned(4))) { 133 129 uint32_t AccumulationCounter; ··· 238 234 239 235 //PCIE BW Data and error count 240 236 uint32_t PCIeOtherEndRecoveryAcc; // The Pcie counter itself is accumulated 237 + 238 + //Total App Clock Counter 239 + uint64_t GfxclkBelowHostLimitAcc[8]; 241 240 } MetricsTableX_t; 242 241 243 242 typedef struct __attribute__((packed, aligned(4))) { ··· 335 328 uint32_t JpegBusy[32]; 336 329 } MetricsTableA_t; 337 330 338 - #define SMU_VF_METRICS_TABLE_VERSION 0x3 331 + #define SMU_VF_METRICS_TABLE_VERSION 0x5 339 332 340 333 typedef struct __attribute__((packed, aligned(4))) { 341 334 uint32_t AccumulationCounter; 342 335 uint32_t InstGfxclk_TargFreq; 343 336 uint64_t AccGfxclk_TargFreq; 344 337 uint64_t AccGfxRsmuDpm_Busy; 338 + uint64_t AccGfxclkBelowHostLimit; 345 339 } VfMetricsTable_t; 346 340 347 341 #endif
+1
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
··· 93 93 #define PPSMC_MSG_SelectPLPDMode 0x40 94 94 #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 95 95 #define PPSMC_MSG_SelectPstatePolicy 0x44 96 + #define PPSMC_MSG_ResetSDMA2 0x45 96 97 #define PPSMC_MSG_ResetSDMA 0x4D 97 98 #define PPSMC_Message_Count 0x4E 98 99
+2 -1
drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
··· 276 276 __SMU_DUMMY_MAP(SelectPstatePolicy), \ 277 277 __SMU_DUMMY_MAP(MALLPowerController), \ 278 278 __SMU_DUMMY_MAP(MALLPowerState), \ 279 - __SMU_DUMMY_MAP(ResetSDMA), 279 + __SMU_DUMMY_MAP(ResetSDMA), \ 280 + __SMU_DUMMY_MAP(ResetSDMA2), 280 281 281 282 #undef __SMU_DUMMY_MAP 282 283 #define __SMU_DUMMY_MAP(type) SMU_MSG_##type
+2
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
··· 303 303 int smu_v13_0_get_boot_freq_by_index(struct smu_context *smu, 304 304 enum smu_clk_type clk_type, 305 305 uint32_t *value); 306 + 307 + void smu_v13_0_interrupt_work(struct smu_context *smu); 306 308 #endif 307 309 #endif
+6 -6
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
··· 1321 1321 return 0; 1322 1322 } 1323 1323 1324 - static int smu_v13_0_ack_ac_dc_interrupt(struct smu_context *smu) 1324 + void smu_v13_0_interrupt_work(struct smu_context *smu) 1325 1325 { 1326 - return smu_cmn_send_smc_msg(smu, 1327 - SMU_MSG_ReenableAcDcInterrupt, 1328 - NULL); 1326 + smu_cmn_send_smc_msg(smu, 1327 + SMU_MSG_ReenableAcDcInterrupt, 1328 + NULL); 1329 1329 } 1330 1330 1331 1331 #define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */ ··· 1378 1378 switch (ctxid) { 1379 1379 case SMU_IH_INTERRUPT_CONTEXT_ID_AC: 1380 1380 dev_dbg(adev->dev, "Switched to AC mode!\n"); 1381 - smu_v13_0_ack_ac_dc_interrupt(smu); 1381 + schedule_work(&smu->interrupt_work); 1382 1382 adev->pm.ac_power = true; 1383 1383 break; 1384 1384 case SMU_IH_INTERRUPT_CONTEXT_ID_DC: 1385 1385 dev_dbg(adev->dev, "Switched to DC mode!\n"); 1386 - smu_v13_0_ack_ac_dc_interrupt(smu); 1386 + schedule_work(&smu->interrupt_work); 1387 1387 adev->pm.ac_power = false; 1388 1388 break; 1389 1389 case SMU_IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING:
+7 -5
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
··· 2643 2643 &backend_workload_mask); 2644 2644 2645 2645 /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ 2646 - if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && 2647 - ((smu->adev->pm.fw_version == 0x004e6601) || 2648 - (smu->adev->pm.fw_version >= 0x004e7300))) || 2649 - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && 2650 - smu->adev->pm.fw_version >= 0x00504500)) { 2646 + if ((workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) && 2647 + ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && 2648 + ((smu->adev->pm.fw_version == 0x004e6601) || 2649 + (smu->adev->pm.fw_version >= 0x004e7300))) || 2650 + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && 2651 + smu->adev->pm.fw_version >= 0x00504500))) { 2651 2652 workload_type = smu_cmn_to_asic_specific_index(smu, 2652 2653 CMN2ASIC_MAPPING_WORKLOAD, 2653 2654 PP_SMC_POWER_PROFILE_POWERSAVING); ··· 3220 3219 .is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check, 3221 3220 .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, 3222 3221 .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, 3222 + .interrupt_work = smu_v13_0_interrupt_work, 3223 3223 }; 3224 3224 3225 3225 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
+46 -8
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
··· 119 119 } 120 120 } 121 121 122 + static inline bool smu_v13_0_6_is_blw_host_limit_available(struct smu_context *smu) 123 + { 124 + if (smu->adev->flags & AMD_IS_APU) 125 + return smu->smc_fw_version >= 0x04556F00; 126 + 127 + switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { 128 + case IP_VERSION(13, 0, 6): 129 + return smu->smc_fw_version >= 0x557900; 130 + case IP_VERSION(13, 0, 14): 131 + return smu->smc_fw_version >= 0x05551000; 132 + default: 133 + return false; 134 + } 135 + } 136 + 122 137 struct mca_bank_ipid { 123 138 enum amdgpu_mca_ip ip; 124 139 uint16_t hwid; ··· 209 194 MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), 210 195 MSG_MAP(SelectPstatePolicy, PPSMC_MSG_SelectPstatePolicy, 0), 211 196 MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), 197 + MSG_MAP(ResetSDMA2, PPSMC_MSG_ResetSDMA2, 0), 212 198 }; 213 199 214 200 // clang-format on ··· 2374 2358 gpu_metrics->average_umc_activity = 2375 2359 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag)); 2376 2360 2361 + gpu_metrics->mem_max_bandwidth = 2362 + SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, flag)); 2363 + 2377 2364 gpu_metrics->curr_socket_power = 2378 2365 SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag)); 2379 2366 /* Energy counter reported in 15.259uJ (2^-16) units */ ··· 2515 2496 SMUQ10_ROUND(metrics_x->GfxBusy[inst]); 2516 2497 gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = 2517 2498 SMUQ10_ROUND(metrics_x->GfxBusyAcc[inst]); 2499 + 2500 + if (smu_v13_0_6_is_blw_host_limit_available(smu)) 2501 + gpu_metrics->xcp_stats[i].gfx_below_host_limit_acc[idx] = 2502 + SMUQ10_ROUND(metrics_x->GfxclkBelowHostLimitAcc 2503 + [inst]); 2518 2504 idx++; 2519 2505 } 2520 2506 } ··· 2744 2720 2745 2721 static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) 2746 2722 { 2747 - struct amdgpu_device *adev = smu->adev; 2723 + uint32_t smu_program; 2748 2724 int ret = 0; 2749 2725 2750 - /* the message is only valid on SMU 13.0.6 with pmfw 
85.121.00 and above */ 2751 - if ((adev->flags & AMD_IS_APU) || 2752 - amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6) || 2753 - smu->smc_fw_version < 0x00557900) 2754 - return 0; 2726 + smu_program = (smu->smc_fw_version >> 24) & 0xff; 2727 + switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { 2728 + case IP_VERSION(13, 0, 6): 2729 + if (((smu_program == 7) && (smu->smc_fw_version > 0x07550700)) || 2730 + ((smu_program == 0) && (smu->smc_fw_version > 0x00557700))) 2731 + ret = smu_cmn_send_smc_msg_with_param(smu, 2732 + SMU_MSG_ResetSDMA, inst_mask, NULL); 2733 + else if ((smu_program == 4) && 2734 + (smu->smc_fw_version > 0x4556e6c)) 2735 + ret = smu_cmn_send_smc_msg_with_param(smu, 2736 + SMU_MSG_ResetSDMA2, inst_mask, NULL); 2737 + break; 2738 + case IP_VERSION(13, 0, 14): 2739 + if ((smu_program == 5) && 2740 + (smu->smc_fw_version > 0x05550f00)) 2741 + ret = smu_cmn_send_smc_msg_with_param(smu, 2742 + SMU_MSG_ResetSDMA2, inst_mask, NULL); 2743 + break; 2744 + default: 2745 + break; 2746 + } 2755 2747 2756 - ret = smu_cmn_send_smc_msg_with_param(smu, 2757 - SMU_MSG_ResetSDMA, inst_mask, NULL); 2758 2748 if (ret) 2759 2749 dev_err(smu->adev->dev, 2760 2750 "failed to send ResetSDMA event with mask 0x%x\n",
+1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
··· 2797 2797 .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check, 2798 2798 .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, 2799 2799 .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, 2800 + .interrupt_work = smu_v13_0_interrupt_work, 2800 2801 }; 2801 2802 2802 2803 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)