Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: add multi-XCC initial support in gfx_v9_4_3.c

Each XCD needs to be initialized individually. The major changes are:

1. add iteration to do rlc/kiq/kcq init/fini for each xcd
2. load rlc/mec microcode to each xcd
3. add argument to specify xcc index in initialization functions

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Le Ma and committed by
Alex Deucher
6f917fdc 5aa998ba

+397 -330
+397 -330
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
··· 181 181 182 182 static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) 183 183 { 184 - adev->gfx.kiq[0].pmf = &gfx_v9_4_3_kiq_pm4_funcs; 184 + int i; 185 + for (i = 0; i < adev->gfx.num_xcd; i++) 186 + adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs; 185 187 } 186 188 187 189 static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) ··· 506 504 507 505 const struct gfx_firmware_header_v1_0 *mec_hdr; 508 506 509 - bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 507 + for (i = 0; i < adev->gfx.num_xcd; i++) 508 + bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, 509 + AMDGPU_MAX_COMPUTE_QUEUES); 510 510 511 511 /* take ownership of the relevant compute queues */ 512 512 amdgpu_gfx_compute_queue_acquire(adev); ··· 735 731 } 736 732 737 733 static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, 738 - int mec, int pipe, int queue) 734 + int xcc_id, int mec, int pipe, int queue) 739 735 { 740 736 unsigned irq_type; 741 737 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; ··· 744 740 ring = &adev->gfx.compute_ring[ring_id]; 745 741 746 742 /* mec0 is me1 */ 743 + ring->xcc_id = xcc_id; 747 744 ring->me = mec + 1; 748 745 ring->pipe = pipe; 749 746 ring->queue = queue; ··· 755 750 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 756 751 + (ring_id * GFX9_MEC_HPD_SIZE); 757 752 ring->vm_hub = AMDGPU_GFXHUB_0; 758 - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 753 + sprintf(ring->name, "comp_%d.%d.%d.%d", 754 + ring->xcc_id, ring->me, ring->pipe, ring->queue); 759 755 760 756 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 761 757 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) ··· 770 764 771 765 static int gfx_v9_4_3_sw_init(void *handle) 772 766 { 773 - int i, j, k, r, ring_id; 767 + int i, j, k, r, ring_id, xcc_id; 774 768 struct amdgpu_kiq *kiq; 775 769 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 776 770 ··· 811 805 
812 806 /* set up the compute queues - allocate horizontally across pipes */ 813 807 ring_id = 0; 814 - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 815 - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 816 - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 817 - if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 818 - k, j)) 819 - continue; 808 + for (xcc_id = 0; xcc_id < adev->gfx.num_xcd; xcc_id++) { 820 809 821 - r = gfx_v9_4_3_compute_ring_init(adev, 822 - ring_id, 823 - i, k, j); 824 - if (r) 825 - return r; 810 + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 811 + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 812 + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; 813 + k++) { 814 + if (!amdgpu_gfx_is_mec_queue_enabled( 815 + adev, xcc_id, i, k, j)) 816 + continue; 826 817 827 - ring_id++; 818 + r = gfx_v9_4_3_compute_ring_init(adev, 819 + ring_id, 820 + xcc_id, 821 + i, k, j); 822 + if (r) 823 + return r; 824 + 825 + ring_id++; 826 + } 828 827 } 829 828 } 829 + 830 + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id); 831 + if (r) { 832 + DRM_ERROR("Failed to init KIQ BOs!\n"); 833 + return r; 834 + } 835 + 836 + kiq = &adev->gfx.kiq[xcc_id]; 837 + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id); 838 + if (r) 839 + return r; 840 + 841 + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 842 + r = amdgpu_gfx_mqd_sw_init(adev, 843 + sizeof(struct v9_mqd_allocation), xcc_id); 844 + if (r) 845 + return r; 830 846 } 831 - 832 - r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 833 - if (r) { 834 - DRM_ERROR("Failed to init KIQ BOs!\n"); 835 - return r; 836 - } 837 - 838 - kiq = &adev->gfx.kiq[0]; 839 - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); 840 - if (r) 841 - return r; 842 - 843 - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 844 - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); 845 - if (r) 846 - return r; 847 847 848 848 r = 
gfx_v9_4_3_gpu_early_init(adev); 849 849 if (r) ··· 863 851 int i; 864 852 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 865 853 866 - for (i = 0; i < adev->gfx.num_compute_rings; i++) 854 + for (i = 0; i < adev->gfx.num_compute_rings * 855 + adev->gfx.num_xcd; i++) 867 856 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 868 857 869 - amdgpu_gfx_mqd_sw_fini(adev, 0); 870 - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 871 - amdgpu_gfx_kiq_fini(adev, 0); 858 + for (i = 0; i < adev->gfx.num_xcd; i++) { 859 + amdgpu_gfx_mqd_sw_fini(adev, i); 860 + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); 861 + amdgpu_gfx_kiq_fini(adev, i); 862 + } 872 863 873 864 gfx_v9_4_3_mec_fini(adev); 874 865 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); ··· 896 881 return (~data) & mask; 897 882 } 898 883 899 - static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev) 884 + static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev, int xcc_id) 900 885 { 901 886 int i, j; 902 887 u32 data; ··· 921 906 } 922 907 923 908 #define DEFAULT_SH_MEM_BASES (0x6000) 924 - static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev) 909 + static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev, int xcc_id) 925 910 { 926 911 int i; 927 912 uint32_t sh_mem_config; ··· 941 926 942 927 mutex_lock(&adev->srbm_mutex); 943 928 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 944 - soc15_grbm_select(adev, 0, 0, 0, i, 0); 929 + soc15_grbm_select(adev, 0, 0, 0, i, xcc_id); 945 930 /* CP and shaders */ 946 - WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, sh_mem_config); 947 - WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, sh_mem_bases); 931 + WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_CONFIG, sh_mem_config); 932 + WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_BASES, sh_mem_bases); 948 933 } 949 - soc15_grbm_select(adev, 0, 0, 0, 0, 0); 934 + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); 950 935 mutex_unlock(&adev->srbm_mutex); 951 936 952 937 /* Initialize all compute 
VMIDs to have no GDS, GWS, or OA 953 938 acccess. These should be enabled by FW for target VMIDs. */ 954 939 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 955 - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 956 - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 957 - WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 958 - WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 940 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * i, 0); 941 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * i, 0); 942 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, i, 0); 943 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, i, 0); 959 944 } 960 945 } 961 946 962 - static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev) 947 + static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) 963 948 { 964 949 int vmid; 965 950 ··· 970 955 * access so that HWS firmware can save/restore entries. 971 956 */ 972 957 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 973 - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 974 - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 975 - WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 976 - WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 958 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * vmid, 0); 959 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * vmid, 0); 960 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, vmid, 0); 961 + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, vmid, 0); 977 962 } 978 963 } 979 964 980 965 static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) 981 966 { 982 967 u32 tmp; 983 - int i; 968 + int i, j; 984 969 985 - WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 970 + for (i = 0; i < adev->gfx.num_xcd; i++) { 971 + WREG32_FIELD15_PREREG(GC, i, GRBM_CNTL, READ_TIMEOUT, 0xff); 972 + gfx_v9_4_3_setup_rb(adev, i); 973 + } 986 974 987 - gfx_v9_4_3_setup_rb(adev); 988 
975 gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); 989 976 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, regDB_DEBUG2); 990 977 ··· 994 977 /* where to put LDS, scratch, GPUVM in FSA64 space */ 995 978 mutex_lock(&adev->srbm_mutex); 996 979 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 997 - soc15_grbm_select(adev, 0, 0, 0, i, 0); 998 - /* CP and shaders */ 999 - if (i == 0) { 1000 - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1001 - SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1002 - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1003 - !!adev->gmc.noretry); 1004 - WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, tmp); 1005 - WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, 0); 1006 - } else { 1007 - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1008 - SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1009 - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1010 - !!adev->gmc.noretry); 1011 - WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, tmp); 1012 - tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1013 - (adev->gmc.private_aperture_start >> 48)); 1014 - tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1015 - (adev->gmc.shared_aperture_start >> 48)); 1016 - WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, tmp); 980 + for (j = 0; j < adev->gfx.num_xcd; j++) { 981 + soc15_grbm_select(adev, 0, 0, 0, i, j); 982 + /* CP and shaders */ 983 + if (i == 0) { 984 + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 985 + SH_MEM_ALIGNMENT_MODE_UNALIGNED); 986 + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 987 + !!adev->gmc.noretry); 988 + WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp); 989 + WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, 0); 990 + } else { 991 + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 992 + SH_MEM_ALIGNMENT_MODE_UNALIGNED); 993 + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 994 + !!adev->gmc.noretry); 995 + WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp); 996 + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 
997 + (adev->gmc.private_aperture_start >> 48)); 998 + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 999 + (adev->gmc.shared_aperture_start >> 48)); 1000 + WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, tmp); 1001 + } 1017 1002 } 1018 1003 } 1019 1004 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 1020 1005 1021 1006 mutex_unlock(&adev->srbm_mutex); 1022 1007 1023 - gfx_v9_4_3_init_compute_vmid(adev); 1024 - gfx_v9_4_3_init_gds_vmid(adev); 1008 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1009 + gfx_v9_4_3_init_compute_vmid(adev, i); 1010 + gfx_v9_4_3_init_gds_vmid(adev, i); 1011 + } 1025 1012 } 1026 1013 1027 - static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev) 1014 + static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev, 1015 + int xcc_id) 1028 1016 { 1029 - WREG32_FIELD15_PREREG(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 1017 + WREG32_FIELD15_PREREG(GC, xcc_id, RLC_SRM_CNTL, SRM_ENABLE, 1); 1030 1018 } 1031 1019 1032 - static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev) 1020 + static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev, int xcc_id) 1033 1021 { 1034 1022 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 1035 1023 /* csib */ 1036 - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_ADDR_HI), 1024 + WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_HI), 1037 1025 adev->gfx.rlc.clear_state_gpu_addr >> 32); 1038 - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_ADDR_LO), 1026 + WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_LO), 1039 1027 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 1040 - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_LENGTH), 1028 + WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_LENGTH), 1041 1029 adev->gfx.rlc.clear_state_size); 1042 1030 } 1043 1031 1044 - static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev) 1032 + static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) 1045 1033 { 1046 - gfx_v9_4_3_init_csb(adev); 1034 + 
gfx_v9_4_3_init_csb(adev, xcc_id); 1047 1035 1048 1036 /* 1049 1037 * Rlc save restore list is workable since v2_1. 1050 1038 * And it's needed by gfxoff feature. 1051 1039 */ 1052 1040 if (adev->gfx.rlc.is_rlc_v2_1) 1053 - gfx_v9_4_3_enable_save_restore_machine(adev); 1041 + gfx_v9_4_3_enable_save_restore_machine(adev, xcc_id); 1054 1042 1055 1043 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 1056 1044 AMD_PG_SUPPORT_GFX_SMG | ··· 1137 1115 return 0; 1138 1116 } 1139 1117 1140 - static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) 1118 + static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, 1119 + int xcc_id) 1141 1120 { 1142 1121 u32 i, j, k; 1143 1122 u32 mask; ··· 1194 1171 1195 1172 static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) 1196 1173 { 1197 - WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 1198 - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); 1199 - gfx_v9_4_3_wait_for_rlc_serdes(adev); 1174 + int i; 1175 + 1176 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1177 + WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0); 1178 + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); 1179 + gfx_v9_4_3_wait_for_rlc_serdes(adev, i); 1180 + } 1200 1181 } 1201 1182 1202 1183 static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) 1203 1184 { 1204 - WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 1205 - udelay(50); 1206 - WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 1207 - udelay(50); 1185 + int i; 1186 + 1187 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1188 + WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 1189 + udelay(50); 1190 + WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 1191 + udelay(50); 1192 + } 1208 1193 } 1209 1194 1210 1195 static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) ··· 1220 1189 #ifdef AMDGPU_RLC_DEBUG_RETRY 1221 1190 u32 rlc_ucode_ver; 1222 1191 #endif 1192 + int i; 1223 1193 1224 - 
WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 1225 - udelay(50); 1226 - 1227 - /* carrizo do enable cp interrupt after cp inited */ 1228 - if (!(adev->flags & AMD_IS_APU)) { 1229 - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); 1194 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1195 + WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 1); 1230 1196 udelay(50); 1231 - } 1197 + 1198 + /* carrizo do enable cp interrupt after cp inited */ 1199 + if (!(adev->flags & AMD_IS_APU)) { 1200 + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); 1201 + udelay(50); 1202 + } 1232 1203 1233 1204 #ifdef AMDGPU_RLC_DEBUG_RETRY 1234 - /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 1235 - rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6); 1236 - if (rlc_ucode_ver == 0x108) { 1237 - dev_info(adev->dev, 1238 - "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 1239 - rlc_ucode_ver, adev->gfx.rlc_fw_version); 1240 - /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 1241 - * default is 0x9C4 to create a 100us interval */ 1242 - WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4); 1243 - /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 1244 - * to disable the page fault retry interrupts, default is 1245 - * 0x100 (256) */ 1246 - WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100); 1247 - } 1205 + /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 1206 + rlc_ucode_ver = RREG32_SOC15(GC, i, regRLC_GPM_GENERAL_6); 1207 + if (rlc_ucode_ver == 0x108) { 1208 + dev_info(adev->dev, 1209 + "Using rlc debug ucode. 
regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 1210 + rlc_ucode_ver, adev->gfx.rlc_fw_version); 1211 + /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 1212 + * default is 0x9C4 to create a 100us interval */ 1213 + WREG32_SOC15(GC, i, regRLC_GPM_TIMER_INT_3, 0x9C4); 1214 + /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 1215 + * to disable the page fault retry interrupts, default is 1216 + * 0x100 (256) */ 1217 + WREG32_SOC15(GC, i, regRLC_GPM_GENERAL_12, 0x100); 1218 + } 1248 1219 #endif 1220 + } 1249 1221 } 1250 1222 1251 - static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev) 1223 + static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id) 1252 1224 { 1253 1225 const struct rlc_firmware_header_v2_0 *hdr; 1254 1226 const __le32 *fw_data; ··· 1267 1233 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1268 1234 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1269 1235 1270 - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 1236 + WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR, 1271 1237 RLCG_UCODE_LOADING_START_ADDRESS); 1272 1238 for (i = 0; i < fw_size; i++) { 1273 1239 if (amdgpu_emu_mode == 1 && i % 100 == 0) { 1274 1240 dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i); 1275 1241 msleep(1); 1276 1242 } 1277 - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 1243 + WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 1278 1244 } 1279 - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 1245 + WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 1280 1246 1281 1247 return 0; 1282 1248 } 1283 1249 1284 1250 static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) 1285 1251 { 1286 - int r; 1252 + int r, i; 1287 1253 1288 1254 adev->gfx.rlc.funcs->stop(adev); 1289 1255 1290 - /* disable CG */ 1291 - WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 1256 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1257 + /* 
disable CG */ 1258 + WREG32_SOC15(GC, i, regRLC_CGCG_CGLS_CTRL, 0); 1292 1259 1293 - gfx_v9_4_3_init_pg(adev); 1260 + gfx_v9_4_3_init_pg(adev, i); 1294 1261 1295 - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 1296 - /* legacy rlc firmware loading */ 1297 - r = gfx_v9_4_3_rlc_load_microcode(adev); 1298 - if (r) 1299 - return r; 1262 + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 1263 + /* legacy rlc firmware loading */ 1264 + r = gfx_v9_4_3_rlc_load_microcode(adev, i); 1265 + if (r) 1266 + return r; 1267 + } 1300 1268 } 1301 1269 1302 1270 adev->gfx.rlc.funcs->start(adev); ··· 1306 1270 return 0; 1307 1271 } 1308 1272 1309 - static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 1273 + static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, 1274 + unsigned vmid) 1310 1275 { 1311 1276 u32 reg, data; 1312 1277 ··· 1360 1323 ARRAY_SIZE(rlcg_access_gc_9_4_3)); 1361 1324 } 1362 1325 1363 - static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, bool enable) 1326 + static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, 1327 + bool enable, int xcc_id) 1364 1328 { 1365 1329 if (enable) { 1366 - WREG32_SOC15_RLC(GC, 0, regCP_MEC_CNTL, 0); 1330 + WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL, 0); 1367 1331 } else { 1368 - WREG32_SOC15_RLC(GC, 0, regCP_MEC_CNTL, 1332 + WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL, 1369 1333 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 1370 - adev->gfx.kiq[0].ring.sched.ready = false; 1334 + adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1371 1335 } 1372 1336 udelay(50); 1373 1337 } 1374 1338 1375 - static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev) 1339 + static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev, 1340 + int xcc_id) 1376 1341 { 1377 1342 const struct gfx_firmware_header_v1_0 *mec_hdr; 1378 1343 const __le32 *fw_data; ··· 1386 1347 if (!adev->gfx.mec_fw) 1387 1348 return -EINVAL; 1388 
1349 1389 - gfx_v9_4_3_cp_compute_enable(adev, false); 1350 + gfx_v9_4_3_cp_compute_enable(adev, false, xcc_id); 1390 1351 1391 1352 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1392 1353 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); ··· 1397 1358 tmp = 0; 1398 1359 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 1399 1360 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 1400 - WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 1361 + WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_CNTL, tmp); 1401 1362 1402 - WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 1363 + WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_LO, 1403 1364 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 1404 - WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 1365 + WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_HI, 1405 1366 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 1406 1367 1407 1368 mec_ucode_addr_offset = 1408 - SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME1_UCODE_ADDR); 1369 + SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_ADDR); 1409 1370 mec_ucode_data_offset = 1410 - SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME1_UCODE_DATA); 1371 + SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_DATA); 1411 1372 1412 1373 /* MEC1 */ 1413 1374 WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset); ··· 1422 1383 } 1423 1384 1424 1385 /* KIQ functions */ 1425 - static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring) 1386 + static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring, int xcc_id) 1426 1387 { 1427 1388 uint32_t tmp; 1428 1389 struct amdgpu_device *adev = ring->adev; 1429 1390 1430 1391 /* tell RLC which is KIQ queue */ 1431 - tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 1392 + tmp = RREG32_SOC15(GC, xcc_id, regRLC_CP_SCHEDULERS); 1432 1393 tmp &= 0xffffff00; 1433 1394 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1434 - WREG32_SOC15_RLC(GC, 0, regRLC_CP_SCHEDULERS, tmp); 1395 + WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp); 1435 1396 tmp |= 0x80; 1436 - 
WREG32_SOC15_RLC(GC, 0, regRLC_CP_SCHEDULERS, tmp); 1397 + WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp); 1437 1398 } 1438 1399 1439 1400 static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) ··· 1577 1538 return 0; 1578 1539 } 1579 1540 1580 - static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring) 1541 + static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id) 1581 1542 { 1582 1543 struct amdgpu_device *adev = ring->adev; 1583 1544 struct v9_mqd *mqd = ring->mqd_ptr; 1584 1545 int j; 1585 1546 1586 1547 /* disable wptr polling */ 1587 - WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 1548 + WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_WPTR_POLL_CNTL, EN, 0); 1588 1549 1589 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_BASE_ADDR, 1550 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR, 1590 1551 mqd->cp_hqd_eop_base_addr_lo); 1591 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 1552 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR_HI, 1592 1553 mqd->cp_hqd_eop_base_addr_hi); 1593 1554 1594 1555 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1595 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_CONTROL, 1556 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_CONTROL, 1596 1557 mqd->cp_hqd_eop_control); 1597 1558 1598 1559 /* enable doorbell? 
*/ 1599 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 1560 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 1600 1561 mqd->cp_hqd_pq_doorbell_control); 1601 1562 1602 1563 /* disable the queue if it's active */ 1603 - if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 1604 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 1564 + if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) { 1565 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1); 1605 1566 for (j = 0; j < adev->usec_timeout; j++) { 1606 - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 1567 + if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1)) 1607 1568 break; 1608 1569 udelay(1); 1609 1570 } 1610 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1571 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1611 1572 mqd->cp_hqd_dequeue_request); 1612 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR, 1573 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR, 1613 1574 mqd->cp_hqd_pq_rptr); 1614 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, 1575 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, 1615 1576 mqd->cp_hqd_pq_wptr_lo); 1616 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, 1577 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, 1617 1578 mqd->cp_hqd_pq_wptr_hi); 1618 1579 } 1619 1580 1620 1581 /* set the pointer to the MQD */ 1621 - WREG32_SOC15_RLC(GC, 0, regCP_MQD_BASE_ADDR, 1582 + WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR, 1622 1583 mqd->cp_mqd_base_addr_lo); 1623 - WREG32_SOC15_RLC(GC, 0, regCP_MQD_BASE_ADDR_HI, 1584 + WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR_HI, 1624 1585 mqd->cp_mqd_base_addr_hi); 1625 1586 1626 1587 /* set MQD vmid to 0 */ 1627 - WREG32_SOC15_RLC(GC, 0, regCP_MQD_CONTROL, 1588 + WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_CONTROL, 1628 1589 mqd->cp_mqd_control); 1629 1590 1630 1591 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 1631 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_BASE, 1592 + 
WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE, 1632 1593 mqd->cp_hqd_pq_base_lo); 1633 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_BASE_HI, 1594 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE_HI, 1634 1595 mqd->cp_hqd_pq_base_hi); 1635 1596 1636 1597 /* set up the HQD, this is similar to CP_RB0_CNTL */ 1637 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_CONTROL, 1598 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_CONTROL, 1638 1599 mqd->cp_hqd_pq_control); 1639 1600 1640 1601 /* set the wb address whether it's enabled or not */ 1641 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 1602 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR, 1642 1603 mqd->cp_hqd_pq_rptr_report_addr_lo); 1643 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1604 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1644 1605 mqd->cp_hqd_pq_rptr_report_addr_hi); 1645 1606 1646 1607 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 1647 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 1608 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR, 1648 1609 mqd->cp_hqd_pq_wptr_poll_addr_lo); 1649 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1610 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1650 1611 mqd->cp_hqd_pq_wptr_poll_addr_hi); 1651 1612 1652 1613 /* enable the doorbell if requested */ 1653 1614 if (ring->use_doorbell) { 1654 - WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 1615 + WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_LOWER, 1655 1616 (adev->doorbell_index.kiq * 2) << 2); 1656 - WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 1617 + WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_UPPER, 1657 1618 (adev->doorbell_index.userqueue_end * 2) << 2); 1658 1619 } 1659 1620 1660 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 1621 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 1661 1622 mqd->cp_hqd_pq_doorbell_control); 1662 1623 1663 1624 /* reset read and 
write pointers, similar to CP_RB0_WPTR/_RPTR */ 1664 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, 1625 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, 1665 1626 mqd->cp_hqd_pq_wptr_lo); 1666 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, 1627 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, 1667 1628 mqd->cp_hqd_pq_wptr_hi); 1668 1629 1669 1630 /* set the vmid for the queue */ 1670 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 1631 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_VMID, mqd->cp_hqd_vmid); 1671 1632 1672 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PERSISTENT_STATE, 1633 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE, 1673 1634 mqd->cp_hqd_persistent_state); 1674 1635 1675 1636 /* activate the queue */ 1676 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_ACTIVE, 1637 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE, 1677 1638 mqd->cp_hqd_active); 1678 1639 1679 1640 if (ring->use_doorbell) 1680 - WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 1641 + WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 1681 1642 1682 1643 return 0; 1683 1644 } 1684 1645 1685 - static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring) 1646 + static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring, int xcc_id) 1686 1647 { 1687 1648 struct amdgpu_device *adev = ring->adev; 1688 1649 int j; 1689 1650 1690 1651 /* disable the queue if it's active */ 1691 - if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 1652 + if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) { 1692 1653 1693 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 1654 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1); 1694 1655 1695 1656 for (j = 0; j < adev->usec_timeout; j++) { 1696 - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 1657 + if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1)) 1697 1658 break; 1698 1659 udelay(1); 1699 1660 } ··· 1702 1663 DRM_DEBUG("KIQ dequeue request failed.\n"); 1703 1664 1704 1665 /* Manual 
disable if dequeue request times out */ 1705 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_ACTIVE, 0); 1666 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE, 0); 1706 1667 } 1707 1668 1708 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1669 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1709 1670 0); 1710 1671 } 1711 1672 1712 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_IQ_TIMER, 0); 1713 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_IB_CONTROL, 0); 1714 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PERSISTENT_STATE, 0); 1715 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 1716 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); 1717 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR, 0); 1718 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); 1719 - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); 1673 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IQ_TIMER, 0); 1674 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IB_CONTROL, 0); 1675 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE, 0); 1676 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 1677 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0); 1678 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR, 0); 1679 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, 0); 1680 + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, 0); 1720 1681 1721 1682 return 0; 1722 1683 } 1723 1684 1724 - static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring) 1685 + static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) 1725 1686 { 1726 1687 struct amdgpu_device *adev = ring->adev; 1727 1688 struct v9_mqd *mqd = ring->mqd_ptr; 1728 1689 struct v9_mqd *tmp_mqd; 1729 1690 1730 - gfx_v9_4_3_kiq_setting(ring); 1691 + gfx_v9_4_3_kiq_setting(ring, xcc_id); 1731 1692 1732 1693 /* GPU could be in bad state during probe, driver trigger the reset 1733 1694 * after load the SMU, in this case , the mqd is not be initialized. 1734 1695 * driver need to re-init the mqd. 
1735 1696 * check mqd->cp_hqd_pq_control since this value should not be 0 1736 1697 */ 1737 - tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; 1698 + tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup; 1738 1699 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) { 1739 1700 /* for GPU_RESET case , reset MQD to a clean status */ 1740 - if (adev->gfx.kiq[0].mqd_backup) 1741 - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); 1701 + if (adev->gfx.kiq[xcc_id].mqd_backup) 1702 + memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(struct v9_mqd_allocation)); 1742 1703 1743 1704 /* reset ring buffer */ 1744 1705 ring->wptr = 0; 1745 1706 amdgpu_ring_clear_ring(ring); 1746 - 1747 1707 mutex_lock(&adev->srbm_mutex); 1748 - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 1749 - gfx_v9_4_3_kiq_init_register(ring); 1750 - soc15_grbm_select(adev, 0, 0, 0, 0, 0); 1708 + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); 1709 + gfx_v9_4_3_kiq_init_register(ring, xcc_id); 1710 + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); 1751 1711 mutex_unlock(&adev->srbm_mutex); 1752 1712 } else { 1753 1713 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 1754 1714 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 1755 1715 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 1756 1716 mutex_lock(&adev->srbm_mutex); 1757 - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 1717 + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); 1758 1718 gfx_v9_4_3_mqd_init(ring); 1759 - gfx_v9_4_3_kiq_init_register(ring); 1760 - soc15_grbm_select(adev, 0, 0, 0, 0, 0); 1719 + gfx_v9_4_3_kiq_init_register(ring, xcc_id); 1720 + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); 1761 1721 mutex_unlock(&adev->srbm_mutex); 1762 1722 1763 - if (adev->gfx.kiq[0].mqd_backup) 1764 - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 1723 + if 
(adev->gfx.kiq[xcc_id].mqd_backup) 1724 + memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 1765 1725 } 1766 1726 1767 1727 return 0; 1768 1728 } 1769 1729 1770 - static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring) 1730 + static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) 1771 1731 { 1772 1732 struct amdgpu_device *adev = ring->adev; 1773 1733 struct v9_mqd *mqd = ring->mqd_ptr; ··· 1784 1746 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 1785 1747 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 1786 1748 mutex_lock(&adev->srbm_mutex); 1787 - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 1749 + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); 1788 1750 gfx_v9_4_3_mqd_init(ring); 1789 - soc15_grbm_select(adev, 0, 0, 0, 0, 0); 1751 + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); 1790 1752 mutex_unlock(&adev->srbm_mutex); 1791 1753 1792 1754 if (adev->gfx.mec.mqd_backup[mqd_idx]) ··· 1807 1769 return 0; 1808 1770 } 1809 1771 1810 - static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev) 1772 + static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev, int xcc_id) 1811 1773 { 1812 1774 struct amdgpu_ring *ring; 1813 1775 int r; 1814 1776 1815 - ring = &adev->gfx.kiq[0].ring; 1777 + ring = &adev->gfx.kiq[xcc_id].ring; 1816 1778 1817 1779 r = amdgpu_bo_reserve(ring->mqd_obj, false); 1818 1780 if (unlikely(r != 0)) ··· 1822 1784 if (unlikely(r != 0)) 1823 1785 return r; 1824 1786 1825 - gfx_v9_4_3_kiq_init_queue(ring); 1787 + gfx_v9_4_3_kiq_init_queue(ring, xcc_id); 1826 1788 amdgpu_bo_kunmap(ring->mqd_obj); 1827 1789 ring->mqd_ptr = NULL; 1828 1790 amdgpu_bo_unreserve(ring->mqd_obj); ··· 1830 1792 return 0; 1831 1793 } 1832 1794 1833 - static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev) 1795 + static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev, int xcc_id) 1834 1796 { 1835 1797 struct amdgpu_ring *ring = 
NULL; 1836 1798 int r = 0, i; 1837 1799 1838 - gfx_v9_4_3_cp_compute_enable(adev, true); 1800 + gfx_v9_4_3_cp_compute_enable(adev, true, xcc_id); 1839 1801 1840 1802 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 1841 - ring = &adev->gfx.compute_ring[i]; 1803 + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; 1842 1804 1843 1805 r = amdgpu_bo_reserve(ring->mqd_obj, false); 1844 1806 if (unlikely(r != 0)) 1845 1807 goto done; 1846 1808 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 1847 1809 if (!r) { 1848 - r = gfx_v9_4_3_kcq_init_queue(ring); 1810 + r = gfx_v9_4_3_kcq_init_queue(ring, xcc_id); 1849 1811 amdgpu_bo_kunmap(ring->mqd_obj); 1850 1812 ring->mqd_ptr = NULL; 1851 1813 } ··· 1854 1816 goto done; 1855 1817 } 1856 1818 1857 - r = amdgpu_gfx_enable_kcq(adev, 0); 1819 + r = amdgpu_gfx_enable_kcq(adev, xcc_id); 1858 1820 done: 1859 1821 return r; 1860 1822 } 1861 1823 1862 1824 static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) 1863 1825 { 1864 - int r, i; 1826 + int r, i, j; 1865 1827 struct amdgpu_ring *ring; 1866 1828 1867 - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); 1829 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1830 + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); 1868 1831 1869 - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 1870 - gfx_v9_4_3_disable_gpa_mode(adev); 1832 + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 1833 + gfx_v9_4_3_disable_gpa_mode(adev); 1871 1834 1872 - r = gfx_v9_4_3_cp_compute_load_microcode(adev); 1835 + r = gfx_v9_4_3_cp_compute_load_microcode(adev, i); 1836 + if (r) 1837 + return r; 1838 + } 1839 + 1840 + r = gfx_v9_4_3_kiq_resume(adev, i); 1873 1841 if (r) 1874 1842 return r; 1843 + 1844 + r = gfx_v9_4_3_kcq_resume(adev, i); 1845 + if (r) 1846 + return r; 1847 + 1848 + for (j = 0; j < adev->gfx.num_compute_rings; j++) { 1849 + ring = &adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings]; 1850 + amdgpu_ring_test_helper(ring); 1851 + } 1852 
+ 1853 + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); 1875 1854 } 1876 - 1877 - r = gfx_v9_4_3_kiq_resume(adev); 1878 - if (r) 1879 - return r; 1880 - 1881 - r = gfx_v9_4_3_kcq_resume(adev); 1882 - if (r) 1883 - return r; 1884 - 1885 - for (i = 0; i < adev->gfx.num_compute_rings; i++) { 1886 - ring = &adev->gfx.compute_ring[i]; 1887 - amdgpu_ring_test_helper(ring); 1888 - } 1889 - 1890 - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); 1891 1855 1892 1856 return 0; 1893 1857 } 1894 1858 1895 - static void gfx_v9_4_3_cp_enable(struct amdgpu_device *adev, bool enable) 1859 + static void gfx_v9_4_3_cp_enable(struct amdgpu_device *adev, bool enable, 1860 + int xcc_id) 1896 1861 { 1897 - gfx_v9_4_3_cp_compute_enable(adev, enable); 1862 + gfx_v9_4_3_cp_compute_enable(adev, enable, xcc_id); 1898 1863 } 1899 1864 1900 1865 static int gfx_v9_4_3_hw_init(void *handle) ··· 1923 1882 static int gfx_v9_4_3_hw_fini(void *handle) 1924 1883 { 1925 1884 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1885 + int i; 1926 1886 1927 1887 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 1928 1888 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 1929 1889 1930 - if (amdgpu_gfx_disable_kcq(adev, 0)) 1931 - DRM_ERROR("KCQ disable failed\n"); 1890 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1891 + if (amdgpu_gfx_disable_kcq(adev, i)) 1892 + DRM_ERROR("XCD %d KCQ disable failed\n", i); 1932 1893 1933 - /* Use deinitialize sequence from CAIL when unbinding device from driver, 1934 - * otherwise KIQ is hanging when binding back 1935 - */ 1936 - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 1937 - mutex_lock(&adev->srbm_mutex); 1938 - soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, 1939 - adev->gfx.kiq[0].ring.pipe, 1940 - adev->gfx.kiq[0].ring.queue, 0, 0); 1941 - gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[0].ring); 1942 - soc15_grbm_select(adev, 0, 0, 0, 0, 0); 1943 - mutex_unlock(&adev->srbm_mutex); 1894 + /* Use deinitialize sequence from CAIL when unbinding 
device 1895 + * from driver, otherwise KIQ is hanging when binding back 1896 + */ 1897 + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 1898 + mutex_lock(&adev->srbm_mutex); 1899 + soc15_grbm_select(adev, adev->gfx.kiq[i].ring.me, 1900 + adev->gfx.kiq[i].ring.pipe, 1901 + adev->gfx.kiq[i].ring.queue, 0, i); 1902 + gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[i].ring, i); 1903 + soc15_grbm_select(adev, 0, 0, 0, 0, i); 1904 + mutex_unlock(&adev->srbm_mutex); 1905 + } 1906 + 1907 + gfx_v9_4_3_cp_enable(adev, false, i); 1944 1908 } 1945 - 1946 - gfx_v9_4_3_cp_enable(adev, false); 1947 1909 1948 1910 /* Skip suspend with A+A reset */ 1949 1911 if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { ··· 1971 1927 static bool gfx_v9_4_3_is_idle(void *handle) 1972 1928 { 1973 1929 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1930 + int i; 1974 1931 1975 - if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 1976 - GRBM_STATUS, GUI_ACTIVE)) 1977 - return false; 1978 - else 1979 - return true; 1932 + for (i = 0; i < adev->gfx.num_xcd; i++) { 1933 + if (REG_GET_FIELD(RREG32_SOC15(GC, i, regGRBM_STATUS), 1934 + GRBM_STATUS, GUI_ACTIVE)) 1935 + return false; 1936 + } 1937 + return true; 1980 1938 } 1981 1939 1982 1940 static int gfx_v9_4_3_wait_for_idle(void *handle) ··· 2031 1985 adev->gfx.rlc.funcs->stop(adev); 2032 1986 2033 1987 /* Disable MEC parsing/prefetching */ 2034 - gfx_v9_4_3_cp_compute_enable(adev, false); 1988 + gfx_v9_4_3_cp_compute_enable(adev, false, 0); 2035 1989 2036 1990 if (grbm_soft_reset) { 2037 1991 tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); ··· 2086 2040 { 2087 2041 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2088 2042 2043 + /* hardcode in emulation phase */ 2044 + adev->gfx.num_xcd = 1; 2045 + adev->gfx.num_xcc_per_xcp = 1; 2046 + adev->gfx.partition_mode = AMDGPU_SPX_PARTITION_MODE; 2047 + 2089 2048 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 2090 2049 AMDGPU_MAX_COMPUTE_RINGS); 
2091 2050 gfx_v9_4_3_set_kiq_pm4_funcs(adev); ··· 2119 2068 } 2120 2069 2121 2070 static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, 2122 - bool enable) 2071 + bool enable, int xcc_id) 2123 2072 { 2124 2073 uint32_t data, def; 2125 2074 ··· 2128 2077 /* It is disabled by HW by default */ 2129 2078 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 2130 2079 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 2131 - def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 2080 + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); 2132 2081 2133 2082 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 2134 2083 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | ··· 2138 2087 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 2139 2088 2140 2089 if (def != data) 2141 - WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 2090 + WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); 2142 2091 2143 2092 /* MGLS is a global flag to control all MGLS in GFX */ 2144 2093 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 2145 2094 /* 2 - RLC memory Light sleep */ 2146 2095 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 2147 - def = data = RREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL); 2096 + def = data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); 2148 2097 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2149 2098 if (def != data) 2150 - WREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL, data); 2099 + WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); 2151 2100 } 2152 2101 /* 3 - CP memory Light sleep */ 2153 2102 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 2154 - def = data = RREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL); 2103 + def = data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); 2155 2104 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2156 2105 if (def != data) 2157 - WREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL, data); 2106 + WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); 2158 2107 } 2159 2108 } 2160 2109 } else 
{ 2161 2110 /* 1 - MGCG_OVERRIDE */ 2162 - def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 2111 + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); 2163 2112 2164 2113 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 2165 2114 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | ··· 2167 2116 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 2168 2117 2169 2118 if (def != data) 2170 - WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 2119 + WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); 2171 2120 2172 2121 /* 2 - disable MGLS in RLC */ 2173 - data = RREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL); 2122 + data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); 2174 2123 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 2175 2124 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2176 - WREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL, data); 2125 + WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); 2177 2126 } 2178 2127 2179 2128 /* 3 - disable MGLS in CP */ 2180 - data = RREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL); 2129 + data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); 2181 2130 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 2182 2131 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2183 - WREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL, data); 2132 + WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); 2184 2133 } 2185 2134 } 2186 2135 ··· 2188 2137 } 2189 2138 2190 2139 static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 2191 - bool enable) 2140 + bool enable, int xcc_id) 2192 2141 { 2193 2142 uint32_t def, data; 2194 2143 2195 2144 amdgpu_gfx_rlc_enter_safe_mode(adev); 2196 2145 2197 2146 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 2198 - def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 2147 + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); 2199 2148 /* unset CGCG override */ 2200 2149 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 2201 2150 
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) ··· 2204 2153 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 2205 2154 /* update CGCG and CGLS override bits */ 2206 2155 if (def != data) 2207 - WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 2156 + WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); 2208 2157 2209 2158 /* enable cgcg FSM(0x0000363F) */ 2210 - def = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 2159 + def = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); 2211 2160 2212 2161 if (adev->asic_type == CHIP_ARCTURUS) 2213 2162 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | ··· 2219 2168 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 2220 2169 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 2221 2170 if (def != data) 2222 - WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 2171 + WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); 2223 2172 2224 2173 /* set IDLE_POLL_COUNT(0x00900100) */ 2225 - def = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 2174 + def = RREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL); 2226 2175 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 2227 2176 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2228 2177 if (def != data) 2229 - WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 2178 + WREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL, data); 2230 2179 } else { 2231 - def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 2180 + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); 2232 2181 /* reset CGCG/CGLS bits */ 2233 2182 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 2234 2183 /* disable cgcg and cgls in FSM */ 2235 2184 if (def != data) 2236 - WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 2185 + WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); 2237 2186 } 2238 2187 2239 2188 amdgpu_gfx_rlc_exit_safe_mode(adev); 2240 2189 } 2241 2190 2242 2191 static int 
gfx_v9_4_3_update_gfx_clock_gating(struct amdgpu_device *adev, 2243 - bool enable) 2192 + bool enable, int xcc_id) 2244 2193 { 2245 2194 if (enable) { 2246 2195 /* CGCG/CGLS should be enabled after MGCG/MGLS 2247 2196 * === MGCG + MGLS === 2248 2197 */ 2249 - gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable); 2198 + gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); 2250 2199 /* === CGCG + CGLS === */ 2251 - gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable); 2200 + gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); 2252 2201 } else { 2253 2202 /* CGCG/CGLS should be disabled before MGCG/MGLS 2254 2203 * === CGCG + CGLS === 2255 2204 */ 2256 - gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable); 2205 + gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); 2257 2206 /* === MGCG + MGLS === */ 2258 - gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable); 2207 + gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); 2259 2208 } 2260 2209 return 0; 2261 2210 } ··· 2285 2234 enum amd_clockgating_state state) 2286 2235 { 2287 2236 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2237 + int i; 2288 2238 2289 2239 if (amdgpu_sriov_vf(adev)) 2290 2240 return 0; 2291 2241 2292 2242 switch (adev->ip_versions[GC_HWIP][0]) { 2293 2243 case IP_VERSION(9, 4, 3): 2294 - gfx_v9_4_3_update_gfx_clock_gating(adev, 2295 - state == AMD_CG_STATE_GATE); 2244 + for (i = 0; i < adev->gfx.num_xcd; i++) 2245 + gfx_v9_4_3_update_gfx_clock_gating(adev, 2246 + state == AMD_CG_STATE_GATE, i); 2296 2247 break; 2297 2248 default: 2298 2249 break; ··· 2562 2509 } 2563 2510 2564 2511 static void gfx_v9_4_3_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 2565 - int me, int pipe, 2566 - enum amdgpu_interrupt_state state) 2512 + int me, int pipe, 2513 + enum amdgpu_interrupt_state state, 2514 + int xcc_id) 2567 2515 { 2568 2516 u32 mec_int_cntl, mec_int_cntl_reg; 2569 2517 ··· 2577 2523 if (me == 1) 
{ 2578 2524 switch (pipe) { 2579 2525 case 0: 2580 - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2526 + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE0_INT_CNTL); 2581 2527 break; 2582 2528 case 1: 2583 - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2529 + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE1_INT_CNTL); 2584 2530 break; 2585 2531 case 2: 2586 - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2532 + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE2_INT_CNTL); 2587 2533 break; 2588 2534 case 3: 2589 - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2535 + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE3_INT_CNTL); 2590 2536 break; 2591 2537 default: 2592 2538 DRM_DEBUG("invalid pipe %d\n", pipe); ··· 2620 2566 unsigned type, 2621 2567 enum amdgpu_interrupt_state state) 2622 2568 { 2569 + int i; 2570 + 2623 2571 switch (state) { 2624 2572 case AMDGPU_IRQ_STATE_DISABLE: 2625 2573 case AMDGPU_IRQ_STATE_ENABLE: 2626 - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, 2627 - PRIV_REG_INT_ENABLE, 2628 - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 2574 + for (i = 0; i < adev->gfx.num_xcd; i++) 2575 + WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, 2576 + PRIV_REG_INT_ENABLE, 2577 + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 2629 2578 break; 2630 2579 default: 2631 2580 break; ··· 2642 2585 unsigned type, 2643 2586 enum amdgpu_interrupt_state state) 2644 2587 { 2588 + int i; 2589 + 2645 2590 switch (state) { 2646 2591 case AMDGPU_IRQ_STATE_DISABLE: 2647 2592 case AMDGPU_IRQ_STATE_ENABLE: 2648 - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, 2649 - PRIV_INSTR_INT_ENABLE, 2650 - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 2593 + for (i = 0; i < adev->gfx.num_xcd; i++) 2594 + WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, 2595 + PRIV_INSTR_INT_ENABLE, 2596 + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 2651 2597 break; 2652 2598 default: 2653 2599 break; ··· 2664 2604 unsigned type, 2665 2605 enum amdgpu_interrupt_state state) 2666 2606 { 2667 - switch (type) { 2668 - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 2669 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state); 2670 - break; 2671 - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 2672 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 1, state); 2673 - break; 2674 - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 2675 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 2, state); 2676 - break; 2677 - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 2678 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 3, state); 2679 - break; 2680 - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 2681 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 0, state); 2682 - break; 2683 - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 2684 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 1, state); 2685 - break; 2686 - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 2687 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 2, state); 2688 - break; 2689 - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 2690 - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 3, state); 2691 - break; 2692 - default: 2693 - break; 2607 + int i; 2608 + for (i = 0; i < adev->gfx.num_xcd; i++) { 2609 + switch (type) { 2610 + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 2611 + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state, i); 2612 + break; 2613 + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 2614 + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 1, state, i); 2615 + break; 2616 + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 2617 + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 2, state, i); 2618 + break; 2619 + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 2620 + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 3, state, i); 2621 + break; 2622 + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 2623 + 
gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 0, state, i); 2624 + break; 2625 + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 2626 + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 1, state, i); 2627 + break; 2628 + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 2629 + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 2, state, i); 2630 + break; 2631 + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 2632 + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 3, state, i); 2633 + break; 2634 + default: 2635 + break; 2636 + } 2694 2637 } 2638 + 2695 2639 return 0; 2696 2640 } 2697 2641 ··· 2935 2871 2936 2872 static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) 2937 2873 { 2938 - int i; 2874 + int i, j; 2939 2875 2940 - adev->gfx.kiq[0].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; 2876 + for (i = 0; i < adev->gfx.num_xcd; i++) { 2877 + adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; 2941 2878 2942 - for (i = 0; i < adev->gfx.num_compute_rings; i++) 2943 - adev->gfx.compute_ring[i].funcs = &gfx_v9_4_3_ring_funcs_compute; 2879 + for (j = 0; j < adev->gfx.num_compute_rings; j++) 2880 + adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs 2881 + = &gfx_v9_4_3_ring_funcs_compute; 2882 + } 2944 2883 } 2945 2884 2946 2885 static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = {