Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: adjust xcc_cp_resume function for gfx_v12_1

Adjust the gfx_v12_1_xcc_cp_resume function to program
CP resume per xcc_id (logical XCC number) in order to fix
xcp_resume.
V2: Allocate the compute microcode BO during sw init.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Likun Gao and committed by
Alex Deucher
1a856863 bf93f1fe

+145 -119
+145 -119
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
··· 89 89 bool enable); 90 90 static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev, 91 91 bool enable, int xcc_id); 92 + static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev); 92 93 93 94 static void gfx_v12_1_kiq_set_resources(struct amdgpu_ring *kiq_ring, 94 95 uint64_t queue_mask) ··· 1246 1245 r = gfx_v12_1_rlc_autoload_buffer_init(adev); 1247 1246 if (r) 1248 1247 return r; 1248 + } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1249 + r = gfx_v12_1_init_cp_compute_microcode_bo(adev); 1250 + if (r) 1251 + return r; 1249 1252 } 1250 1253 1251 1254 r = gfx_v12_1_gpu_early_init(adev); ··· 1924 1919 udelay(50); 1925 1920 } 1926 1921 1927 - static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev, 1928 - uint16_t xcc_mask) 1922 + static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev) 1929 1923 { 1930 1924 const struct gfx_firmware_header_v2_0 *mec_hdr; 1931 1925 const __le32 *fw_ucode, *fw_data; 1932 - u32 tmp, fw_ucode_size, fw_data_size; 1933 - u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 1926 + u32 fw_ucode_size, fw_data_size; 1934 1927 u32 *fw_ucode_ptr, *fw_data_ptr; 1935 - int r, xcc_id; 1928 + int i, r, xcc_id; 1936 1929 1937 1930 if (!adev->gfx.mec_fw) 1938 1931 return -EINVAL; ··· 1946 1943 le32_to_cpu(mec_hdr->data_offset_bytes)); 1947 1944 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 1948 1945 1949 - r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 1950 - 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 1951 - &adev->gfx.mec.mec_fw_obj, 1952 - &adev->gfx.mec.mec_fw_gpu_addr, 1953 - (void **)&fw_ucode_ptr); 1954 - if (r) { 1955 - dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 1956 - gfx_v12_1_mec_fini(adev); 1957 - return r; 1958 - } 1959 - 1960 - r = amdgpu_bo_create_reserved(adev, 1961 - ALIGN(fw_data_size, 64 * 1024) * 1962 - adev->gfx.mec.num_pipe_per_mec * NUM_XCC(xcc_mask), 1963 - 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 1964 - 
&adev->gfx.mec.mec_fw_data_obj, 1965 - &adev->gfx.mec.mec_fw_data_gpu_addr, 1966 - (void **)&fw_data_ptr); 1967 - if (r) { 1968 - dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 1969 - gfx_v12_1_mec_fini(adev); 1970 - return r; 1971 - } 1972 - 1973 - memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 1974 - for (xcc_id = 0; xcc_id < NUM_XCC(xcc_mask); xcc_id++) { 1975 - for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 1976 - u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 1977 - ALIGN(fw_data_size, 64 * 1024) / 4; 1978 - memcpy(fw_data_ptr + offset, fw_data, fw_data_size); 1946 + if (adev->gfx.mec.mec_fw_obj == NULL) { 1947 + r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 1948 + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 1949 + &adev->gfx.mec.mec_fw_obj, 1950 + &adev->gfx.mec.mec_fw_gpu_addr, 1951 + (void **)&fw_ucode_ptr); 1952 + if (r) { 1953 + dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 1954 + gfx_v12_1_mec_fini(adev); 1955 + return r; 1979 1956 } 1957 + 1958 + memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 1959 + 1960 + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1961 + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1980 1962 } 1981 1963 1982 - amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1983 - amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 1984 - amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1985 - amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 1986 - 1987 - for (xcc_id = 0; xcc_id < NUM_XCC(xcc_mask); xcc_id++) { 1988 - gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id); 1989 - 1990 - tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL); 1991 - tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 1992 - tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 1993 - tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 1994 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp); 1995 - 1996 - tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regCP_MEC_DC_BASE_CNTL); 1997 - tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 1998 - tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 1999 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp); 2000 - 2001 - mutex_lock(&adev->srbm_mutex); 2002 - for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2003 - soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id)); 2004 - 2005 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO, 2006 - lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + 2007 - (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 2008 - ALIGN(fw_data_size, 64 * 1024))); 2009 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI, 2010 - upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + 2011 - (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 2012 - ALIGN(fw_data_size, 64 * 1024))); 2013 - 2014 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO, 2015 - lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2016 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI, 2017 - upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 1964 + if (adev->gfx.mec.mec_fw_data_obj == NULL) { 1965 + r = amdgpu_bo_create_reserved(adev, 1966 + ALIGN(fw_data_size, 64 * 1024) * 1967 + adev->gfx.mec.num_pipe_per_mec * NUM_XCC(adev->gfx.xcc_mask), 1968 + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 1969 + &adev->gfx.mec.mec_fw_data_obj, 1970 + &adev->gfx.mec.mec_fw_data_gpu_addr, 1971 + (void **)&fw_data_ptr); 1972 + if (r) { 1973 + dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r); 1974 + gfx_v12_1_mec_fini(adev); 1975 + return r; 2018 1976 } 2019 - mutex_unlock(&adev->srbm_mutex); 2020 - soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0)); 2021 1977 2022 - /* Trigger an invalidation of the L1 instruction caches */ 1978 + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { 1979 + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 1980 + u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + 
i) * 1981 + ALIGN(fw_data_size, 64 * 1024) / 4; 1982 + memcpy(fw_data_ptr + offset, fw_data, fw_data_size); 1983 + } 1984 + } 1985 + 1986 + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 1987 + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 1988 + } 1989 + 1990 + return 0; 1991 + } 1992 + 1993 + static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev, 1994 + int xcc_id) 1995 + { 1996 + const struct gfx_firmware_header_v2_0 *mec_hdr; 1997 + u32 fw_data_size; 1998 + u32 tmp, i, usec_timeout = 50000; /* Wait for 50 ms */ 1999 + 2000 + if (!adev->gfx.mec_fw) 2001 + return -EINVAL; 2002 + 2003 + mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 2004 + fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 2005 + 2006 + gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id); 2007 + 2008 + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL); 2009 + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2010 + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2011 + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2012 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp); 2013 + 2014 + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL); 2015 + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2016 + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2017 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp); 2018 + 2019 + mutex_lock(&adev->srbm_mutex); 2020 + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2021 + soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id)); 2022 + 2023 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO, 2024 + lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + 2025 + (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 2026 + ALIGN(fw_data_size, 64 * 1024))); 2027 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI, 2028 + 
upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + 2029 + (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 2030 + ALIGN(fw_data_size, 64 * 1024))); 2031 + 2032 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO, 2033 + lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2034 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI, 2035 + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2036 + } 2037 + mutex_unlock(&adev->srbm_mutex); 2038 + soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0)); 2039 + 2040 + /* Trigger an invalidation of the L1 instruction caches */ 2041 + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL); 2042 + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2043 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp); 2044 + 2045 + /* Wait for invalidation complete */ 2046 + for (i = 0; i < usec_timeout; i++) { 2023 2047 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL); 2024 - tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2025 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp); 2026 - 2027 - /* Wait for invalidation complete */ 2028 - for (i = 0; i < usec_timeout; i++) { 2029 - tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL); 2030 - if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2031 - INVALIDATE_DCACHE_COMPLETE)) 2032 - break; 2033 - udelay(1); 2034 - } 2035 - 2036 - if (i >= usec_timeout) { 2037 - dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2038 - return -EINVAL; 2039 - } 2040 - 2041 - /* Trigger an invalidation of the L1 instruction caches */ 2042 - tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); 2043 - tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2044 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp); 2045 - 2046 - /* Wait for invalidation complete */ 2047 - for (i = 0; i < usec_timeout; i++) { 2048 - tmp = RREG32_SOC15(GC, 
GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); 2049 - if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2050 - INVALIDATE_CACHE_COMPLETE)) 2051 - break; 2052 - udelay(1); 2053 - } 2054 - 2055 - if (i >= usec_timeout) { 2056 - dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2057 - return -EINVAL; 2058 - } 2059 - 2060 - gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id); 2048 + if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2049 + INVALIDATE_DCACHE_COMPLETE)) 2050 + break; 2051 + udelay(1); 2061 2052 } 2053 + 2054 + if (i >= usec_timeout) { 2055 + dev_err(adev->dev, "failed to invalidate data cache\n"); 2056 + return -EINVAL; 2057 + } 2058 + 2059 + /* Trigger an invalidation of the L1 instruction caches */ 2060 + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); 2061 + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2062 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp); 2063 + 2064 + /* Wait for invalidation complete */ 2065 + for (i = 0; i < usec_timeout; i++) { 2066 + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); 2067 + if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2068 + INVALIDATE_CACHE_COMPLETE)) 2069 + break; 2070 + udelay(1); 2071 + } 2072 + 2073 + if (i >= usec_timeout) { 2074 + dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2075 + return -EINVAL; 2076 + } 2077 + 2078 + gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id); 2062 2079 2063 2080 return 0; 2064 2081 } ··· 2483 2460 int r, i, xcc_id; 2484 2461 struct amdgpu_ring *ring; 2485 2462 2486 - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2487 - /* legacy firmware loading */ 2488 - r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_mask); 2489 - if (r) 2490 - return r; 2491 - } 2463 + for_each_inst(xcc_id, xcc_mask) { 2464 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2465 + /* legacy firmware loading */ 2466 + r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, 
xcc_id); 2467 + if (r) 2468 + return r; 2469 + } 2492 2470 2493 - for (xcc_id = 0; xcc_id < NUM_XCC(xcc_mask); xcc_id++) { 2494 2471 if (!(adev->flags & AMD_IS_APU)) 2495 2472 gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id); 2496 2473 ··· 2523 2500 static int gfx_v12_1_cp_resume(struct amdgpu_device *adev) 2524 2501 { 2525 2502 int num_xcc, num_xcp, num_xcc_per_xcp; 2503 + uint16_t xcc_mask; 2526 2504 int r = 0; 2527 2505 2528 2506 num_xcc = NUM_XCC(adev->gfx.xcc_mask); ··· 2555 2531 if (r) 2556 2532 return r; 2557 2533 2558 - return gfx_v12_1_xcc_cp_resume(adev, adev->gfx.xcc_mask); 2534 + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); 2535 + 2536 + return gfx_v12_1_xcc_cp_resume(adev, xcc_mask); 2559 2537 } 2560 2538 2561 2539 static int gfx_v12_1_gfxhub_enable(struct amdgpu_device *adev)