Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu/mes_v12_1: add mes self test

Add a MES self test to ensure that MES user queues work.

V2: add pasid to amdgpu_vm_init.
V3: Squash in fix for non-SPX modes (Mukul)

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Jack Xiao and committed by
Alex Deucher
44e5195f 3fd20c14

+334 -1
+334 -1
drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
··· 31 31 #include "gc/gc_11_0_0_default.h" 32 32 #include "v12_structs.h" 33 33 #include "mes_v12_api_def.h" 34 + #include "gfx_v12_1_pkt.h" 35 + #include "sdma_v7_1_0_pkt_open.h" 34 36 35 37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 36 38 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); ··· 43 41 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 44 42 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 45 43 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 44 + static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id); 46 45 47 46 #define MES_EOP_SIZE 2048 48 47 ··· 1952 1949 return 0; 1953 1950 } 1954 1951 1952 + static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block) 1953 + { 1954 + struct amdgpu_device *adev = ip_block->adev; 1955 + int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1956 + 1957 + /* TODO: remove it if issue fixed. */ 1958 + if (adev->mes.enable_coop_mode) 1959 + return 0; 1960 + 1961 + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1962 + /* for COOP mode, only test master xcc. 
*/ 1963 + if (adev->mes.enable_coop_mode && 1964 + adev->mes.master_xcc_ids[xcc_id] != xcc_id) 1965 + continue; 1966 + 1967 + mes_v12_1_self_test(adev, xcc_id); 1968 + } 1969 + 1970 + return 0; 1971 + } 1972 + 1955 1973 static const struct amd_ip_funcs mes_v12_1_ip_funcs = { 1956 1974 .name = "mes_v12_1", 1957 1975 .early_init = mes_v12_1_early_init, 1958 - .late_init = NULL, 1976 + .late_init = mes_v12_1_late_init, 1959 1977 .sw_init = mes_v12_1_sw_init, 1960 1978 .sw_fini = mes_v12_1_sw_fini, 1961 1979 .hw_init = mes_v12_1_hw_init, ··· 1992 1968 .rev = 0, 1993 1969 .funcs = &mes_v12_1_ip_funcs, 1994 1970 }; 1971 + 1972 + static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev, 1973 + struct amdgpu_bo **bo, uint64_t *addr, 1974 + void **ptr, int size) 1975 + { 1976 + amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1977 + bo, addr, ptr); 1978 + if (!*bo) { 1979 + dev_err(adev->dev, "failed to allocate test buffer bo\n"); 1980 + return -ENOMEM; 1981 + } 1982 + memset(*ptr, 0, size); 1983 + return 0; 1984 + } 1985 + 1986 + static int mes_v12_1_map_test_bo(struct amdgpu_device *adev, 1987 + struct amdgpu_bo *bo, struct amdgpu_vm *vm, 1988 + struct amdgpu_bo_va **bo_va, u64 va, int size) 1989 + { 1990 + struct amdgpu_sync sync; 1991 + int r; 1992 + 1993 + r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size); 1994 + if (r) 1995 + return r; 1996 + 1997 + amdgpu_sync_create(&sync); 1998 + 1999 + r = amdgpu_vm_bo_update(adev, *bo_va, false); 2000 + if (r) { 2001 + dev_err(adev->dev, "failed to do vm_bo_update on meta data\n"); 2002 + goto error; 2003 + } 2004 + amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL); 2005 + 2006 + r = amdgpu_vm_update_pdes(adev, vm, false); 2007 + if (r) { 2008 + dev_err(adev->dev, "failed to update pdes on meta data\n"); 2009 + goto error; 2010 + } 2011 + amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); 2012 + amdgpu_sync_wait(&sync, false); 2013 + 2014 + error: 2015 + amdgpu_sync_free(&sync); 
2016 + return 0; 2017 + } 2018 + 2019 + static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, 2020 + u32 *queue_ptr, u64 fence_gpu_addr, 2021 + void *fence_cpu_ptr, void *wptr_cpu_addr, 2022 + u64 doorbell_idx, int queue_type) 2023 + { 2024 + volatile uint32_t *cpu_ptr = fence_cpu_ptr; 2025 + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2026 + int sdma_ring_align = 0x10, compute_ring_align = 0x100; 2027 + uint32_t tmp, xcc_offset; 2028 + int r = 0, i, wptr = 0; 2029 + 2030 + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2031 + if (!adev->mes.enable_coop_mode) { 2032 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), 2033 + regSCRATCH_REG0, 0xCAFEDEAD); 2034 + } else { 2035 + for (i = 0; i < num_xcc; i++) { 2036 + if (adev->mes.master_xcc_ids[i] == xcc_id) 2037 + WREG32_SOC15(GC, GET_INST(GC, i), 2038 + regSCRATCH_REG0, 0xCAFEDEAD); 2039 + } 2040 + } 2041 + 2042 + xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 2043 + queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2044 + queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START; 2045 + queue_ptr[wptr++] = 0xDEADBEEF; 2046 + 2047 + for (i = wptr; i < compute_ring_align; i++) 2048 + queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF); 2049 + 2050 + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2051 + *cpu_ptr = 0xCAFEDEAD; 2052 + 2053 + queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | 2054 + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 2055 + queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr); 2056 + queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr); 2057 + queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 2058 + queue_ptr[wptr++] = 0xDEADBEEF; 2059 + 2060 + for (i = wptr; i < sdma_ring_align; i++) 2061 + queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 2062 + 2063 + wptr <<= 2; 2064 + } 2065 + 2066 + atomic64_set((atomic64_t *)wptr_cpu_addr, wptr); 2067 + WDOORBELL64(doorbell_idx, wptr); 2068 + 2069 + for (i = 0; i < adev->usec_timeout; 
i++) { 2070 + if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2071 + tmp = le32_to_cpu(*cpu_ptr); 2072 + } else { 2073 + if (!adev->mes.enable_coop_mode) { 2074 + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 2075 + regSCRATCH_REG0); 2076 + } else { 2077 + for (i = 0; i < num_xcc; i++) { 2078 + if (xcc_id != adev->mes.master_xcc_ids[i]) 2079 + continue; 2080 + 2081 + tmp = RREG32_SOC15(GC, GET_INST(GC, i), 2082 + regSCRATCH_REG0); 2083 + if (tmp != 0xDEADBEEF) 2084 + break; 2085 + } 2086 + } 2087 + } 2088 + 2089 + if (tmp == 0xDEADBEEF) 2090 + break; 2091 + 2092 + if (amdgpu_emu_mode == 1) 2093 + msleep(1); 2094 + else 2095 + udelay(1); 2096 + } 2097 + 2098 + if (i >= adev->usec_timeout) { 2099 + dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id, 2100 + queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2101 + 2102 + while (halt_if_hws_hang) 2103 + schedule(); 2104 + 2105 + r = -ETIMEDOUT; 2106 + } else { 2107 + dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id, 2108 + queue_type == AMDGPU_RING_TYPE_SDMA ? 
"sdma" : "compute"); 2109 + } 2110 + 2111 + return r; 2112 + } 2113 + 2114 + #define USER_CTX_SIZE (PAGE_SIZE * 2) 2115 + #define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM 2116 + #define RING_OFFSET(addr) ((addr)) 2117 + #define EOP_OFFSET(addr) ((addr) + PAGE_SIZE) 2118 + #define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64)) 2119 + #define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2) 2120 + #define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3) 2121 + 2122 + static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id, 2123 + int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr, 2124 + u64 queue_gpu_addr, void *ctx_ptr, int queue_type) 2125 + { 2126 + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; 2127 + struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type]; 2128 + struct amdgpu_mqd_prop mqd_prop = {0}; 2129 + struct mes_add_queue_input add_queue = {0}; 2130 + struct mes_remove_queue_input remove_queue = {0}; 2131 + struct amdgpu_bo *mqd_bo = NULL; 2132 + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2133 + int i, r, off, mqd_size, mqd_count = 1; 2134 + void *mqd_ptr = NULL; 2135 + u64 mqd_gpu_addr, doorbell_idx; 2136 + 2137 + /* extra one page size padding for mes fw */ 2138 + mqd_size = mqd_mgr->mqd_size + PAGE_SIZE; 2139 + 2140 + if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2141 + doorbell_idx = adev->mes.db_start_dw_offset + \ 2142 + adev->doorbell_index.sdma_engine[0]; 2143 + } else { 2144 + doorbell_idx = adev->mes.db_start_dw_offset + \ 2145 + adev->doorbell_index.userqueue_start; 2146 + } 2147 + 2148 + if (adev->mes.enable_coop_mode && 2149 + queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2150 + for (i = 0, mqd_count = 0; i < num_xcc; i++) { 2151 + if (adev->mes.master_xcc_ids[i] == xcc_id) 2152 + mqd_count++; 2153 + } 2154 + mqd_size *= mqd_count; 2155 + } 2156 + 2157 + r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr, 2158 + &mqd_ptr, mqd_size * mqd_count); 2159 + if (r < 0) 2160 + return r; 2161 + 
2162 + mqd_prop.mqd_gpu_addr = mqd_gpu_addr; 2163 + mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA); 2164 + mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA); 2165 + mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA); 2166 + mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA); 2167 + mqd_prop.doorbell_index = doorbell_idx; 2168 + mqd_prop.queue_size = PAGE_SIZE; 2169 + mqd_prop.mqd_stride_size = mqd_size; 2170 + mqd_prop.use_doorbell = true; 2171 + mqd_prop.hqd_active = false; 2172 + 2173 + mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop); 2174 + if (mqd_count > 1) { 2175 + for (i = 1; i < mqd_count; i++) { 2176 + off = mqd_size * i; 2177 + mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off; 2178 + mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off, 2179 + &mqd_prop); 2180 + } 2181 + } 2182 + 2183 + add_queue.xcc_id = xcc_id; 2184 + add_queue.process_id = pasid; 2185 + add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset + 2186 + amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start; 2187 + add_queue.process_va_start = 0; 2188 + add_queue.process_va_end = adev->vm_manager.max_pfn - 1; 2189 + add_queue.process_context_addr = meta_gpu_addr; 2190 + add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE; 2191 + add_queue.doorbell_offset = doorbell_idx; 2192 + add_queue.mqd_addr = mqd_gpu_addr; 2193 + add_queue.wptr_addr = mqd_prop.wptr_gpu_addr; 2194 + add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr); 2195 + add_queue.queue_type = queue_type; 2196 + add_queue.vm_cntx_cntl = hub->vm_cntx_cntl; 2197 + 2198 + r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue); 2199 + if (r) 2200 + goto error; 2201 + 2202 + mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr), 2203 + FENCE_OFFSET(USER_CTX_VA), 2204 + FENCE_OFFSET((char *)ctx_ptr), 2205 + WPTR_OFFSET((char *)ctx_ptr), 2206 + doorbell_idx, queue_type); 2207 + 2208 + remove_queue.xcc_id = xcc_id; 2209 + remove_queue.doorbell_offset = doorbell_idx; 2210 + remove_queue.gang_context_addr = 
add_queue.gang_context_addr; 2211 + r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue); 2212 + 2213 + error: 2214 + amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr); 2215 + return r; 2216 + } 2217 + 2218 + static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id) 2219 + { 2220 + int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, 2221 + /* AMDGPU_RING_TYPE_SDMA */ }; 2222 + struct amdgpu_bo_va *bo_va = NULL; 2223 + struct amdgpu_vm *vm = NULL; 2224 + struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL; 2225 + void *meta_ptr = NULL, *ctx_ptr = NULL; 2226 + u64 meta_gpu_addr, ctx_gpu_addr; 2227 + int size, i, r, pasid;; 2228 + 2229 + pasid = amdgpu_pasid_alloc(16); 2230 + if (pasid < 0) 2231 + pasid = 0; 2232 + 2233 + size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE; 2234 + r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr, 2235 + &meta_ptr, size); 2236 + if (r < 0) 2237 + goto err2; 2238 + 2239 + r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr, 2240 + &ctx_ptr, USER_CTX_SIZE); 2241 + if (r < 0) 2242 + goto err2; 2243 + 2244 + vm = kzalloc(sizeof(*vm), GFP_KERNEL); 2245 + if (!vm) { 2246 + r = -ENOMEM; 2247 + goto err2; 2248 + } 2249 + 2250 + r = amdgpu_vm_init(adev, vm, -1, pasid); 2251 + if (r) 2252 + goto err1; 2253 + 2254 + r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va, 2255 + USER_CTX_VA, USER_CTX_SIZE); 2256 + if (r) 2257 + goto err0; 2258 + 2259 + for (i = 0; i < ARRAY_SIZE(queue_types); i++) { 2260 + memset(ctx_ptr, 0, USER_CTX_SIZE); 2261 + 2262 + r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr, 2263 + ctx_gpu_addr, ctx_ptr, queue_types[i]); 2264 + if (r) 2265 + break; 2266 + } 2267 + 2268 + amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA); 2269 + err0: 2270 + amdgpu_vm_fini(adev, vm); 2271 + err1: 2272 + kfree(vm); 2273 + err2: 2274 + amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr); 2275 + amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr); 2276 + 
amdgpu_pasid_free(pasid); 2277 + return r; 2278 + } 2279 +