Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/radeon/cik: add support for sDMA dma engines (v8)

CIK has new asynchronous DMA engines called sDMA
(system DMA). Each engine supports 1 ring buffer
for kernel and gfx and 2 userspace queues for compute.

TODO: fill in the compute setup.

v2: update to the latest reset code
v3: remove ib_parse
v4: fix copy_dma()
v5: drop WIP compute sDMA queues
v6: rebase
v7: endian fixes for IB
v8: cleanup for release

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+870 -6
+739 -6
drivers/gpu/drm/radeon/cik.c
··· 44 44 #define KV_RLC_UCODE_SIZE 2560 45 45 /* gddr controller */ 46 46 #define CIK_MC_UCODE_SIZE 7866 47 + /* sdma */ 48 + #define CIK_SDMA_UCODE_SIZE 1050 49 + #define CIK_SDMA_UCODE_VERSION 64 47 50 48 51 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); 49 52 MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); ··· 54 51 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin"); 55 52 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin"); 56 53 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin"); 54 + MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin"); 57 55 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin"); 58 56 MODULE_FIRMWARE("radeon/KAVERI_me.bin"); 59 57 MODULE_FIRMWARE("radeon/KAVERI_ce.bin"); 60 58 MODULE_FIRMWARE("radeon/KAVERI_mec.bin"); 61 59 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin"); 60 + MODULE_FIRMWARE("radeon/KAVERI_sdma.bin"); 62 61 MODULE_FIRMWARE("radeon/KABINI_pfp.bin"); 63 62 MODULE_FIRMWARE("radeon/KABINI_me.bin"); 64 63 MODULE_FIRMWARE("radeon/KABINI_ce.bin"); 65 64 MODULE_FIRMWARE("radeon/KABINI_mec.bin"); 66 65 MODULE_FIRMWARE("radeon/KABINI_rlc.bin"); 66 + MODULE_FIRMWARE("radeon/KABINI_sdma.bin"); 67 67 68 68 extern int r600_ih_ring_alloc(struct radeon_device *rdev); 69 69 extern void r600_ih_ring_fini(struct radeon_device *rdev); ··· 204 198 struct platform_device *pdev; 205 199 const char *chip_name; 206 200 size_t pfp_req_size, me_req_size, ce_req_size, 207 - mec_req_size, rlc_req_size, mc_req_size; 201 + mec_req_size, rlc_req_size, mc_req_size, 202 + sdma_req_size; 208 203 char fw_name[30]; 209 204 int err; 210 205 ··· 227 220 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 228 221 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; 229 222 mc_req_size = CIK_MC_UCODE_SIZE * 4; 223 + sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 230 224 break; 231 225 case CHIP_KAVERI: 232 226 chip_name = "KAVERI"; ··· 236 228 ce_req_size = CIK_CE_UCODE_SIZE * 4; 237 229 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 238 230 rlc_req_size = KV_RLC_UCODE_SIZE * 4; 231 + sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 239 232 break; 240 233 case CHIP_KABINI: 
241 234 chip_name = "KABINI"; ··· 245 236 ce_req_size = CIK_CE_UCODE_SIZE * 4; 246 237 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 247 238 rlc_req_size = KB_RLC_UCODE_SIZE * 4; 239 + sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 248 240 break; 249 241 default: BUG(); 250 242 } ··· 305 295 printk(KERN_ERR 306 296 "cik_rlc: Bogus length %zu in firmware \"%s\"\n", 307 297 rdev->rlc_fw->size, fw_name); 298 + err = -EINVAL; 299 + } 300 + 301 + snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name); 302 + err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev); 303 + if (err) 304 + goto out; 305 + if (rdev->sdma_fw->size != sdma_req_size) { 306 + printk(KERN_ERR 307 + "cik_sdma: Bogus length %zu in firmware \"%s\"\n", 308 + rdev->sdma_fw->size, fw_name); 308 309 err = -EINVAL; 309 310 } 310 311 ··· 1446 1425 WREG32(GB_ADDR_CONFIG, gb_addr_config); 1447 1426 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 1448 1427 WREG32(DMIF_ADDR_CALC, gb_addr_config); 1428 + WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70); 1429 + WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70); 1449 1430 1450 1431 cik_tiling_mode_table_init(rdev); 1451 1432 ··· 2159 2136 return 0; 2160 2137 } 2161 2138 2139 + /* 2140 + * sDMA - System DMA 2141 + * Starting with CIK, the GPU has new asynchronous 2142 + * DMA engines. These engines are used for compute 2143 + * and gfx. There are two DMA engines (SDMA0, SDMA1) 2144 + * and each one supports 1 ring buffer used for gfx 2145 + * and 2 queues used for compute. 2146 + * 2147 + * The programming model is very similar to the CP 2148 + * (ring buffer, IBs, etc.), but sDMA has its own 2149 + * packet format that is different from the PM4 format 2150 + * used by the CP. sDMA supports copying data, writing 2151 + * embedded data, solid fills, and a number of other 2152 + * things. It also has support for tiling/detiling of 2153 + * buffers. 
2154 + */ 2155 + /** 2156 + * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine 2157 + * 2158 + * @rdev: radeon_device pointer 2159 + * @ib: IB object to schedule 2160 + * 2161 + * Schedule an IB in the DMA ring (CIK). 2162 + */ 2163 + void cik_sdma_ring_ib_execute(struct radeon_device *rdev, 2164 + struct radeon_ib *ib) 2165 + { 2166 + struct radeon_ring *ring = &rdev->ring[ib->ring]; 2167 + u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; 2168 + 2169 + if (rdev->wb.enabled) { 2170 + u32 next_rptr = ring->wptr + 5; 2171 + while ((next_rptr & 7) != 4) 2172 + next_rptr++; 2173 + next_rptr += 4; 2174 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); 2175 + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 2176 + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 2177 + radeon_ring_write(ring, 1); /* number of DWs to follow */ 2178 + radeon_ring_write(ring, next_rptr); 2179 + } 2180 + 2181 + /* IB packet must end on a 8 DW boundary */ 2182 + while ((ring->wptr & 7) != 4) 2183 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 2184 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); 2185 + radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ 2186 + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); 2187 + radeon_ring_write(ring, ib->length_dw); 2188 + 2189 + } 2190 + 2191 + /** 2192 + * cik_sdma_fence_ring_emit - emit a fence on the DMA ring 2193 + * 2194 + * @rdev: radeon_device pointer 2195 + * @fence: radeon fence object 2196 + * 2197 + * Add a DMA fence packet to the ring to write 2198 + * the fence seq number and DMA trap packet to generate 2199 + * an interrupt if needed (CIK). 
2200 + */ 2201 + void cik_sdma_fence_ring_emit(struct radeon_device *rdev, 2202 + struct radeon_fence *fence) 2203 + { 2204 + struct radeon_ring *ring = &rdev->ring[fence->ring]; 2205 + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 2206 + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | 2207 + SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ 2208 + u32 ref_and_mask; 2209 + 2210 + if (fence->ring == R600_RING_TYPE_DMA_INDEX) 2211 + ref_and_mask = SDMA0; 2212 + else 2213 + ref_and_mask = SDMA1; 2214 + 2215 + /* write the fence */ 2216 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); 2217 + radeon_ring_write(ring, addr & 0xffffffff); 2218 + radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 2219 + radeon_ring_write(ring, fence->seq); 2220 + /* generate an interrupt */ 2221 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); 2222 + /* flush HDP */ 2223 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); 2224 + radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); 2225 + radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); 2226 + radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ 2227 + radeon_ring_write(ring, ref_and_mask); /* MASK */ 2228 + radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ 2229 + } 2230 + 2231 + /** 2232 + * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring 2233 + * 2234 + * @rdev: radeon_device pointer 2235 + * @ring: radeon_ring structure holding ring information 2236 + * @semaphore: radeon semaphore object 2237 + * @emit_wait: wait or signal semaphore 2238 + * 2239 + * Add a DMA semaphore packet to the ring wait on or signal 2240 + * other rings (CIK). 2241 + */ 2242 + void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, 2243 + struct radeon_ring *ring, 2244 + struct radeon_semaphore *semaphore, 2245 + bool emit_wait) 2246 + { 2247 + u64 addr = semaphore->gpu_addr; 2248 + u32 extra_bits = emit_wait ? 
0 : SDMA_SEMAPHORE_EXTRA_S; 2249 + 2250 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); 2251 + radeon_ring_write(ring, addr & 0xfffffff8); 2252 + radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 2253 + } 2254 + 2255 + /** 2256 + * cik_sdma_gfx_stop - stop the gfx async dma engines 2257 + * 2258 + * @rdev: radeon_device pointer 2259 + * 2260 + * Stop the gfx async dma ring buffers (CIK). 2261 + */ 2262 + static void cik_sdma_gfx_stop(struct radeon_device *rdev) 2263 + { 2264 + u32 rb_cntl, reg_offset; 2265 + int i; 2266 + 2267 + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 2268 + 2269 + for (i = 0; i < 2; i++) { 2270 + if (i == 0) 2271 + reg_offset = SDMA0_REGISTER_OFFSET; 2272 + else 2273 + reg_offset = SDMA1_REGISTER_OFFSET; 2274 + rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset); 2275 + rb_cntl &= ~SDMA_RB_ENABLE; 2276 + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); 2277 + WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); 2278 + } 2279 + } 2280 + 2281 + /** 2282 + * cik_sdma_rlc_stop - stop the compute async dma engines 2283 + * 2284 + * @rdev: radeon_device pointer 2285 + * 2286 + * Stop the compute async dma queues (CIK). 2287 + */ 2288 + static void cik_sdma_rlc_stop(struct radeon_device *rdev) 2289 + { 2290 + /* XXX todo */ 2291 + } 2292 + 2293 + /** 2294 + * cik_sdma_enable - halt or unhalt the async dma engines 2295 + * 2296 + * @rdev: radeon_device pointer 2297 + * @enable: enable/disable the DMA MEs. 2298 + * 2299 + * Halt or unhalt the async dma engines (CIK). 
2300 + */ 2301 + static void cik_sdma_enable(struct radeon_device *rdev, bool enable) 2302 + { 2303 + u32 me_cntl, reg_offset; 2304 + int i; 2305 + 2306 + for (i = 0; i < 2; i++) { 2307 + if (i == 0) 2308 + reg_offset = SDMA0_REGISTER_OFFSET; 2309 + else 2310 + reg_offset = SDMA1_REGISTER_OFFSET; 2311 + me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset); 2312 + if (enable) 2313 + me_cntl &= ~SDMA_HALT; 2314 + else 2315 + me_cntl |= SDMA_HALT; 2316 + WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl); 2317 + } 2318 + } 2319 + 2320 + /** 2321 + * cik_sdma_gfx_resume - setup and start the async dma engines 2322 + * 2323 + * @rdev: radeon_device pointer 2324 + * 2325 + * Set up the gfx DMA ring buffers and enable them (CIK). 2326 + * Returns 0 for success, error for failure. 2327 + */ 2328 + static int cik_sdma_gfx_resume(struct radeon_device *rdev) 2329 + { 2330 + struct radeon_ring *ring; 2331 + u32 rb_cntl, ib_cntl; 2332 + u32 rb_bufsz; 2333 + u32 reg_offset, wb_offset; 2334 + int i, r; 2335 + 2336 + for (i = 0; i < 2; i++) { 2337 + if (i == 0) { 2338 + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 2339 + reg_offset = SDMA0_REGISTER_OFFSET; 2340 + wb_offset = R600_WB_DMA_RPTR_OFFSET; 2341 + } else { 2342 + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 2343 + reg_offset = SDMA1_REGISTER_OFFSET; 2344 + wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; 2345 + } 2346 + 2347 + WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); 2348 + WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); 2349 + 2350 + /* Set ring buffer size in dwords */ 2351 + rb_bufsz = drm_order(ring->ring_size / 4); 2352 + rb_cntl = rb_bufsz << 1; 2353 + #ifdef __BIG_ENDIAN 2354 + rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE; 2355 + #endif 2356 + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); 2357 + 2358 + /* Initialize the ring buffer's read and write pointers */ 2359 + WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0); 2360 + WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0); 2361 + 2362 + /* set the wb 
address whether it's enabled or not */ 2363 + WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset, 2364 + upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); 2365 + WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset, 2366 + ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); 2367 + 2368 + if (rdev->wb.enabled) 2369 + rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE; 2370 + 2371 + WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8); 2372 + WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40); 2373 + 2374 + ring->wptr = 0; 2375 + WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); 2376 + 2377 + ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; 2378 + 2379 + /* enable DMA RB */ 2380 + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); 2381 + 2382 + ib_cntl = SDMA_IB_ENABLE; 2383 + #ifdef __BIG_ENDIAN 2384 + ib_cntl |= SDMA_IB_SWAP_ENABLE; 2385 + #endif 2386 + /* enable DMA IBs */ 2387 + WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl); 2388 + 2389 + ring->ready = true; 2390 + 2391 + r = radeon_ring_test(rdev, ring->idx, ring); 2392 + if (r) { 2393 + ring->ready = false; 2394 + return r; 2395 + } 2396 + } 2397 + 2398 + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 2399 + 2400 + return 0; 2401 + } 2402 + 2403 + /** 2404 + * cik_sdma_rlc_resume - setup and start the async dma engines 2405 + * 2406 + * @rdev: radeon_device pointer 2407 + * 2408 + * Set up the compute DMA queues and enable them (CIK). 2409 + * Returns 0 for success, error for failure. 2410 + */ 2411 + static int cik_sdma_rlc_resume(struct radeon_device *rdev) 2412 + { 2413 + /* XXX todo */ 2414 + return 0; 2415 + } 2416 + 2417 + /** 2418 + * cik_sdma_load_microcode - load the sDMA ME ucode 2419 + * 2420 + * @rdev: radeon_device pointer 2421 + * 2422 + * Loads the sDMA0/1 ucode. 2423 + * Returns 0 for success, -EINVAL if the ucode is not available. 
2424 + */ 2425 + static int cik_sdma_load_microcode(struct radeon_device *rdev) 2426 + { 2427 + const __be32 *fw_data; 2428 + int i; 2429 + 2430 + if (!rdev->sdma_fw) 2431 + return -EINVAL; 2432 + 2433 + /* stop the gfx rings and rlc compute queues */ 2434 + cik_sdma_gfx_stop(rdev); 2435 + cik_sdma_rlc_stop(rdev); 2436 + 2437 + /* halt the MEs */ 2438 + cik_sdma_enable(rdev, false); 2439 + 2440 + /* sdma0 */ 2441 + fw_data = (const __be32 *)rdev->sdma_fw->data; 2442 + WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); 2443 + for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) 2444 + WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++)); 2445 + WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); 2446 + 2447 + /* sdma1 */ 2448 + fw_data = (const __be32 *)rdev->sdma_fw->data; 2449 + WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); 2450 + for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) 2451 + WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++)); 2452 + WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); 2453 + 2454 + WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); 2455 + WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); 2456 + return 0; 2457 + } 2458 + 2459 + /** 2460 + * cik_sdma_resume - setup and start the async dma engines 2461 + * 2462 + * @rdev: radeon_device pointer 2463 + * 2464 + * Set up the DMA engines and enable them (CIK). 2465 + * Returns 0 for success, error for failure. 
2466 + */ 2467 + static int cik_sdma_resume(struct radeon_device *rdev) 2468 + { 2469 + int r; 2470 + 2471 + /* Reset dma */ 2472 + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); 2473 + RREG32(SRBM_SOFT_RESET); 2474 + udelay(50); 2475 + WREG32(SRBM_SOFT_RESET, 0); 2476 + RREG32(SRBM_SOFT_RESET); 2477 + 2478 + r = cik_sdma_load_microcode(rdev); 2479 + if (r) 2480 + return r; 2481 + 2482 + /* unhalt the MEs */ 2483 + cik_sdma_enable(rdev, true); 2484 + 2485 + /* start the gfx rings and rlc compute queues */ 2486 + r = cik_sdma_gfx_resume(rdev); 2487 + if (r) 2488 + return r; 2489 + r = cik_sdma_rlc_resume(rdev); 2490 + if (r) 2491 + return r; 2492 + 2493 + return 0; 2494 + } 2495 + 2496 + /** 2497 + * cik_sdma_fini - tear down the async dma engines 2498 + * 2499 + * @rdev: radeon_device pointer 2500 + * 2501 + * Stop the async dma engines and free the rings (CIK). 2502 + */ 2503 + static void cik_sdma_fini(struct radeon_device *rdev) 2504 + { 2505 + /* stop the gfx rings and rlc compute queues */ 2506 + cik_sdma_gfx_stop(rdev); 2507 + cik_sdma_rlc_stop(rdev); 2508 + /* halt the MEs */ 2509 + cik_sdma_enable(rdev, false); 2510 + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); 2511 + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); 2512 + /* XXX - compute dma queue tear down */ 2513 + } 2514 + 2515 + /** 2516 + * cik_copy_dma - copy pages using the DMA engine 2517 + * 2518 + * @rdev: radeon_device pointer 2519 + * @src_offset: src GPU address 2520 + * @dst_offset: dst GPU address 2521 + * @num_gpu_pages: number of GPU pages to xfer 2522 + * @fence: radeon fence object 2523 + * 2524 + * Copy GPU paging using the DMA engine (CIK). 2525 + * Used by the radeon ttm implementation to move pages if 2526 + * registered as the asic copy callback. 
2527 + */ 2528 + int cik_copy_dma(struct radeon_device *rdev, 2529 + uint64_t src_offset, uint64_t dst_offset, 2530 + unsigned num_gpu_pages, 2531 + struct radeon_fence **fence) 2532 + { 2533 + struct radeon_semaphore *sem = NULL; 2534 + int ring_index = rdev->asic->copy.dma_ring_index; 2535 + struct radeon_ring *ring = &rdev->ring[ring_index]; 2536 + u32 size_in_bytes, cur_size_in_bytes; 2537 + int i, num_loops; 2538 + int r = 0; 2539 + 2540 + r = radeon_semaphore_create(rdev, &sem); 2541 + if (r) { 2542 + DRM_ERROR("radeon: moving bo (%d).\n", r); 2543 + return r; 2544 + } 2545 + 2546 + size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); 2547 + num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); 2548 + r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); 2549 + if (r) { 2550 + DRM_ERROR("radeon: moving bo (%d).\n", r); 2551 + radeon_semaphore_free(rdev, &sem, NULL); 2552 + return r; 2553 + } 2554 + 2555 + if (radeon_fence_need_sync(*fence, ring->idx)) { 2556 + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 2557 + ring->idx); 2558 + radeon_fence_note_sync(*fence, ring->idx); 2559 + } else { 2560 + radeon_semaphore_free(rdev, &sem, NULL); 2561 + } 2562 + 2563 + for (i = 0; i < num_loops; i++) { 2564 + cur_size_in_bytes = size_in_bytes; 2565 + if (cur_size_in_bytes > 0x1fffff) 2566 + cur_size_in_bytes = 0x1fffff; 2567 + size_in_bytes -= cur_size_in_bytes; 2568 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); 2569 + radeon_ring_write(ring, cur_size_in_bytes); 2570 + radeon_ring_write(ring, 0); /* src/dst endian swap */ 2571 + radeon_ring_write(ring, src_offset & 0xffffffff); 2572 + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff); 2573 + radeon_ring_write(ring, dst_offset & 0xfffffffc); 2574 + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff); 2575 + src_offset += cur_size_in_bytes; 2576 + dst_offset += cur_size_in_bytes; 2577 + } 2578 + 2579 + r = radeon_fence_emit(rdev, fence, 
ring->idx); 2580 + if (r) { 2581 + radeon_ring_unlock_undo(rdev, ring); 2582 + return r; 2583 + } 2584 + 2585 + radeon_ring_unlock_commit(rdev, ring); 2586 + radeon_semaphore_free(rdev, &sem, *fence); 2587 + 2588 + return r; 2589 + } 2590 + 2591 + /** 2592 + * cik_sdma_ring_test - simple async dma engine test 2593 + * 2594 + * @rdev: radeon_device pointer 2595 + * @ring: radeon_ring structure holding ring information 2596 + * 2597 + * Test the DMA engine by using it to write a 2598 + * value to memory (CIK). 2599 + * Returns 0 for success, error for failure. 2600 + */ 2601 + int cik_sdma_ring_test(struct radeon_device *rdev, 2602 + struct radeon_ring *ring) 2603 + { 2604 + unsigned i; 2605 + int r; 2606 + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; 2607 + u32 tmp; 2608 + 2609 + if (!ptr) { 2610 + DRM_ERROR("invalid vram scratch pointer\n"); 2611 + return -EINVAL; 2612 + } 2613 + 2614 + tmp = 0xCAFEDEAD; 2615 + writel(tmp, ptr); 2616 + 2617 + r = radeon_ring_lock(rdev, ring, 4); 2618 + if (r) { 2619 + DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); 2620 + return r; 2621 + } 2622 + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); 2623 + radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); 2624 + radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff); 2625 + radeon_ring_write(ring, 1); /* number of DWs to follow */ 2626 + radeon_ring_write(ring, 0xDEADBEEF); 2627 + radeon_ring_unlock_commit(rdev, ring); 2628 + 2629 + for (i = 0; i < rdev->usec_timeout; i++) { 2630 + tmp = readl(ptr); 2631 + if (tmp == 0xDEADBEEF) 2632 + break; 2633 + DRM_UDELAY(1); 2634 + } 2635 + 2636 + if (i < rdev->usec_timeout) { 2637 + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 2638 + } else { 2639 + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", 2640 + ring->idx, tmp); 2641 + r = -EINVAL; 2642 + } 2643 + return r; 2644 + } 2645 + 2646 + /** 2647 + 
* cik_sdma_ib_test - test an IB on the DMA engine 2648 + * 2649 + * @rdev: radeon_device pointer 2650 + * @ring: radeon_ring structure holding ring information 2651 + * 2652 + * Test a simple IB in the DMA ring (CIK). 2653 + * Returns 0 on success, error on failure. 2654 + */ 2655 + int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 2656 + { 2657 + struct radeon_ib ib; 2658 + unsigned i; 2659 + int r; 2660 + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; 2661 + u32 tmp = 0; 2662 + 2663 + if (!ptr) { 2664 + DRM_ERROR("invalid vram scratch pointer\n"); 2665 + return -EINVAL; 2666 + } 2667 + 2668 + tmp = 0xCAFEDEAD; 2669 + writel(tmp, ptr); 2670 + 2671 + r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 2672 + if (r) { 2673 + DRM_ERROR("radeon: failed to get ib (%d).\n", r); 2674 + return r; 2675 + } 2676 + 2677 + ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 2678 + ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; 2679 + ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff; 2680 + ib.ptr[3] = 1; 2681 + ib.ptr[4] = 0xDEADBEEF; 2682 + ib.length_dw = 5; 2683 + 2684 + r = radeon_ib_schedule(rdev, &ib, NULL); 2685 + if (r) { 2686 + radeon_ib_free(rdev, &ib); 2687 + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 2688 + return r; 2689 + } 2690 + r = radeon_fence_wait(ib.fence, false); 2691 + if (r) { 2692 + DRM_ERROR("radeon: fence wait failed (%d).\n", r); 2693 + return r; 2694 + } 2695 + for (i = 0; i < rdev->usec_timeout; i++) { 2696 + tmp = readl(ptr); 2697 + if (tmp == 0xDEADBEEF) 2698 + break; 2699 + DRM_UDELAY(1); 2700 + } 2701 + if (i < rdev->usec_timeout) { 2702 + DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 2703 + } else { 2704 + DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); 2705 + r = -EINVAL; 2706 + } 2707 + radeon_ib_free(rdev, &ib); 2708 + return r; 2709 + } 2710 + 2162 2711 /** 2163 2712 * cik_gpu_is_lockup - check if the 3D engine is 
locked up 2164 2713 * ··· 2923 2328 dev_info(rdev->dev, "Compute reset failed!\n"); 2924 2329 2925 2330 return cik_gfx_gpu_soft_reset(rdev); 2331 + } 2332 + 2333 + /** 2334 + * cik_sdma_is_lockup - Check if the DMA engine is locked up 2335 + * 2336 + * @rdev: radeon_device pointer 2337 + * @ring: radeon_ring structure holding ring information 2338 + * 2339 + * Check if the async DMA engine is locked up (CIK). 2340 + * Returns true if the engine appears to be locked up, false if not. 2341 + */ 2342 + bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 2343 + { 2344 + u32 dma_status_reg; 2345 + 2346 + if (ring->idx == R600_RING_TYPE_DMA_INDEX) 2347 + dma_status_reg = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); 2348 + else 2349 + dma_status_reg = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); 2350 + if (dma_status_reg & SDMA_IDLE) { 2351 + radeon_ring_lockup_update(ring); 2352 + return false; 2353 + } 2354 + /* force ring activities */ 2355 + radeon_ring_force_activity(rdev, ring); 2356 + return radeon_ring_test_lockup(rdev, ring); 2926 2357 } 2927 2358 2928 2359 /* MC */ ··· 3209 2588 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3210 2589 for (i = 0; i < 16; i++) { 3211 2590 WREG32(SRBM_GFX_CNTL, VMID(i)); 2591 + /* CP and shaders */ 3212 2592 WREG32(SH_MEM_CONFIG, 0); 3213 2593 WREG32(SH_MEM_APE1_BASE, 1); 3214 2594 WREG32(SH_MEM_APE1_LIMIT, 0); 3215 2595 WREG32(SH_MEM_BASES, 0); 2596 + /* SDMA GFX */ 2597 + WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0); 2598 + WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0); 2599 + WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0); 2600 + WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0); 2601 + /* XXX SDMA RLC - todo */ 3216 2602 } 3217 2603 WREG32(SRBM_GFX_CNTL, 0); 3218 2604 ··· 3620 2992 3621 2993 /* gfx ring */ 3622 2994 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 2995 + /* sdma */ 2996 + tmp = RREG32(SDMA0_CNTL + 
SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 2997 + WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp); 2998 + tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 2999 + WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp); 3623 3000 /* compute queues */ 3624 3001 WREG32(CP_ME1_PIPE0_INT_CNTL, 0); 3625 3002 WREG32(CP_ME1_PIPE1_INT_CNTL, 0); ··· 3765 3132 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; 3766 3133 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; 3767 3134 u32 grbm_int_cntl = 0; 3135 + u32 dma_cntl, dma_cntl1; 3768 3136 3769 3137 if (!rdev->irq.installed) { 3770 3138 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); ··· 3786 3152 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; 3787 3153 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; 3788 3154 3155 + dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 3156 + dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 3157 + 3789 3158 /* enable CP interrupts on all rings */ 3790 3159 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 3791 3160 DRM_DEBUG("cik_irq_set: sw int gfx\n"); ··· 3796 3159 } 3797 3160 /* TODO: compute queues! 
*/ 3798 3161 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */ 3162 + 3163 + if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { 3164 + DRM_DEBUG("cik_irq_set: sw int dma\n"); 3165 + dma_cntl |= TRAP_ENABLE; 3166 + } 3167 + 3168 + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { 3169 + DRM_DEBUG("cik_irq_set: sw int dma1\n"); 3170 + dma_cntl1 |= TRAP_ENABLE; 3171 + } 3799 3172 3800 3173 if (rdev->irq.crtc_vblank_int[0] || 3801 3174 atomic_read(&rdev->irq.pflip[0])) { ··· 3863 3216 } 3864 3217 3865 3218 WREG32(CP_INT_CNTL_RING0, cp_int_cntl); 3219 + 3220 + WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); 3221 + WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1); 3866 3222 3867 3223 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 3868 3224 ··· 4060 3410 * [31:8] - reserved 4061 3411 * [59:32] - interrupt source data 4062 3412 * [63:60] - reserved 4063 - * [71:64] - RINGID: ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0] 3413 + * [71:64] - RINGID 3414 + * CP: 3415 + * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0] 4064 3416 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher 4065 3417 * - for gfx, hw shader state (0=PS...5=LS, 6=CS) 4066 3418 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes 4067 3419 * PIPE_ID - ME0 0=3D 4068 3420 * - ME1&2 compute dispatcher (4 pipes each) 3421 + * SDMA: 3422 + * INSTANCE_ID [1:0], QUEUE_ID[1:0] 3423 + * INSTANCE_ID - 0 = sdma0, 1 = sdma1 3424 + * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1 4069 3425 * [79:72] - VMID 4070 3426 * [95:80] - PASID 4071 3427 * [127:96] - reserved ··· 4121 3465 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 4122 3466 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 4123 3467 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 4124 - /* XXX check the bitfield order! 
*/ 4125 - me_id = (ring_id & 0x60) >> 5; 4126 - pipe_id = (ring_id & 0x18) >> 3; 4127 - queue_id = (ring_id & 0x7) >> 0; 4128 3468 4129 3469 switch (src_id) { 4130 3470 case 1: /* D1 vblank/vline */ ··· 4344 3692 break; 4345 3693 case 181: /* CP EOP event */ 4346 3694 DRM_DEBUG("IH: CP EOP\n"); 3695 + /* XXX check the bitfield order! */ 3696 + me_id = (ring_id & 0x60) >> 5; 3697 + pipe_id = (ring_id & 0x18) >> 3; 3698 + queue_id = (ring_id & 0x7) >> 0; 4347 3699 switch (me_id) { 4348 3700 case 0: 4349 3701 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); ··· 4383 3727 break; 4384 3728 case 185: /* CP Privileged inst */ 4385 3729 DRM_ERROR("Illegal instruction in command stream\n"); 3730 + /* XXX check the bitfield order! */ 3731 + me_id = (ring_id & 0x60) >> 5; 3732 + pipe_id = (ring_id & 0x18) >> 3; 3733 + queue_id = (ring_id & 0x7) >> 0; 4386 3734 switch (me_id) { 4387 3735 case 0: 4388 3736 /* This results in a full GPU reset, but all we need to do is soft ··· 4399 3739 break; 4400 3740 case 2: 4401 3741 /* XXX compute */ 3742 + break; 3743 + } 3744 + break; 3745 + case 224: /* SDMA trap event */ 3746 + /* XXX check the bitfield order! 
*/ 3747 + me_id = (ring_id & 0x3) >> 0; 3748 + queue_id = (ring_id & 0xc) >> 2; 3749 + DRM_DEBUG("IH: SDMA trap\n"); 3750 + switch (me_id) { 3751 + case 0: 3752 + switch (queue_id) { 3753 + case 0: 3754 + radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 3755 + break; 3756 + case 1: 3757 + /* XXX compute */ 3758 + break; 3759 + case 2: 3760 + /* XXX compute */ 3761 + break; 3762 + } 3763 + break; 3764 + case 1: 3765 + switch (queue_id) { 3766 + case 0: 3767 + radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 3768 + break; 3769 + case 1: 3770 + /* XXX compute */ 3771 + break; 3772 + case 2: 3773 + /* XXX compute */ 3774 + break; 3775 + } 3776 + break; 3777 + } 3778 + break; 3779 + case 241: /* SDMA Privileged inst */ 3780 + case 247: /* SDMA Privileged inst */ 3781 + DRM_ERROR("Illegal instruction in SDMA command stream\n"); 3782 + /* XXX check the bitfield order! */ 3783 + me_id = (ring_id & 0x3) >> 0; 3784 + queue_id = (ring_id & 0xc) >> 2; 3785 + switch (me_id) { 3786 + case 0: 3787 + switch (queue_id) { 3788 + case 0: 3789 + queue_reset = true; 3790 + break; 3791 + case 1: 3792 + /* XXX compute */ 3793 + queue_reset = true; 3794 + break; 3795 + case 2: 3796 + /* XXX compute */ 3797 + queue_reset = true; 3798 + break; 3799 + } 3800 + break; 3801 + case 1: 3802 + switch (queue_id) { 3803 + case 0: 3804 + queue_reset = true; 3805 + break; 3806 + case 1: 3807 + /* XXX compute */ 3808 + queue_reset = true; 3809 + break; 3810 + case 2: 3811 + /* XXX compute */ 3812 + queue_reset = true; 3813 + break; 3814 + } 4402 3815 break; 4403 3816 } 4404 3817 break;
+130
drivers/gpu/drm/radeon/cikd.h
··· 42 42 #define SRBM_STATUS2 0xE4C 43 43 #define SRBM_STATUS 0xE50 44 44 45 + #define SRBM_SOFT_RESET 0xE60 46 + #define SOFT_RESET_BIF (1 << 1) 47 + #define SOFT_RESET_R0PLL (1 << 4) 48 + #define SOFT_RESET_DC (1 << 5) 49 + #define SOFT_RESET_SDMA1 (1 << 6) 50 + #define SOFT_RESET_GRBM (1 << 8) 51 + #define SOFT_RESET_HDP (1 << 9) 52 + #define SOFT_RESET_IH (1 << 10) 53 + #define SOFT_RESET_MC (1 << 11) 54 + #define SOFT_RESET_ROM (1 << 14) 55 + #define SOFT_RESET_SEM (1 << 15) 56 + #define SOFT_RESET_VMC (1 << 17) 57 + #define SOFT_RESET_SDMA (1 << 20) 58 + #define SOFT_RESET_TST (1 << 21) 59 + #define SOFT_RESET_REGBB (1 << 22) 60 + #define SOFT_RESET_ORB (1 << 23) 61 + #define SOFT_RESET_VCE (1 << 24) 62 + 45 63 #define VM_L2_CNTL 0x1400 46 64 #define ENABLE_L2_CACHE (1 << 0) 47 65 #define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) ··· 1056 1038 #define PACKET3_WAIT_ON_CE_COUNTER 0x86 1057 1039 #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 1058 1040 #define PACKET3_SWITCH_BUFFER 0x8B 1041 + 1042 + /* SDMA - first instance at 0xd000, second at 0xd800. The SDMA0_* addresses below all fall in the first range; adding SDMA1_REGISTER_OFFSET presumably selects the same register in the second instance (TODO confirm against the users of these offsets). NOTE(review): SDMA0_STATUS_REG is written in lowercase hex (0xd034) while its neighbours use uppercase. */ 1043 + #define SDMA0_REGISTER_OFFSET 0x0 /* not a register: zero offset, i.e. the SDMA0_* addresses exactly as written */ 1044 + #define SDMA1_REGISTER_OFFSET 0x800 /* not a register: 0xd800 - 0xd000, the distance between the two instances */ 1045 + 1046 + #define SDMA0_UCODE_ADDR 0xD000 1047 + #define SDMA0_UCODE_DATA 0xD004 1048 + 1049 + #define SDMA0_CNTL 0xD010 1050 + # define TRAP_ENABLE (1 << 0) 1051 + # define SEM_INCOMPLETE_INT_ENABLE (1 << 1) 1052 + # define SEM_WAIT_INT_ENABLE (1 << 2) 1053 + # define DATA_SWAP_ENABLE (1 << 3) 1054 + # define FENCE_SWAP_ENABLE (1 << 4) 1055 + # define AUTO_CTXSW_ENABLE (1 << 18) 1056 + # define CTXEMPTY_INT_ENABLE (1 << 28) 1057 + 1058 + #define SDMA0_TILING_CONFIG 0xD018 1059 + 1060 + #define SDMA0_SEM_INCOMPLETE_TIMER_CNTL 0xD020 1061 + #define SDMA0_SEM_WAIT_FAIL_TIMER_CNTL 0xD024 1062 + 1063 + #define SDMA0_STATUS_REG 0xd034 1064 + # define SDMA_IDLE (1 << 0) 1065 + 1066 + #define SDMA0_ME_CNTL 0xD048 1067 + # define SDMA_HALT (1 << 0) 1068 + 1069 + #define SDMA0_GFX_RB_CNTL 0xD200 
1070 + # define SDMA_RB_ENABLE (1 << 0) 1071 + # define SDMA_RB_SIZE(x) ((x) << 1) /* log2 value, shifted into place starting at bit 1 */ 1072 + # define SDMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ 1073 + # define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12) 1074 + # define SDMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ 1075 + # define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 value, shifted into place starting at bit 16 */ 1076 + #define SDMA0_GFX_RB_BASE 0xD204 1077 + #define SDMA0_GFX_RB_BASE_HI 0xD208 1078 + #define SDMA0_GFX_RB_RPTR 0xD20C 1079 + #define SDMA0_GFX_RB_WPTR 0xD210 1080 + 1081 + #define SDMA0_GFX_RB_RPTR_ADDR_HI 0xD220 1082 + #define SDMA0_GFX_RB_RPTR_ADDR_LO 0xD224 1083 + #define SDMA0_GFX_IB_CNTL 0xD228 1084 + # define SDMA_IB_ENABLE (1 << 0) 1085 + # define SDMA_IB_SWAP_ENABLE (1 << 4) 1086 + # define SDMA_SWITCH_INSIDE_IB (1 << 8) 1087 + # define SDMA_CMD_VMID(x) ((x) << 16) 1088 + 1089 + #define SDMA0_GFX_VIRTUAL_ADDR 0xD29C 1090 + #define SDMA0_GFX_APE1_CNTL 0xD2A0 1091 + 1092 + #define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 1093 + (((sub_op) & 0xFF) << 8) | \ 1094 + (((op) & 0xFF) << 0)) 1095 + /* sDMA opcodes. SDMA_PACKET() above packs op into bits 7:0, sub_op into bits 15:8 and e into bits 31:16 of one value; the SDMA_OPCODE_* constants are the op values, the indented *_SUB_OPCODE_* constants are sub_op values, and the *_EXTRA_* constants are bits or fields for e. NOTE(review): an e with bit 15 set (for example SDMA_SEMAPHORE_EXTRA_M) is shifted into bit 31 of a signed int by SDMA_PACKET() - confirm that is acceptable for the compilers in use. NOTE(review): SDMA_WRTIE_SUB_OPCODE_TILED below looks like a misspelling of WRITE; it is left as-is because the name is part of the header's interface. */ 1096 + #define SDMA_OPCODE_NOP 0 1097 + #define SDMA_OPCODE_COPY 1 1098 + # define SDMA_COPY_SUB_OPCODE_LINEAR 0 1099 + # define SDMA_COPY_SUB_OPCODE_TILED 1 1100 + # define SDMA_COPY_SUB_OPCODE_SOA 3 1101 + # define SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 4 1102 + # define SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 5 1103 + # define SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 6 1104 + #define SDMA_OPCODE_WRITE 2 1105 + # define SDMA_WRITE_SUB_OPCODE_LINEAR 0 1106 + # define SDMA_WRTIE_SUB_OPCODE_TILED 1 1107 + #define SDMA_OPCODE_INDIRECT_BUFFER 4 1108 + #define SDMA_OPCODE_FENCE 5 1109 + #define SDMA_OPCODE_TRAP 6 1110 + #define SDMA_OPCODE_SEMAPHORE 7 1111 + # define SDMA_SEMAPHORE_EXTRA_O (1 << 13) 1112 + /* 0 - increment 1113 * 1 - write 1 1114 */ 1115 + # define SDMA_SEMAPHORE_EXTRA_S (1 << 14) 1116 + /* 0 - wait 1117 * 1 - signal 1118 */ 1119 + # define SDMA_SEMAPHORE_EXTRA_M (1 << 15) 1120 + /* 
mailbox */ 1121 + #define SDMA_OPCODE_POLL_REG_MEM 8 1122 + # define SDMA_POLL_REG_MEM_EXTRA_OP(x) ((x) << 10) 1123 + /* 0 - wait_reg_mem 1124 * 1 - wr_wait_wr_reg 1125 */ 1126 + # define SDMA_POLL_REG_MEM_EXTRA_FUNC(x) ((x) << 12) 1127 + /* 0 - always 1128 * 1 - < 1129 * 2 - <= 1130 * 3 - == 1131 * 4 - != 1132 * 5 - >= 1133 * 6 - > 1134 */ 1135 + # define SDMA_POLL_REG_MEM_EXTRA_M (1 << 15) 1136 + /* 0 = register 1137 * 1 = memory 1138 */ 1139 + #define SDMA_OPCODE_COND_EXEC 9 1140 + #define SDMA_OPCODE_CONSTANT_FILL 11 1141 + # define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 1142 + /* 0 = byte fill 1143 * 2 = DW fill 1144 */ 1145 + #define SDMA_OPCODE_GENERATE_PTE_PDE 12 1146 + #define SDMA_OPCODE_TIMESTAMP 13 1147 + # define SDMA_TIMESTAMP_SUB_OPCODE_SET_LOCAL 0 1148 + # define SDMA_TIMESTAMP_SUB_OPCODE_GET_LOCAL 1 1149 + # define SDMA_TIMESTAMP_SUB_OPCODE_GET_GLOBAL 2 1150 + #define SDMA_OPCODE_SRBM_WRITE 14 1151 + # define SDMA_SRBM_WRITE_EXTRA_BYTE_ENABLE(x) ((x) << 12) 1152 + /* byte mask, shifted into place starting at bit 12 */ 1059 1153 1060 1154 #endif
+1
drivers/gpu/drm/radeon/radeon.h
··· 1726 1726 const struct firmware *ce_fw; /* SI CE firmware */ 1727 1727 const struct firmware *uvd_fw; /* UVD firmware */ 1728 1728 const struct firmware *mec_fw; /* CIK MEC firmware */ 1729 + const struct firmware *sdma_fw; /* CIK SDMA firmware */ 1729 1730 struct r600_blit r600_blit; 1730 1731 struct r600_vram_scratch vram_scratch; 1731 1732 int msi_enabled; /* msi enabled */