Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/amdgpu: add SDMA v4.0 implementation (v2)

v2: fix Makefile

Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Ken Wang and committed by Alex Deucher
2130f89c e60f8db5

3 files changed: +1585 -1

drivers/gpu/drm/amd/amdgpu/Makefile (+2 -1)
@@ -69,7 +69,8 @@
 # add async DMA block
 amdgpu-y += \
 	sdma_v2_4.o \
-	sdma_v3_0.o
+	sdma_v3_0.o \
+	sdma_v4_0.o
 
 # add UVD block
 amdgpu-y += \
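Building sdma_v4_0.o is only half of the wiring: the block also has to be added to the SoC's IP block list so the common amdgpu init/resume code will call into it. That hunk lives in soc15.c and is not part of this commit; the sketch below only illustrates the registration call as it is typically done in this driver, and the helper name example_register_sdma() is hypothetical.

/* Illustrative sketch only -- the soc15.c change is not part of this diff.
 * sdma_v4_0_ip_block is the amdgpu_ip_block_version exported by the new
 * sdma_v4_0.c/.h pair; amdgpu_ip_block_add() queues it for the common
 * sw_init/hw_init/suspend/resume sequencing done by the amdgpu core. */
static int example_register_sdma(struct amdgpu_device *adev)
{
	return amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block);
}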
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c (new file, +1553)
··· 1 + /* 2 + * Copyright 2016 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + 24 + #include <linux/firmware.h> 25 + #include <drm/drmP.h> 26 + #include "amdgpu.h" 27 + #include "amdgpu_ucode.h" 28 + #include "amdgpu_trace.h" 29 + 30 + #include "vega10/soc15ip.h" 31 + #include "vega10/SDMA0/sdma0_4_0_offset.h" 32 + #include "vega10/SDMA0/sdma0_4_0_sh_mask.h" 33 + #include "vega10/SDMA1/sdma1_4_0_offset.h" 34 + #include "vega10/SDMA1/sdma1_4_0_sh_mask.h" 35 + #include "vega10/MMHUB/mmhub_1_0_offset.h" 36 + #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" 37 + #include "vega10/HDP/hdp_4_0_offset.h" 38 + 39 + #include "soc15_common.h" 40 + #include "soc15.h" 41 + #include "vega10_sdma_pkt_open.h" 42 + 43 + MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); 44 + MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); 45 + 46 + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); 47 + static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); 48 + static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); 49 + static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); 50 + 51 + static const u32 golden_settings_sdma_4[] = 52 + { 53 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, 54 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, 55 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, 56 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, 57 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL), 0x800f0100, 0x00000100, 58 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, 59 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0x003ff006, 0x0003c000, 60 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0100, 0x00000100, 61 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, 62 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0100, 0x00000100, 63 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, 64 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0, 65 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CHICKEN_BITS), 0xfe931f07, 0x02831f07, 66 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), 0xffffffff, 0x3f000100, 67 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_IB_CNTL), 0x800f0100, 0x00000100, 68 + SOC15_REG_OFFSET(SDMA1, 0, 
mmSDMA1_GFX_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, 69 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL), 0x800f0100, 0x00000100, 70 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, 71 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), 0x003ff000, 0x0003c000, 72 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL), 0x800f0100, 0x00000100, 73 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, 74 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL), 0x800f0100, 0x00000100, 75 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, 76 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0 77 + }; 78 + 79 + static const u32 golden_settings_sdma_vg10[] = 80 + { 81 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, 82 + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002, 83 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, 84 + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 85 + }; 86 + 87 + static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) 88 + { 89 + u32 base = 0; 90 + switch (instance) { 91 + case 0: 92 + base = SDMA0_BASE.instance[0].segment[0]; 93 + break; 94 + case 1: 95 + base = SDMA1_BASE.instance[0].segment[0]; 96 + break; 97 + default: 98 + BUG(); 99 + break; 100 + } 101 + 102 + return base + internal_offset; 103 + } 104 + 105 + static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) 106 + { 107 + switch (adev->asic_type) { 108 + case CHIP_VEGA10: 109 + amdgpu_program_register_sequence(adev, 110 + golden_settings_sdma_4, 111 + (const u32)ARRAY_SIZE(golden_settings_sdma_4)); 112 + amdgpu_program_register_sequence(adev, 113 + golden_settings_sdma_vg10, 114 + (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); 115 + break; 116 + default: 117 + break; 118 + } 119 + } 120 + 121 + static void sdma_v4_0_print_ucode_regs(void *handle) 122 + { 123 + int i; 124 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 125 + 126 + dev_info(adev->dev, "VEGA10 SDMA ucode registers\n"); 127 + for (i = 0; i < adev->sdma.num_instances; i++) { 128 + dev_info(adev->dev, " SDMA%d_UCODE_ADDR=0x%08X\n", 129 + i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR))); 130 + dev_info(adev->dev, " SDMA%d_UCODE_CHECKSUM=0x%08X\n", 131 + i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_CHECKSUM))); 132 + } 133 + } 134 + 135 + /** 136 + * sdma_v4_0_init_microcode - load ucode images from disk 137 + * 138 + * @adev: amdgpu_device pointer 139 + * 140 + * Use the firmware interface to load the ucode images into 141 + * the driver (not loaded into hw). 142 + * Returns 0 on success, error on failure. 
143 + */ 144 + 145 + // emulation only, won't work on real chip 146 + // vega10 real chip need to use PSP to load firmware 147 + static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) 148 + { 149 + const char *chip_name; 150 + char fw_name[30]; 151 + int err = 0, i; 152 + struct amdgpu_firmware_info *info = NULL; 153 + const struct common_firmware_header *header = NULL; 154 + const struct sdma_firmware_header_v1_0 *hdr; 155 + 156 + DRM_DEBUG("\n"); 157 + 158 + switch (adev->asic_type) { 159 + case CHIP_VEGA10: 160 + chip_name = "vega10"; 161 + break; 162 + default: BUG(); 163 + } 164 + 165 + for (i = 0; i < adev->sdma.num_instances; i++) { 166 + if (i == 0) 167 + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); 168 + else 169 + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); 170 + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); 171 + if (err) 172 + goto out; 173 + err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); 174 + if (err) 175 + goto out; 176 + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; 177 + adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); 178 + adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); 179 + if (adev->sdma.instance[i].feature_version >= 20) 180 + adev->sdma.instance[i].burst_nop = true; 181 + DRM_DEBUG("psp_load == '%s'\n", 182 + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP? "true": "false"); 183 + 184 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 185 + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; 186 + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; 187 + info->fw = adev->sdma.instance[i].fw; 188 + header = (const struct common_firmware_header *)info->fw->data; 189 + adev->firmware.fw_size += 190 + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 191 + } 192 + } 193 + out: 194 + if (err) { 195 + printk(KERN_ERR 196 + "sdma_v4_0: Failed to load firmware \"%s\"\n", 197 + fw_name); 198 + for (i = 0; i < adev->sdma.num_instances; i++) { 199 + release_firmware(adev->sdma.instance[i].fw); 200 + adev->sdma.instance[i].fw = NULL; 201 + } 202 + } 203 + return err; 204 + } 205 + 206 + /** 207 + * sdma_v4_0_ring_get_rptr - get the current read pointer 208 + * 209 + * @ring: amdgpu ring pointer 210 + * 211 + * Get the current rptr from the hardware (VEGA10+). 212 + */ 213 + static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) 214 + { 215 + u64* rptr; 216 + 217 + /* XXX check if swapping is necessary on BE */ 218 + rptr =((u64*)&ring->adev->wb.wb[ring->rptr_offs]); 219 + 220 + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); 221 + return ((*rptr) >> 2); 222 + } 223 + 224 + /** 225 + * sdma_v4_0_ring_get_wptr - get the current write pointer 226 + * 227 + * @ring: amdgpu ring pointer 228 + * 229 + * Get the current wptr from the hardware (VEGA10+). 230 + */ 231 + static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) 232 + { 233 + struct amdgpu_device *adev = ring->adev; 234 + u64* wptr = NULL; 235 + uint64_t local_wptr=0; 236 + 237 + if (ring->use_doorbell) { 238 + /* XXX check if swapping is necessary on BE */ 239 + wptr = ((u64*)&adev->wb.wb[ring->wptr_offs]); 240 + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); 241 + *wptr = (*wptr) >> 2; 242 + DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); 243 + } else { 244 + u32 lowbit, highbit; 245 + int me = (ring == &adev->sdma.instance[0].ring) ? 
0 : 1; 246 + wptr=&local_wptr; 247 + lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2; 248 + highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; 249 + 250 + DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", 251 + me, highbit, lowbit); 252 + *wptr = highbit; 253 + *wptr = (*wptr) << 32; 254 + *wptr |= lowbit; 255 + } 256 + 257 + return *wptr; 258 + } 259 + 260 + /** 261 + * sdma_v4_0_ring_set_wptr - commit the write pointer 262 + * 263 + * @ring: amdgpu ring pointer 264 + * 265 + * Write the wptr back to the hardware (VEGA10+). 266 + */ 267 + static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) 268 + { 269 + struct amdgpu_device *adev = ring->adev; 270 + 271 + DRM_DEBUG("Setting write pointer\n"); 272 + if (ring->use_doorbell) { 273 + DRM_DEBUG("Using doorbell -- " 274 + "wptr_offs == 0x%08x " 275 + "lower_32_bits(ring->wptr) << 2 == 0x%08x " 276 + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", 277 + ring->wptr_offs, 278 + lower_32_bits(ring->wptr << 2), 279 + upper_32_bits(ring->wptr << 2)); 280 + /* XXX check if swapping is necessary on BE */ 281 + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); 282 + adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); 283 + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", 284 + ring->doorbell_index, ring->wptr << 2); 285 + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); 286 + } else { 287 + int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; 288 + DRM_DEBUG("Not using doorbell -- " 289 + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " 290 + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n", 291 + me, 292 + me, 293 + lower_32_bits(ring->wptr << 2), 294 + upper_32_bits(ring->wptr << 2)); 295 + WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); 296 + WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); 297 + } 298 + } 299 + 300 + static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 301 + { 302 + struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 303 + int i; 304 + 305 + for (i = 0; i < count; i++) 306 + if (sdma && sdma->burst_nop && (i == 0)) 307 + amdgpu_ring_write(ring, ring->funcs->nop | 308 + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); 309 + else 310 + amdgpu_ring_write(ring, ring->funcs->nop); 311 + } 312 + 313 + /** 314 + * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine 315 + * 316 + * @ring: amdgpu ring pointer 317 + * @ib: IB object to schedule 318 + * 319 + * Schedule an IB in the DMA ring (VEGA10). 320 + */ 321 + static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, 322 + struct amdgpu_ib *ib, 323 + unsigned vm_id, bool ctx_switch) 324 + { 325 + u32 vmid = vm_id & 0xf; 326 + 327 + /* IB packet must end on a 8 DW boundary */ 328 + sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 329 + 330 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 331 + SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); 332 + /* base must be 32 byte aligned */ 333 + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); 334 + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 335 + amdgpu_ring_write(ring, ib->length_dw); 336 + amdgpu_ring_write(ring, 0); 337 + amdgpu_ring_write(ring, 0); 338 + 339 + } 340 + 341 + /** 342 + * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring 343 + * 344 + * @ring: amdgpu ring pointer 345 + * 346 + * Emit an hdp flush packet on the requested DMA ring. 
347 + */ 348 + static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 349 + { 350 + u32 ref_and_mask = 0; 351 + struct nbio_hdp_flush_reg *nbio_hf_reg; 352 + 353 + if (ring->adev->asic_type == CHIP_VEGA10) 354 + nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; 355 + 356 + if (ring == &ring->adev->sdma.instance[0].ring) 357 + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; 358 + else 359 + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; 360 + 361 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 362 + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | 363 + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ 364 + amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_done_offset << 2); 365 + amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_req_offset << 2); 366 + amdgpu_ring_write(ring, ref_and_mask); /* reference */ 367 + amdgpu_ring_write(ring, ref_and_mask); /* mask */ 368 + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 369 + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 370 + } 371 + 372 + static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 373 + { 374 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 375 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 376 + amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0)); 377 + amdgpu_ring_write(ring, 1); 378 + } 379 + 380 + /** 381 + * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring 382 + * 383 + * @ring: amdgpu ring pointer 384 + * @fence: amdgpu fence object 385 + * 386 + * Add a DMA fence packet to the ring to write 387 + * the fence seq number and DMA trap packet to generate 388 + * an interrupt if needed (VEGA10). 389 + */ 390 + static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 391 + unsigned flags) 392 + { 393 + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 394 + /* write the fence */ 395 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); 396 + /* zero in first two bits */ 397 + BUG_ON(addr & 0x3); 398 + amdgpu_ring_write(ring, lower_32_bits(addr)); 399 + amdgpu_ring_write(ring, upper_32_bits(addr)); 400 + amdgpu_ring_write(ring, lower_32_bits(seq)); 401 + 402 + /* optionally write high bits as well */ 403 + if (write64bit) { 404 + addr += 4; 405 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); 406 + /* zero in first two bits */ 407 + BUG_ON(addr & 0x3); 408 + amdgpu_ring_write(ring, lower_32_bits(addr)); 409 + amdgpu_ring_write(ring, upper_32_bits(addr)); 410 + amdgpu_ring_write(ring, upper_32_bits(seq)); 411 + } 412 + 413 + /* generate an interrupt */ 414 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); 415 + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); 416 + } 417 + 418 + 419 + /** 420 + * sdma_v4_0_gfx_stop - stop the gfx async dma engines 421 + * 422 + * @adev: amdgpu_device pointer 423 + * 424 + * Stop the gfx async dma ring buffers (VEGA10). 
425 + */ 426 + static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) 427 + { 428 + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; 429 + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; 430 + u32 rb_cntl, ib_cntl; 431 + int i; 432 + 433 + if ((adev->mman.buffer_funcs_ring == sdma0) || 434 + (adev->mman.buffer_funcs_ring == sdma1)) 435 + amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 436 + 437 + for (i = 0; i < adev->sdma.num_instances; i++) { 438 + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); 439 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 440 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 441 + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); 442 + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 443 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); 444 + } 445 + 446 + sdma0->ready = false; 447 + sdma1->ready = false; 448 + } 449 + 450 + /** 451 + * sdma_v4_0_rlc_stop - stop the compute async dma engines 452 + * 453 + * @adev: amdgpu_device pointer 454 + * 455 + * Stop the compute async dma queues (VEGA10). 456 + */ 457 + static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) 458 + { 459 + /* XXX todo */ 460 + } 461 + 462 + /** 463 + * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch 464 + * 465 + * @adev: amdgpu_device pointer 466 + * @enable: enable/disable the DMA MEs context switch. 467 + * 468 + * Halt or unhalt the async dma engines context switch (VEGA10). 469 + */ 470 + static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) 471 + { 472 + u32 f32_cntl; 473 + int i; 474 + 475 + for (i = 0; i < adev->sdma.num_instances; i++) { 476 + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); 477 + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, 478 + AUTO_CTXSW_ENABLE, enable ? 1 : 0); 479 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); 480 + } 481 + 482 + } 483 + 484 + /** 485 + * sdma_v4_0_enable - stop the async dma engines 486 + * 487 + * @adev: amdgpu_device pointer 488 + * @enable: enable/disable the DMA MEs. 489 + * 490 + * Halt or unhalt the async dma engines (VEGA10). 491 + */ 492 + static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) 493 + { 494 + u32 f32_cntl; 495 + int i; 496 + 497 + if (enable == false) { 498 + sdma_v4_0_gfx_stop(adev); 499 + sdma_v4_0_rlc_stop(adev); 500 + } 501 + 502 + for (i = 0; i < adev->sdma.num_instances; i++) { 503 + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); 504 + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); 505 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), f32_cntl); 506 + } 507 + } 508 + 509 + /** 510 + * sdma_v4_0_gfx_resume - setup and start the async dma engines 511 + * 512 + * @adev: amdgpu_device pointer 513 + * 514 + * Set up the gfx DMA ring buffers and enable them (VEGA10). 515 + * Returns 0 for success, error for failure. 
516 + */ 517 + static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) 518 + { 519 + struct amdgpu_ring *ring; 520 + u32 rb_cntl, ib_cntl; 521 + u32 rb_bufsz; 522 + u32 wb_offset; 523 + u32 doorbell; 524 + u32 doorbell_offset; 525 + int i,r; 526 + 527 + for (i = 0; i < adev->sdma.num_instances; i++) { 528 + ring = &adev->sdma.instance[i].ring; 529 + wb_offset = (ring->rptr_offs * 4); 530 + 531 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); 532 + 533 + /* Set ring buffer size in dwords */ 534 + rb_bufsz = order_base_2(ring->ring_size / 4); 535 + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); 536 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); 537 + #ifdef __BIG_ENDIAN 538 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); 539 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, 540 + RPTR_WRITEBACK_SWAP_ENABLE, 1); 541 + #endif 542 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 543 + 544 + /* Initialize the ring buffer's read and write pointers */ 545 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR), 0); 546 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_HI), 0); 547 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), 0); 548 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), 0); 549 + 550 + /* set the wb address whether it's enabled or not */ 551 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), 552 + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); 553 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), 554 + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); 555 + 556 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); 557 + 558 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); 559 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); 560 + 561 + ring->wptr = 0; 562 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); 563 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); 564 + 565 + doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL)); 566 + doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET)); 567 + 568 + if (ring->use_doorbell){ 569 + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); 570 + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, 571 + OFFSET, ring->doorbell_index); 572 + } else { 573 + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); 574 + } 575 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); 576 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); 577 + nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); 578 + 579 + /* enable DMA RB */ 580 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); 581 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 582 + 583 + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); 584 + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); 585 + #ifdef __BIG_ENDIAN 586 + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); 587 + #endif 588 + /* enable DMA IBs */ 589 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); 590 + 591 + ring->ready = true; 592 + 593 + r = 
amdgpu_ring_test_ring(ring); 594 + if (r) { 595 + ring->ready = false; 596 + return r; 597 + } 598 + 599 + if (adev->mman.buffer_funcs_ring == ring) 600 + amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 601 + } 602 + 603 + return 0; 604 + } 605 + 606 + /** 607 + * sdma_v4_0_rlc_resume - setup and start the async dma engines 608 + * 609 + * @adev: amdgpu_device pointer 610 + * 611 + * Set up the compute DMA queues and enable them (VEGA10). 612 + * Returns 0 for success, error for failure. 613 + */ 614 + static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev) 615 + { 616 + /* XXX todo */ 617 + return 0; 618 + } 619 + 620 + /** 621 + * sdma_v4_0_load_microcode - load the sDMA ME ucode 622 + * 623 + * @adev: amdgpu_device pointer 624 + * 625 + * Loads the sDMA0/1 ucode. 626 + * Returns 0 for success, -EINVAL if the ucode is not available. 627 + */ 628 + static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) 629 + { 630 + const struct sdma_firmware_header_v1_0 *hdr; 631 + const __le32 *fw_data; 632 + u32 fw_size; 633 + u32 digest_size = 0; 634 + int i, j; 635 + 636 + /* halt the MEs */ 637 + sdma_v4_0_enable(adev, false); 638 + 639 + for (i = 0; i < adev->sdma.num_instances; i++) { 640 + uint16_t version_major; 641 + uint16_t version_minor; 642 + if (!adev->sdma.instance[i].fw) 643 + return -EINVAL; 644 + 645 + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; 646 + amdgpu_ucode_print_sdma_hdr(&hdr->header); 647 + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 648 + 649 + version_major = le16_to_cpu(hdr->header.header_version_major); 650 + version_minor = le16_to_cpu(hdr->header.header_version_minor); 651 + 652 + if (version_major == 1 && version_minor >= 1) { 653 + const struct sdma_firmware_header_v1_1 *sdma_v1_1_hdr = (const struct sdma_firmware_header_v1_1 *) hdr; 654 + digest_size = le32_to_cpu(sdma_v1_1_hdr->digest_size); 655 + } 656 + 657 + fw_size -= digest_size; 658 + 659 + fw_data = (const __le32 *) 660 + (adev->sdma.instance[i].fw->data + 661 + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 662 + 663 + sdma_v4_0_print_ucode_regs(adev); 664 + 665 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); 666 + 667 + 668 + for (j = 0; j < fw_size; j++) 669 + { 670 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); 671 + } 672 + 673 + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); 674 + } 675 + 676 + sdma_v4_0_print_ucode_regs(adev); 677 + 678 + return 0; 679 + } 680 + 681 + /** 682 + * sdma_v4_0_start - setup and start the async dma engines 683 + * 684 + * @adev: amdgpu_device pointer 685 + * 686 + * Set up the DMA engines and enable them (VEGA10). 687 + * Returns 0 for success, error for failure. 
688 + */ 689 + static int sdma_v4_0_start(struct amdgpu_device *adev) 690 + { 691 + int r; 692 + 693 + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 694 + DRM_INFO("Loading via direct write\n"); 695 + r = sdma_v4_0_load_microcode(adev); 696 + if (r) 697 + return r; 698 + } 699 + 700 + /* unhalt the MEs */ 701 + sdma_v4_0_enable(adev, true); 702 + /* enable sdma ring preemption */ 703 + sdma_v4_0_ctx_switch_enable(adev, true); 704 + 705 + /* start the gfx rings and rlc compute queues */ 706 + r = sdma_v4_0_gfx_resume(adev); 707 + if (r) 708 + return r; 709 + r = sdma_v4_0_rlc_resume(adev); 710 + if (r) 711 + return r; 712 + 713 + return 0; 714 + } 715 + 716 + /** 717 + * sdma_v4_0_ring_test_ring - simple async dma engine test 718 + * 719 + * @ring: amdgpu_ring structure holding ring information 720 + * 721 + * Test the DMA engine by writing using it to write an 722 + * value to memory. (VEGA10). 723 + * Returns 0 for success, error for failure. 724 + */ 725 + static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) 726 + { 727 + struct amdgpu_device *adev = ring->adev; 728 + unsigned i; 729 + unsigned index; 730 + int r; 731 + u32 tmp; 732 + u64 gpu_addr; 733 + 734 + DRM_INFO("In Ring test func\n"); 735 + 736 + r = amdgpu_wb_get(adev, &index); 737 + if (r) { 738 + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); 739 + return r; 740 + } 741 + 742 + gpu_addr = adev->wb.gpu_addr + (index * 4); 743 + tmp = 0xCAFEDEAD; 744 + adev->wb.wb[index] = cpu_to_le32(tmp); 745 + 746 + r = amdgpu_ring_alloc(ring, 5); 747 + if (r) { 748 + DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 749 + amdgpu_wb_free(adev, index); 750 + return r; 751 + } 752 + 753 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 754 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 755 + amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 756 + amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 757 + amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); 758 + amdgpu_ring_write(ring, 0xDEADBEEF); 759 + amdgpu_ring_commit(ring); 760 + 761 + for (i = 0; i < adev->usec_timeout; i++) { 762 + tmp = le32_to_cpu(adev->wb.wb[index]); 763 + if (tmp == 0xDEADBEEF) { 764 + break; 765 + } 766 + DRM_UDELAY(1); 767 + } 768 + 769 + if (i < adev->usec_timeout) { 770 + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 771 + } else { 772 + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", 773 + ring->idx, tmp); 774 + r = -EINVAL; 775 + } 776 + amdgpu_wb_free(adev, index); 777 + 778 + return r; 779 + } 780 + 781 + /** 782 + * sdma_v4_0_ring_test_ib - test an IB on the DMA engine 783 + * 784 + * @ring: amdgpu_ring structure holding ring information 785 + * 786 + * Test a simple IB in the DMA ring (VEGA10). 787 + * Returns 0 on success, error on failure. 
788 + */ 789 + static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 790 + { 791 + struct amdgpu_device *adev = ring->adev; 792 + struct amdgpu_ib ib; 793 + struct dma_fence *f = NULL; 794 + unsigned index; 795 + long r; 796 + u32 tmp = 0; 797 + u64 gpu_addr; 798 + 799 + r = amdgpu_wb_get(adev, &index); 800 + if (r) { 801 + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 802 + return r; 803 + } 804 + 805 + gpu_addr = adev->wb.gpu_addr + (index * 4); 806 + tmp = 0xCAFEDEAD; 807 + adev->wb.wb[index] = cpu_to_le32(tmp); 808 + memset(&ib, 0, sizeof(ib)); 809 + r = amdgpu_ib_get(adev, NULL, 256, &ib); 810 + if (r) { 811 + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 812 + goto err0; 813 + } 814 + 815 + ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 816 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 817 + ib.ptr[1] = lower_32_bits(gpu_addr); 818 + ib.ptr[2] = upper_32_bits(gpu_addr); 819 + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 820 + ib.ptr[4] = 0xDEADBEEF; 821 + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 822 + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 823 + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 824 + ib.length_dw = 8; 825 + 826 + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 827 + if (r) 828 + goto err1; 829 + 830 + r = dma_fence_wait_timeout(f, false, timeout); 831 + if (r == 0) { 832 + DRM_ERROR("amdgpu: IB test timed out\n"); 833 + r = -ETIMEDOUT; 834 + goto err1; 835 + } else if (r < 0) { 836 + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 837 + goto err1; 838 + } 839 + tmp = le32_to_cpu(adev->wb.wb[index]); 840 + if (tmp == 0xDEADBEEF) { 841 + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 842 + r = 0; 843 + } else { 844 + DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); 845 + r = -EINVAL; 846 + } 847 + err1: 848 + amdgpu_ib_free(adev, &ib, NULL); 849 + dma_fence_put(f); 850 + err0: 851 + amdgpu_wb_free(adev, index); 852 + return r; 853 + } 854 + 855 + 856 + /** 857 + * sdma_v4_0_vm_copy_pte - update PTEs by copying them from the GART 858 + * 859 + * @ib: indirect buffer to fill with commands 860 + * @pe: addr of the page entry 861 + * @src: src addr to copy from 862 + * @count: number of page entries to update 863 + * 864 + * Update PTEs by copying them from the GART using sDMA (VEGA10). 865 + */ 866 + static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib, 867 + uint64_t pe, uint64_t src, 868 + unsigned count) 869 + { 870 + unsigned bytes = count * 8; 871 + 872 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 873 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 874 + ib->ptr[ib->length_dw++] = bytes - 1; 875 + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 876 + ib->ptr[ib->length_dw++] = lower_32_bits(src); 877 + ib->ptr[ib->length_dw++] = upper_32_bits(src); 878 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); 879 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 880 + 881 + } 882 + 883 + /** 884 + * sdma_v4_0_vm_write_pte - update PTEs by writing them manually 885 + * 886 + * @ib: indirect buffer to fill with commands 887 + * @pe: addr of the page entry 888 + * @addr: dst addr to write into pe 889 + * @count: number of page entries to update 890 + * @incr: increase next addr by incr bytes 891 + * @flags: access flags 892 + * 893 + * Update PTEs by writing them manually using sDMA (VEGA10). 
894 + */ 895 + static void sdma_v4_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 896 + uint64_t value, unsigned count, 897 + uint32_t incr) 898 + { 899 + unsigned ndw = count * 2; 900 + 901 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 902 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 903 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); 904 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 905 + ib->ptr[ib->length_dw++] = ndw - 1; 906 + for (; ndw > 0; ndw -= 2) { 907 + ib->ptr[ib->length_dw++] = lower_32_bits(value); 908 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 909 + value += incr; 910 + } 911 + } 912 + 913 + /** 914 + * sdma_v4_0_vm_set_pte_pde - update the page tables using sDMA 915 + * 916 + * @ib: indirect buffer to fill with commands 917 + * @pe: addr of the page entry 918 + * @addr: dst addr to write into pe 919 + * @count: number of page entries to update 920 + * @incr: increase next addr by incr bytes 921 + * @flags: access flags 922 + * 923 + * Update the page tables using sDMA (VEGA10). 924 + */ 925 + static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, 926 + uint64_t pe, 927 + uint64_t addr, unsigned count, 928 + uint32_t incr, uint64_t flags) 929 + { 930 + /* for physically contiguous pages (vram) */ 931 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE); 932 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ 933 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 934 + ib->ptr[ib->length_dw++] = flags; /* mask */ 935 + ib->ptr[ib->length_dw++] = 0; 936 + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ 937 + ib->ptr[ib->length_dw++] = upper_32_bits(addr); 938 + ib->ptr[ib->length_dw++] = incr; /* increment size */ 939 + ib->ptr[ib->length_dw++] = 0; 940 + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ 941 + } 942 + 943 + /** 944 + * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw 945 + * 946 + * @ib: indirect buffer to fill with padding 947 + * 948 + */ 949 + static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 950 + { 951 + struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 952 + u32 pad_count; 953 + int i; 954 + 955 + pad_count = (8 - (ib->length_dw & 0x7)) % 8; 956 + for (i = 0; i < pad_count; i++) 957 + if (sdma && sdma->burst_nop && (i == 0)) 958 + ib->ptr[ib->length_dw++] = 959 + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | 960 + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); 961 + else 962 + ib->ptr[ib->length_dw++] = 963 + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 964 + } 965 + 966 + 967 + /** 968 + * sdma_v4_0_ring_emit_pipeline_sync - sync the pipeline 969 + * 970 + * @ring: amdgpu_ring pointer 971 + * 972 + * Make sure all previous operations are completed (CIK). 
973 + */ 974 + static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 975 + { 976 + uint32_t seq = ring->fence_drv.sync_seq; 977 + uint64_t addr = ring->fence_drv.gpu_addr; 978 + 979 + /* wait for idle */ 980 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 981 + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 982 + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ 983 + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); 984 + amdgpu_ring_write(ring, addr & 0xfffffffc); 985 + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 986 + amdgpu_ring_write(ring, seq); /* reference */ 987 + amdgpu_ring_write(ring, 0xfffffff); /* mask */ 988 + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 989 + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 990 + } 991 + 992 + 993 + /** 994 + * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA 995 + * 996 + * @ring: amdgpu_ring pointer 997 + * @vm: amdgpu_vm pointer 998 + * 999 + * Update the page table base and flush the VM TLB 1000 + * using sDMA (VEGA10). 1001 + */ 1002 + static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1003 + unsigned vm_id, uint64_t pd_addr) 1004 + { 1005 + unsigned eng = ring->idx; 1006 + unsigned i; 1007 + 1008 + pd_addr = pd_addr | 0x1; /* valid bit */ 1009 + /* now only use physical base address of PDE and valid */ 1010 + BUG_ON(pd_addr & 0xFFFF00000000003EULL); 1011 + 1012 + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 1013 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; 1014 + uint32_t req = hub->get_invalidate_req(vm_id); 1015 + 1016 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1017 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1018 + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); 1019 + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 1020 + 1021 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1022 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1023 + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); 1024 + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 1025 + 1026 + /* flush TLB */ 1027 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1028 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1029 + amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); 1030 + amdgpu_ring_write(ring, req); 1031 + 1032 + /* wait for flush */ 1033 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1034 + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1035 + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ 1036 + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); 1037 + amdgpu_ring_write(ring, 0); 1038 + amdgpu_ring_write(ring, 1 << vm_id); /* reference */ 1039 + amdgpu_ring_write(ring, 1 << vm_id); /* mask */ 1040 + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1041 + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 1042 + } 1043 + } 1044 + 1045 + static int sdma_v4_0_early_init(void *handle) 1046 + { 1047 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1048 + 1049 + adev->sdma.num_instances = 2; 1050 + 1051 + sdma_v4_0_set_ring_funcs(adev); 1052 + sdma_v4_0_set_buffer_funcs(adev); 1053 + sdma_v4_0_set_vm_pte_funcs(adev); 1054 + sdma_v4_0_set_irq_funcs(adev); 1055 + 1056 + return 0; 1057 + } 1058 + 1059 + 1060 + static int sdma_v4_0_sw_init(void *handle) 1061 + { 1062 + struct amdgpu_ring *ring; 1063 + int r, i; 1064 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1065 + 1066 + /* SDMA trap event */ 1067 + r = 
amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA0, 224, 1068 + &adev->sdma.trap_irq); 1069 + if (r) 1070 + return r; 1071 + 1072 + /* SDMA trap event */ 1073 + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA1, 224, 1074 + &adev->sdma.trap_irq); 1075 + if (r) 1076 + return r; 1077 + 1078 + r = sdma_v4_0_init_microcode(adev); 1079 + if (r) { 1080 + DRM_ERROR("Failed to load sdma firmware!\n"); 1081 + return r; 1082 + } 1083 + 1084 + for (i = 0; i < adev->sdma.num_instances; i++) { 1085 + ring = &adev->sdma.instance[i].ring; 1086 + ring->ring_obj = NULL; 1087 + ring->use_doorbell = true; 1088 + 1089 + DRM_INFO("use_doorbell being set to: [%s]\n", 1090 + ring->use_doorbell?"true":"false"); 1091 + 1092 + ring->doorbell_index = (i == 0) ? 1093 + (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset 1094 + : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset 1095 + 1096 + sprintf(ring->name, "sdma%d", i); 1097 + r = amdgpu_ring_init(adev, ring, 1024, 1098 + &adev->sdma.trap_irq, 1099 + (i == 0) ? 1100 + AMDGPU_SDMA_IRQ_TRAP0 : 1101 + AMDGPU_SDMA_IRQ_TRAP1); 1102 + if (r) 1103 + return r; 1104 + } 1105 + 1106 + return r; 1107 + } 1108 + 1109 + static int sdma_v4_0_sw_fini(void *handle) 1110 + { 1111 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1112 + int i; 1113 + 1114 + for (i = 0; i < adev->sdma.num_instances; i++) 1115 + amdgpu_ring_fini(&adev->sdma.instance[i].ring); 1116 + 1117 + return 0; 1118 + } 1119 + 1120 + static int sdma_v4_0_hw_init(void *handle) 1121 + { 1122 + int r; 1123 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1124 + 1125 + sdma_v4_0_init_golden_registers(adev); 1126 + 1127 + r = sdma_v4_0_start(adev); 1128 + if (r) 1129 + return r; 1130 + 1131 + return r; 1132 + } 1133 + 1134 + static int sdma_v4_0_hw_fini(void *handle) 1135 + { 1136 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1137 + 1138 + sdma_v4_0_ctx_switch_enable(adev, false); 1139 + sdma_v4_0_enable(adev, false); 1140 + 1141 + return 0; 1142 + } 1143 + 1144 + static int sdma_v4_0_suspend(void *handle) 1145 + { 1146 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1147 + 1148 + return sdma_v4_0_hw_fini(adev); 1149 + } 1150 + 1151 + static int sdma_v4_0_resume(void *handle) 1152 + { 1153 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1154 + 1155 + return sdma_v4_0_hw_init(adev); 1156 + } 1157 + 1158 + static bool sdma_v4_0_is_idle(void *handle) 1159 + { 1160 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1161 + u32 i; 1162 + for (i = 0; i < adev->sdma.num_instances; i++) { 1163 + u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG)); 1164 + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) 1165 + return false; 1166 + } 1167 + 1168 + return true; 1169 + } 1170 + 1171 + static int sdma_v4_0_wait_for_idle(void *handle) 1172 + { 1173 + unsigned i; 1174 + u32 sdma0,sdma1; 1175 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1176 + for (i = 0; i < adev->usec_timeout; i++) { 1177 + sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG)); 1178 + sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG)); 1179 + 1180 + if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) 1181 + return 0; 1182 + udelay(1); 1183 + } 1184 + return -ETIMEDOUT; 1185 + } 1186 + 1187 + static int sdma_v4_0_soft_reset(void *handle) 1188 + { 1189 + /* todo */ 1190 + 1191 + return 0; 1192 + } 1193 + 1194 + static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, 1195 + struct amdgpu_irq_src *source, 1196 + 
unsigned type, 1197 + enum amdgpu_interrupt_state state) 1198 + { 1199 + u32 sdma_cntl; 1200 + 1201 + u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 1202 + sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) : 1203 + sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); 1204 + 1205 + sdma_cntl = RREG32(reg_offset); 1206 + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1207 + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 1208 + WREG32(reg_offset, sdma_cntl); 1209 + 1210 + return 0; 1211 + } 1212 + 1213 + static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, 1214 + struct amdgpu_irq_src *source, 1215 + struct amdgpu_iv_entry *entry) 1216 + { 1217 + DRM_DEBUG("IH: SDMA trap\n"); 1218 + switch (entry->client_id) { 1219 + case AMDGPU_IH_CLIENTID_SDMA0: 1220 + switch (entry->ring_id) { 1221 + case 0: 1222 + amdgpu_fence_process(&adev->sdma.instance[0].ring); 1223 + break; 1224 + case 1: 1225 + /* XXX compute */ 1226 + break; 1227 + case 2: 1228 + /* XXX compute */ 1229 + break; 1230 + case 3: 1231 + /* XXX page queue*/ 1232 + break; 1233 + } 1234 + break; 1235 + case AMDGPU_IH_CLIENTID_SDMA1: 1236 + switch (entry->ring_id) { 1237 + case 0: 1238 + amdgpu_fence_process(&adev->sdma.instance[1].ring); 1239 + break; 1240 + case 1: 1241 + /* XXX compute */ 1242 + break; 1243 + case 2: 1244 + /* XXX compute */ 1245 + break; 1246 + case 3: 1247 + /* XXX page queue*/ 1248 + break; 1249 + } 1250 + break; 1251 + } 1252 + return 0; 1253 + } 1254 + 1255 + static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, 1256 + struct amdgpu_irq_src *source, 1257 + struct amdgpu_iv_entry *entry) 1258 + { 1259 + DRM_ERROR("Illegal instruction in SDMA command stream\n"); 1260 + schedule_work(&adev->reset_work); 1261 + return 0; 1262 + } 1263 + 1264 + 1265 + static void sdma_v4_0_update_medium_grain_clock_gating( 1266 + struct amdgpu_device *adev, 1267 + bool enable) 1268 + { 1269 + uint32_t data, def; 1270 + 1271 + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 1272 + /* enable sdma0 clock gating */ 1273 + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); 1274 + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1275 + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1276 + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | 1277 + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1278 + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1279 + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1280 + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1281 + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); 1282 + if (def != data) 1283 + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); 1284 + 1285 + if (adev->asic_type == CHIP_VEGA10) { 1286 + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); 1287 + data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1288 + SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1289 + SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | 1290 + SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1291 + SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1292 + SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1293 + SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1294 + SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); 1295 + if(def != data) 1296 + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); 1297 + } 1298 + } else { 1299 + /* disable sdma0 clock gating */ 1300 + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); 1301 + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1302 + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1303 + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | 1304 + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1305 + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1306 + 
SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1307 + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1308 + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); 1309 + 1310 + if (def != data) 1311 + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); 1312 + 1313 + if (adev->asic_type == CHIP_VEGA10) { 1314 + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); 1315 + data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1316 + SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1317 + SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | 1318 + SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1319 + SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1320 + SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1321 + SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1322 + SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); 1323 + if (def != data) 1324 + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); 1325 + } 1326 + } 1327 + } 1328 + 1329 + 1330 + static void sdma_v4_0_update_medium_grain_light_sleep( 1331 + struct amdgpu_device *adev, 1332 + bool enable) 1333 + { 1334 + uint32_t data, def; 1335 + 1336 + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { 1337 + /* 1-not override: enable sdma0 mem light sleep */ 1338 + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); 1339 + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1340 + if (def != data) 1341 + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); 1342 + 1343 + /* 1-not override: enable sdma1 mem light sleep */ 1344 + if (adev->asic_type == CHIP_VEGA10) { 1345 + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); 1346 + data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1347 + if (def != data) 1348 + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); 1349 + } 1350 + } else { 1351 + /* 0-override:disable sdma0 mem light sleep */ 1352 + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); 1353 + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1354 + if (def != data) 1355 + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); 1356 + 1357 + /* 0-override:disable sdma1 mem light sleep */ 1358 + if (adev->asic_type == CHIP_VEGA10) { 1359 + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); 1360 + data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1361 + if (def != data) 1362 + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); 1363 + } 1364 + } 1365 + } 1366 + 1367 + static int sdma_v4_0_set_clockgating_state(void *handle, 1368 + enum amd_clockgating_state state) 1369 + { 1370 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1371 + 1372 + switch (adev->asic_type) { 1373 + case CHIP_VEGA10: 1374 + sdma_v4_0_update_medium_grain_clock_gating(adev, 1375 + state == AMD_CG_STATE_GATE ? true : false); 1376 + sdma_v4_0_update_medium_grain_light_sleep(adev, 1377 + state == AMD_CG_STATE_GATE ? 
true : false); 1378 + break; 1379 + default: 1380 + break; 1381 + } 1382 + return 0; 1383 + } 1384 + 1385 + static int sdma_v4_0_set_powergating_state(void *handle, 1386 + enum amd_powergating_state state) 1387 + { 1388 + return 0; 1389 + } 1390 + 1391 + const struct amd_ip_funcs sdma_v4_0_ip_funcs = { 1392 + .name = "sdma_v4_0", 1393 + .early_init = sdma_v4_0_early_init, 1394 + .late_init = NULL, 1395 + .sw_init = sdma_v4_0_sw_init, 1396 + .sw_fini = sdma_v4_0_sw_fini, 1397 + .hw_init = sdma_v4_0_hw_init, 1398 + .hw_fini = sdma_v4_0_hw_fini, 1399 + .suspend = sdma_v4_0_suspend, 1400 + .resume = sdma_v4_0_resume, 1401 + .is_idle = sdma_v4_0_is_idle, 1402 + .wait_for_idle = sdma_v4_0_wait_for_idle, 1403 + .soft_reset = sdma_v4_0_soft_reset, 1404 + .set_clockgating_state = sdma_v4_0_set_clockgating_state, 1405 + .set_powergating_state = sdma_v4_0_set_powergating_state, 1406 + }; 1407 + 1408 + static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { 1409 + .type = AMDGPU_RING_TYPE_SDMA, 1410 + .align_mask = 0xf, 1411 + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 1412 + .support_64bit_ptrs = true, 1413 + .get_rptr = sdma_v4_0_ring_get_rptr, 1414 + .get_wptr = sdma_v4_0_ring_get_wptr, 1415 + .set_wptr = sdma_v4_0_ring_set_wptr, 1416 + .emit_frame_size = 1417 + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ 1418 + 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ 1419 + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ 1420 + 36 + /* sdma_v4_0_ring_emit_vm_flush */ 1421 + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ 1422 + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ 1423 + .emit_ib = sdma_v4_0_ring_emit_ib, 1424 + .emit_fence = sdma_v4_0_ring_emit_fence, 1425 + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, 1426 + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, 1427 + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, 1428 + .emit_hdp_invalidate = sdma_v4_0_ring_emit_hdp_invalidate, 1429 + .test_ring = sdma_v4_0_ring_test_ring, 1430 + .test_ib = sdma_v4_0_ring_test_ib, 1431 + .insert_nop = sdma_v4_0_ring_insert_nop, 1432 + .pad_ib = sdma_v4_0_ring_pad_ib, 1433 + }; 1434 + 1435 + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1436 + { 1437 + int i; 1438 + 1439 + for (i = 0; i < adev->sdma.num_instances; i++) 1440 + adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; 1441 + } 1442 + 1443 + static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = { 1444 + .set = sdma_v4_0_set_trap_irq_state, 1445 + .process = sdma_v4_0_process_trap_irq, 1446 + }; 1447 + 1448 + static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = { 1449 + .process = sdma_v4_0_process_illegal_inst_irq, 1450 + }; 1451 + 1452 + static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) 1453 + { 1454 + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 1455 + adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; 1456 + adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; 1457 + } 1458 + 1459 + /** 1460 + * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine 1461 + * 1462 + * @ring: amdgpu_ring structure holding ring information 1463 + * @src_offset: src GPU address 1464 + * @dst_offset: dst GPU address 1465 + * @byte_count: number of bytes to xfer 1466 + * 1467 + * Copy GPU buffers using the DMA engine (VEGA10). 1468 + * Used by the amdgpu ttm implementation to move pages if 1469 + * registered as the asic copy callback. 
1470 + */ 1471 + static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, 1472 + uint64_t src_offset, 1473 + uint64_t dst_offset, 1474 + uint32_t byte_count) 1475 + { 1476 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1477 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 1478 + ib->ptr[ib->length_dw++] = byte_count - 1; 1479 + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 1480 + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 1481 + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); 1482 + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 1483 + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 1484 + } 1485 + 1486 + /** 1487 + * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine 1488 + * 1489 + * @ring: amdgpu_ring structure holding ring information 1490 + * @src_data: value to write to buffer 1491 + * @dst_offset: dst GPU address 1492 + * @byte_count: number of bytes to xfer 1493 + * 1494 + * Fill GPU buffers using the DMA engine (VEGA10). 1495 + */ 1496 + static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib, 1497 + uint32_t src_data, 1498 + uint64_t dst_offset, 1499 + uint32_t byte_count) 1500 + { 1501 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); 1502 + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 1503 + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 1504 + ib->ptr[ib->length_dw++] = src_data; 1505 + ib->ptr[ib->length_dw++] = byte_count - 1; 1506 + } 1507 + 1508 + static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { 1509 + .copy_max_bytes = 0x400000, 1510 + .copy_num_dw = 7, 1511 + .emit_copy_buffer = sdma_v4_0_emit_copy_buffer, 1512 + 1513 + .fill_max_bytes = 0x400000, 1514 + .fill_num_dw = 5, 1515 + .emit_fill_buffer = sdma_v4_0_emit_fill_buffer, 1516 + }; 1517 + 1518 + static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) 1519 + { 1520 + if (adev->mman.buffer_funcs == NULL) { 1521 + adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; 1522 + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; 1523 + } 1524 + } 1525 + 1526 + static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { 1527 + .copy_pte = sdma_v4_0_vm_copy_pte, 1528 + .write_pte = sdma_v4_0_vm_write_pte, 1529 + .set_pte_pde = sdma_v4_0_vm_set_pte_pde, 1530 + }; 1531 + 1532 + static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) 1533 + { 1534 + unsigned i; 1535 + 1536 + if (adev->vm_manager.vm_pte_funcs == NULL) { 1537 + adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; 1538 + for (i = 0; i < adev->sdma.num_instances; i++) 1539 + adev->vm_manager.vm_pte_rings[i] = 1540 + &adev->sdma.instance[i].ring; 1541 + 1542 + adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; 1543 + } 1544 + } 1545 + 1546 + const struct amdgpu_ip_block_version sdma_v4_0_ip_block = 1547 + { 1548 + .type = AMD_IP_BLOCK_TYPE_SDMA, 1549 + .major = 4, 1550 + .minor = 0, 1551 + .rev = 0, 1552 + .funcs = &sdma_v4_0_ip_funcs, 1553 + };
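Two conventions in the code above are easy to trip over. First, ring and IB positions are tracked in 32-bit dwords, so values written to the RB_WPTR registers and the 64-bit doorbell are shifted left by two to convert to byte units, and the read pointer is shifted right by two on the way back in. Second, each IB is padded with NOP packets so that it ends on an 8-dword boundary, which is exactly the arithmetic in sdma_v4_0_ring_pad_ib(). A standalone sketch of that arithmetic (plain user-space C, not driver code, with an arbitrary example write pointer):

#include <stdio.h>
#include <stdint.h>

/* Same padding arithmetic as sdma_v4_0_ring_pad_ib() in the patch above. */
static uint32_t nops_to_pad(uint32_t length_dw)
{
	return (8 - (length_dw & 0x7)) % 8;
}

int main(void)
{
	uint64_t wptr_dw  = 0x12345;        /* write pointer counted in dwords */
	uint64_t doorbell = wptr_dw << 2;   /* registers/doorbell take byte units */

	printf("doorbell value for wptr 0x%llx dw: 0x%llx\n",
	       (unsigned long long)wptr_dw, (unsigned long long)doorbell);
	printf("NOPs to pad a 5-dw IB:  %u\n", nops_to_pad(5));  /* 3 */
	printf("NOPs to pad an 8-dw IB: %u\n", nops_to_pad(8));  /* 0 */
	return 0;
}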
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h (new file, +30)
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __SDMA_V4_0_H__
#define __SDMA_V4_0_H__

extern const struct amd_ip_funcs sdma_v4_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v4_0_ip_block;

#endif
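One more pattern from sdma_v4_0.c worth calling out: both SDMA instances are programmed through the same SDMA0 register definitions, and sdma_v4_0_get_reg_offset() simply adds the per-instance aperture base (SDMA0_BASE or SDMA1_BASE, pulled in via the vega10 soc15ip.h include) to the internal register offset. A self-contained sketch of the idea follows; the base and offset values below are made up for illustration, since the real ones come from the Vega10 headers.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical aperture bases -- the real values come from soc15ip.h. */
#define EXAMPLE_SDMA0_BASE          0x00001000u
#define EXAMPLE_SDMA1_BASE          0x00001800u
/* Hypothetical internal offset standing in for mmSDMA0_GFX_RB_WPTR. */
#define EXAMPLE_GFX_RB_WPTR_OFFSET  0x00000085u

/* Mirrors the shape of sdma_v4_0_get_reg_offset(): instance base + offset. */
static uint32_t get_reg_offset(uint32_t instance, uint32_t internal_offset)
{
	uint32_t base = (instance == 0) ? EXAMPLE_SDMA0_BASE : EXAMPLE_SDMA1_BASE;
	return base + internal_offset;
}

int main(void)
{
	printf("SDMA0 RB_WPTR register: 0x%08x\n",
	       get_reg_offset(0, EXAMPLE_GFX_RB_WPTR_OFFSET));
	printf("SDMA1 RB_WPTR register: 0x%08x\n",
	       get_reg_offset(1, EXAMPLE_GFX_RB_WPTR_OFFSET));
	return 0;
}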