···86868787 return bytes_written;
8888}
8989+9090+uint64_t SDMAEncoder::copy_linear(uint64_t src_va, uint64_t dst_va, uint64_t size, bool tmz) {
9191+ const unsigned max_size_per_packet =
9292+ info.sdma_version >= SDMAVersion::SDMA_5_2 ? SDMA_V5_2_COPY_MAX_BYTES : SDMA_V2_0_COPY_MAX_BYTES;
9393+ uint32_t align = ~0u;
9494+9595+ /* SDMA FW automatically enables a faster dword copy mode when
9696+ * source, destination and size are all dword-aligned.
9797+ *
9898+ * When source and destination are dword-aligned, round down the size to
9999+ * take advantage of faster dword copy, and copy the remaining few bytes
100100+ * with the last copy packet.
101101+ */
102102+ if ((src_va & 0x3) == 0 && (dst_va & 0x3) == 0 && size > 4 && (size & 0x3) != 0) {
103103+ align = ~0x3u;
104104+ }
105105+106106+ const uint64_t bytes_written = size >= 4 ? MIN2(size & align, max_size_per_packet) : size;
107107+108108+ cs.emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
109109+ cs.emit(info.sdma_version >= SDMAVersion::SDMA_4_0 ? bytes_written - 1 : bytes_written);
110110+ cs.emit(0);
111111+ cs.emit(src_va);
112112+ cs.emit(src_va >> 32);
113113+ cs.emit(dst_va);
114114+ cs.emit(dst_va >> 32);
115115+116116+ return bytes_written;
117117+}
+1
drivers/amdgpu/sdma_encoder.h
···30303131 // returns the number of bytes written; may need to be repeated.
3232 uint64_t constant_fill(uint64_t va, uint64_t size, uint32_t value);
3333+ uint64_t copy_linear(uint64_t src_va, uint64_t dst_va, uint64_t size, bool tmz);
3334private:
3435 GpuInfo &info;
3536 CommandStream &cs;