A Modern GPGPU API & WIP Linux RDNA2+ Driver
rdna driver linux gpu
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

amdgpu: cleanup

+30 -12
+18 -3
drivers/amdgpu/cmds.cpp
··· 296 296 void init_compute_shader_config(DeviceImpl *dev, Shader &shader) { 297 297 298 298 // @todo: ultra temporary. 299 - auto x = amdgpu_malloc(dev, 1024, 16, KesMemoryDefault); 300 - *((uint32_t *)x.cpu) = 0xBF810000; // s_endpgm 299 + auto x = amdgpu_malloc(dev, 1024, 256, KesMemoryDefault); 300 + uint32_t *pgm = (uint32_t *)x.cpu; 301 + auto pgmidx = 0; 302 + // pgm[pgmidx++] = 0x24020402; // v_lshlrev_b32 v1, 2, v0 303 + // pgm[pgmidx++] = 0x4A020300; // v_add_co_u32 v1, vcc_lo, s0, v1 304 + // pgm[pgmidx++] = 0x4A040201; // v_add_co_ci_u32 v2, vcc_lo, s1, 0, vcc_lo 305 + // pgm[pgmidx++] = 0xDC500000; // global_store_dword v[1:2], v0, off 306 + // pgm[pgmidx++] = 0x00000001; 307 + pgm[pgmidx++] = 0xBF810000; // s_endpgm 308 + 309 + // (RDNA ISA Ref. 2.5) 310 + for (auto i = 0; i < 64; ++i) { 311 + pgm[pgmidx++] = 0xBF9F0000; // s_code_end 312 + } 313 + 314 + log("shader code: {} {}", (void *)x.cpu, (void *)x.gpu); 301 315 302 316 // @todo: temporary 303 317 auto wave_size = 32; ··· 334 348 335 349 shader.config.pgm_rsrc1 = 336 350 S_00B848_VGPRS((num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) 337 - | S_00B848_DX10_CLAMP(dx10_clamp); 351 + | S_00B848_DX10_CLAMP(dx10_clamp) 352 + | S_00B128_MEM_ORDERED(true); //always true for gfx10.3 338 353 339 354 shader.config.pgm_rsrc2 = 340 355 S_00B84C_USER_SGPR(shader.config.user_sgpr_count)
+12 -9
test/examples/07_hello_dispatch/hello_dispatch.cpp
··· 4 4 #include <stdio.h> 5 5 6 6 struct DispatchArguments { 7 - uint64_t va; 8 - uint32_t size; 7 + uint64_t buffer; 9 8 }; 10 9 11 10 int main(void) { 12 - 13 - auto size = 10 * 1024 * 1024; 14 - 15 11 auto dev = kes_create(); 16 12 17 - auto x = kes_malloc(dev, size, 4, KesMemoryDefault); 13 + auto x = kes_malloc(dev, 1024, 4, KesMemoryDefault); 18 14 auto y = kes_malloc(dev, sizeof(DispatchArguments), 8, KesMemoryDefault); 15 + 16 + printf("x: %p %p\n", (void *)x.cpu, (void *)x.gpu); 17 + printf("y: %p %p\n", (void *)y.cpu, (void *)y.gpu); 19 18 20 19 DispatchArguments *args = (DispatchArguments *)y.cpu; 21 - args->va = x.gpu; 22 - args->size = size; 20 + args->buffer = x.gpu; 23 21 24 22 auto compute = kes_create_queue(dev, KesQueueTypeCompute); 25 23 26 24 auto cl = kes_start_recording(compute); 27 25 { 28 - kes_cmd_dispatch(cl, y.gpu, 128, 1, 1); 26 + kes_cmd_dispatch(cl, y.gpu, 32, 1, 1); 29 27 } 30 28 31 29 kes_submit(compute, cl); 32 30 33 31 sleep(1); 32 + 33 + printf("x[0]: %u\n", ((uint32_t *)x.cpu)[0]); 34 + printf("x[1]: %u\n", ((uint32_t *)x.cpu)[1]); 35 + printf("x[2]: %u\n", ((uint32_t *)x.cpu)[2]); 36 + printf("x[3]: %u\n", ((uint32_t *)x.cpu)[3]); 34 37 35 38 kes_free(dev, &x); 36 39 kes_destroy(dev);