A Modern GPGPU API & WIP Linux RDNA2+ Driver
rdna driver linux gpu
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

wip

+52 -13
+41 -10
libvektor/include/vektor/vektor.h
··· 19 19 Transfer 20 20 }; 21 21 22 + enum class Stage { 23 + Transfer, 24 + Compute, 25 + RasterColorOut, 26 + PixelShader, 27 + VertexShader 28 + }; 29 + enum class Signal { 30 + AtomicSet, 31 + AtomicMax, 32 + AtomicOr, 33 + }; 34 + enum class Op { 35 + Never, 36 + Less, 37 + Equal, 38 + }; 39 + 40 + enum HazardFlags { 41 + // @todo 42 + DrawArguments = 1 << 0, 43 + Descriptors = 1 << 1 44 + }; 45 + 46 + enum class Memory { Default, Gpu, Readback }; 47 + 48 + typedef uint64_t gpuptr_t; 22 49 typedef void *Device; 23 50 typedef void *Queue; 24 51 typedef void *CommandList; 25 - 26 - typedef uint64_t gpuptr_t; 27 - 28 - Version version(); 29 - 30 - Device create(); 31 - void destroy(Device); 32 - 33 - enum class Memory { Default, Gpu, Readback }; 52 + typedef void *Semaphore; 34 53 35 54 struct Allocation { 36 55 void *cpu; ··· 40 59 }; 41 60 42 61 62 + Version version(); 63 + 64 + Device create(); 65 + void destroy(Device); 66 + 43 67 Allocation malloc(Device, std::size_t size, Memory memory = Memory::Default); 44 68 Allocation malloc(Device, std::size_t size, std::size_t align, Memory memory = Memory::Default); 45 69 void free(Device, Allocation &); 46 70 71 + Semaphore create_semaphore(uint64_t value); 72 + 47 73 Queue create_queue(Device, QueueType); 48 74 CommandList start_recording(Queue); 49 75 50 - void submit(Queue, CommandList); 76 + void submit(Queue, CommandList, Semaphore = nullptr, uint64_t value = 0); 51 77 52 78 void memset(CommandList, gpuptr_t addr, std::size_t size, uint32_t value); 79 + 80 + void signal_after(CommandList, Stage before, gpuptr_t ptr, uint64_t value, Signal); 81 + void wait_before(CommandList, Stage after, gpuptr_t ptr, uint64_t value, Op, HazardFlags hazard = 0, uint64 mask = ~0); 82 + void wait_semaphore(Semaphore, uint64_t value); 83 + void signal_semaphore(Semaphore, uint64_t value); 53 84 54 85 };
+3 -1
libvektor/src/amdgpu/cmdstream.h
··· 40 40 struct Submission { 41 41 uint32_t start_dw; 42 42 uint32_t end_dw; 43 - amdgpu_cs_fence fence; 43 + uint64_t point; 44 44 }; 45 45 46 46 void wait_for_space(uint32_t target_dw_offset); ··· 53 53 amdgpu_bo_handle m_bo_handle; 54 54 uint64_t m_gpu_va; 55 55 uint32_t* m_cpu_map; 56 + 57 + uint64_t m_timeline_counter = 0; 56 58 57 59 uint32_t m_write_cursor_dw = 0; 58 60 std::deque<Submission> m_history;
+4
libvektor/src/vektor_impl.h
··· 32 32 CommandStream cs; 33 33 }; 34 34 35 + struct SemaphoreImpl { 36 + 37 + }; 38 + 35 39 }
+4 -2
libvektor/src/vektor_queue.cpp
··· 48 48 return cl; 49 49 } 50 50 51 - void submit(Queue pq, CommandList pcl) { 51 + void submit(Queue pq, CommandList pcl, Semaphore sem, uint64_t value) { 52 52 auto *queue = (QueueImpl *)pq; 53 53 auto *cl = (CommandListImpl *)pcl; 54 54 assert(cl->queue == queue, "submit: commandlist from foreign queue"); 55 55 56 - queue->cmd_ring->submit(cl->cs); 56 + auto *semaphore = (SemaphoreImpl *)sem; 57 + 58 + queue->cmd_ring->submit(cl->cs, semaphore, value); 57 59 58 60 // @todo: to free commandlist, we want to be sure that it is no longer mapped and stuff. 59 61 // then, we can freely-free it. But i think this needs some deferred-cleanup, as