A modern GPGPU API and a work-in-progress (WIP) Linux driver for RDNA2+ GPUs
rdna driver linux gpu
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

gir: moved base-code to common

+305 -103
+9 -1
drivers/CMakeLists.txt
··· 1 + file(GLOB_RECURSE COMMON_DRIVER_SOURCES CONFIGURE_DEPENDS 2 + "${CMAKE_CURRENT_SOURCE_DIR}/common/*.cpp" 3 + ) 4 + add_library(kes_driver_common STATIC ${COMMON_DRIVER_SOURCES}) 5 + target_include_directories(kes_driver_common PUBLIC 6 + common 7 + ) 8 + 1 9 function(add_kestrel_driver DRIVER_NAME) 2 10 set(TARGET_NAME "kes_${DRIVER_NAME}") 3 11 ··· 17 25 target_include_directories(${TARGET_NAME} PRIVATE 18 26 ${PROJECT_SOURCE_DIR}/kestrel/include 19 27 ${DRIVER_NAME} 20 - common 21 28 ) 22 29 23 30 target_compile_options(${TARGET_NAME} PRIVATE ··· 40 47 ) 41 48 42 49 target_link_libraries(${TARGET_NAME} PRIVATE 50 + kes_driver_common 43 51 fmt::fmt 44 52 libdrm::libdrm 45 53 ${ARGN}
+10 -6
drivers/amdgpu/cmds.cpp
··· 1 + #include "gir/gir.h" 1 2 #include "compiler/compiler.h" 2 - #include "compiler/gir.h" 3 3 #include "cp_encoder.h" 4 4 #include "gpuinfo.h" 5 5 #include "kestrel/kestrel.h" ··· 299 299 void init_compute_shader_config(DeviceImpl *dev, Shader &shader) { 300 300 301 301 // @todo: ultra temporary. 302 - auto x = amdgpu_malloc(dev, 1024, 256, KesMemoryDefault); 302 + auto alloc = amdgpu_malloc(dev, 1024, 256, KesMemoryDefault); 303 303 304 304 { 305 - gir::IRModule mod; 305 + gir::Module mod; 306 306 gir::Builder gb(mod); 307 + auto rp = gb.get_root_ptr(); 308 + auto x = gb.load(rp, gb.mul(gb.i32(4), gb.get_thread_id_x())); 309 + auto sum = gb.add(x, gb.i32(15)); 310 + gb.store(rp, sum, x); 307 311 308 - gir::rdna2_compile(mod, x.cpu, x.gpu); 312 + rdna2_compile(mod, alloc.cpu, alloc.gpu); 309 313 } 310 314 311 - log("shader code: {} {}", (void *)x.cpu, (void *)x.gpu); 315 + log("shader code: {} {}", (void *)alloc.cpu, (void *)alloc.gpu); 312 316 313 317 // @todo: temporary 314 318 auto ordered = false; ··· 336 340 shader.info.block_size[0] = 32; 337 341 shader.info.block_size[1] = 1; 338 342 shader.info.block_size[2] = 1; 339 - shader.va = x.gpu; 343 + shader.va = alloc.gpu; 340 344 shader.info.hw_stage = HwStage::Compute; 341 345 342 346 // use large limits.
+9 -8
drivers/amdgpu/compiler/compiler.cpp
··· 1 1 #include "compiler.h" 2 2 #include "rdna2_asm.h" 3 - #include "gir.h" 3 + #include "gir/gir.h" 4 4 5 5 #include <sstream> 6 6 #include <iomanip> 7 7 #include <string> 8 8 #include <fstream> 9 9 10 - namespace gir { 10 + using namespace gir; 11 11 12 12 /* 13 13 * ··· 24 24 // @todo: this is obviously very early stage wip... 25 25 26 26 struct Compiler { 27 - IRModule& mod; 27 + gir::Module& mod; 28 28 RDNA2Assembler as; 29 29 }; 30 30 ··· 33 33 void allocate_registers(Compiler &); 34 34 void codegen(Compiler &); 35 35 36 - void rdna2_compile(IRModule &mod, void *write_ptr, uint64_t base_addr) { 36 + void rdna2_compile(gir::Module &mod, void *write_ptr, uint64_t base_addr) { 37 37 Compiler compiler(mod); 38 38 39 39 analyze_liveness(compiler); ··· 62 62 } 63 63 } 64 64 65 + // @todo: stuff like this is pretty general. 65 66 void analyze_liveness(Compiler &cc) { 66 67 for (uint32_t i = 0; i < cc.mod.insts.size(); ++i) { 67 - for (auto arg : cc.mod.insts[i].args) { 68 - if (arg.id != 0xFFFFFFFF) cc.mod.values[arg.id].last_use = i; 68 + for (auto arg : cc.mod.insts[i].operands) { 69 + if (arg.id != 0xFFFFFFFF) cc.mod.insts[arg.id].meta.last_use = i; 69 70 } 70 71 } 71 72 } 72 73 73 74 void analyze_uniformity(Compiler &cc) { 74 75 // Simple propagation: Root ptr is uniform. 76 + /* 75 77 for (auto& inst : cc.mod.insts) { 76 78 bool divergent = false; 77 79 for (auto arg : inst.args) { ··· 80 82 if (inst.op == LOAD_GLOBAL) divergent = true; // Memory reads are divergent 81 83 if (inst.dest.id != 0xFFFFFFFF) cc.mod.values[inst.dest.id].is_uniform = !divergent; 82 84 } 85 + */ 83 86 } 84 87 85 88 void allocate_registers(Compiler &cc) { ··· 124 127 cc.as.sopp(RDNA2Assembler::sopp_opcode::s_code_end, 0); 125 128 } 126 129 } 127 - 128 - }
+2 -5
drivers/amdgpu/compiler/compiler.h
··· 1 1 #pragma once 2 2 3 - #include "gir.h" 4 - namespace gir { 3 + #include "gir/gir.h" 5 4 6 - void rdna2_compile(IRModule &mod, void *write_ptr, uint64_t base_addr); 7 - 8 - } 5 + void rdna2_compile(gir::Module &mod, void *write_ptr, uint64_t base_addr);
-56
drivers/amdgpu/compiler/gir.h
··· 1 - #pragma once 2 - 3 - #include "rdna2_asm.h" 4 - #include <cstdint> 5 - #include <vector> 6 - 7 - namespace gir { 8 - enum class Type { 9 - Int, Addr, 10 - }; 11 - 12 - enum Op { ADD, SUB, LOAD_GLOBAL, STORE_GLOBAL, GET_ROOT_PTR, V_MOV_S2V, LOAD_ROOT_PTR }; 13 - 14 - struct Ref { uint32_t id; }; 15 - 16 - struct Inst { 17 - Op op; 18 - Ref dest; 19 - std::vector<Ref> args; 20 - uint32_t imm = 0; 21 - }; 22 - 23 - struct ValueMeta { 24 - bool is_uniform = false; 25 - Type type; 26 - uint32_t phys_reg = 0xFFFFFFFF; 27 - uint32_t last_use = 0; 28 - }; 29 - 30 - class IRModule { 31 - public: 32 - std::vector<ValueMeta> values; 33 - std::vector<Inst> insts; 34 - 35 - inline Ref make_value(Type type) { 36 - values.push_back({false, type, 0xFFFFFFFF, 0}); 37 - return { (uint32_t)values.size() - 1 }; 38 - } 39 - }; 40 - 41 - class Builder { 42 - public: 43 - Builder(IRModule& m) : mod(m) {} 44 - 45 - Ref iadd(Ref a, Ref b); 46 - 47 - Ref load_root_ptr(); 48 - 49 - Ref load_global(Ref addr, uint32_t offset); 50 - 51 - void store_global(Ref addr, Ref data, uint32_t offset); 52 - private: 53 - IRModule& mod; 54 - }; 55 - 56 - };
-27
drivers/amdgpu/compiler/gir_builder.cpp
··· 1 - #include "gir.h" 2 - 3 - namespace gir { 4 - 5 - Ref Builder::iadd(Ref a, Ref b) { 6 - Ref dst = mod.make_value(Type::Int); 7 - mod.insts.push_back({ADD, dst, {a, b}}); 8 - return dst; 9 - } 10 - 11 - Ref Builder::load_root_ptr() { 12 - Ref dst = mod.make_value(Type::Addr); 13 - mod.insts.push_back({LOAD_ROOT_PTR, dst, {}}); 14 - return dst; 15 - } 16 - 17 - Ref Builder::load_global(Ref addr, uint32_t offset) { 18 - Ref dst = mod.make_value(Type::Int); 19 - mod.insts.push_back({LOAD_GLOBAL, dst, {addr}, offset}); 20 - return dst; 21 - } 22 - 23 - void Builder::store_global(Ref addr, Ref data, uint32_t offset) { 24 - mod.insts.push_back({STORE_GLOBAL, {0xFFFFFFFF}, {addr, data}, offset}); 25 - } 26 - 27 - }
+128
drivers/common/gir/gir.h
··· 1 + #pragma once 2 + 3 + #include <cstdint> 4 + #include <vector> 5 + 6 + namespace gir { 7 + 8 + enum class Type { 9 + Void, 10 + I32, 11 + F32, 12 + Ptr, 13 + }; 14 + 15 + struct Value { 16 + uint32_t id; 17 + 18 + bool is_inst() const { return id != ~0u; } 19 + }; 20 + 21 + enum class Op { 22 + Add, 23 + Sub, 24 + Mul, 25 + Div, 26 + Mod, 27 + 28 + FAdd, 29 + FSub, 30 + FMul, 31 + FDiv, 32 + 33 + And, 34 + Or, 35 + Xor, 36 + Shl, 37 + Shr, 38 + 39 + Eq, 40 + Ne, 41 + Lt, 42 + Le, 43 + Gt, 44 + Ge, 45 + 46 + Load, 47 + Store, 48 + 49 + Const, 50 + 51 + GetRootPtr, 52 + 53 + GetThreadIdX, 54 + GetThreadIdY, 55 + GetThreadIdZ, 56 + GetWorkgroupIdX, 57 + GetWorkgroupIdY, 58 + GetWorkgroupIdZ, 59 + 60 + BackendIntrinsic, 61 + }; 62 + 63 + using BackendIntrinsicId = uint32_t; 64 + 65 + struct Inst { 66 + Op op; 67 + Type type; 68 + std::vector<Value> operands; 69 + 70 + // only for BackendIntrinsic 71 + BackendIntrinsicId intrinsic_id; 72 + 73 + union { 74 + int64_t imm_i64; 75 + } data; 76 + 77 + struct { 78 + bool is_uniform = false; 79 + uint32_t phys_reg = ~0u; 80 + uint32_t last_use = 0xFFFFFFFF; 81 + } meta; 82 + }; 83 + 84 + class Module { 85 + public: 86 + std::vector<Inst> insts; 87 + 88 + Value emit(Inst inst) { 89 + uint32_t id = insts.size(); 90 + insts.push_back(inst); 91 + return Value{id}; 92 + } 93 + }; 94 + 95 + class Builder { 96 + public: 97 + Builder(Module& m) : mod(m) {} 98 + 99 + Value i32(int32_t imm); 100 + Value f32(float f); 101 + 102 + Value add(Value a, Value b); 103 + Value sub(Value a, Value b); 104 + Value mul(Value a, Value b); 105 + 106 + Value fadd(Value a, Value b); 107 + Value fmul(Value a, Value b); 108 + 109 + Value eq(Value a, Value b); 110 + Value lt(Value a, Value b); 111 + 112 + Value load(Value addr, Value offset); 113 + void store(Value addr, Value data, Value offset); 114 + 115 + Value get_root_ptr(); 116 + 117 + Value get_thread_id_x(); 118 + Value get_thread_id_y(); 119 + 120 + Value get_workgroup_id_x(); 121 + Value 
get_workgroup_id_y(); 122 + Value get_workgroup_id_z(); 123 + 124 + protected: 125 + Module& mod; 126 + }; 127 + 128 + };
+147
drivers/common/gir/gir_builder.cpp
··· 1 + #include "gir.h" 2 + 3 + #include <cstring> 4 + 5 + namespace gir { 6 + 7 + Value Builder::i32(int32_t imm) { 8 + return mod.emit(Inst{ 9 + .op = Op::Const, 10 + .type = Type::I32, 11 + .operands = {}, 12 + .data = {.imm_i64 = imm} 13 + }); 14 + } 15 + 16 + Value Builder::f32(float f) { 17 + uint32_t bits; 18 + memcpy(&bits, &f, sizeof(float)); 19 + return mod.emit(Inst{ 20 + .op = Op::Const, 21 + .type = Type::F32, 22 + .operands = {}, 23 + .data = {.imm_i64 = (int64_t)bits} 24 + }); 25 + } 26 + 27 + Value Builder::add(Value a, Value b) { 28 + return mod.emit(Inst{ 29 + .op = Op::Add, 30 + .type = Type::I32, 31 + .operands = {a, b} 32 + }); 33 + } 34 + 35 + Value Builder::sub(Value a, Value b) { 36 + return mod.emit(Inst{ 37 + .op = Op::Sub, 38 + .type = Type::I32, 39 + .operands = {a, b} 40 + }); 41 + } 42 + 43 + Value Builder::mul(Value a, Value b) { 44 + return mod.emit(Inst{ 45 + .op = Op::Mul, 46 + .type = Type::I32, 47 + .operands = {a, b} 48 + }); 49 + } 50 + 51 + Value Builder::fadd(Value a, Value b) { 52 + return mod.emit(Inst{ 53 + .op = Op::FAdd, 54 + .type = Type::F32, 55 + .operands = {a, b} 56 + }); 57 + } 58 + 59 + Value Builder::fmul(Value a, Value b) { 60 + return mod.emit(Inst{ 61 + .op = Op::FMul, 62 + .type = Type::F32, 63 + .operands = {a, b} 64 + }); 65 + } 66 + 67 + Value Builder::eq(Value a, Value b) { 68 + return mod.emit(Inst{ 69 + .op = Op::Eq, 70 + .type = Type::I32, 71 + .operands = {a, b} 72 + }); 73 + } 74 + 75 + Value Builder::lt(Value a, Value b) { 76 + return mod.emit(Inst{ 77 + .op = Op::Lt, 78 + .type = Type::I32, 79 + .operands = {a, b} 80 + }); 81 + } 82 + 83 + Value Builder::load(Value addr, Value offset) { 84 + return mod.emit(Inst{ 85 + .op = Op::Load, 86 + .type = Type::I32, 87 + .operands = {addr, offset}, 88 + }); 89 + } 90 + 91 + void Builder::store(Value addr, Value data, Value offset) { 92 + mod.emit(Inst{ 93 + .op = Op::Store, 94 + .type = Type::Void, 95 + .operands = {addr, data, offset}, 96 + }); 97 + } 98 
+ 99 + Value Builder::get_root_ptr() { 100 + return mod.emit(Inst{ 101 + .op = Op::GetRootPtr, 102 + .type = Type::Ptr, 103 + .operands = {} 104 + }); 105 + } 106 + 107 + Value Builder::get_thread_id_x() { 108 + return mod.emit(Inst{ 109 + .op = Op::GetThreadIdX, 110 + .type = Type::I32, 111 + .operands = {} 112 + }); 113 + } 114 + 115 + Value Builder::get_thread_id_y() { 116 + return mod.emit(Inst{ 117 + .op = Op::GetThreadIdY, 118 + .type = Type::I32, 119 + .operands = {} 120 + }); 121 + } 122 + 123 + Value Builder::get_workgroup_id_x() { 124 + return mod.emit(Inst{ 125 + .op = Op::GetWorkgroupIdX, 126 + .type = Type::I32, 127 + .operands = {} 128 + }); 129 + } 130 + 131 + Value Builder::get_workgroup_id_y() { 132 + return mod.emit(Inst{ 133 + .op = Op::GetWorkgroupIdY, 134 + .type = Type::I32, 135 + .operands = {} 136 + }); 137 + } 138 + 139 + Value Builder::get_workgroup_id_z() { 140 + return mod.emit(Inst{ 141 + .op = Op::GetWorkgroupIdZ, 142 + .type = Type::I32, 143 + .operands = {} 144 + }); 145 + } 146 + 147 + }