A modern GPGPU API and work-in-progress Linux driver for RDNA2+ GPUs
rdna driver linux gpu
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

amdgpu: compiler cleanup

+31 -27
+31 -27
drivers/amdgpu/compiler/compiler.cpp
··· 277 277 return (RDNA2Assembler::ssrc)((uint)RDNA2Assembler::ssrc::sgpr0 + inst.meta.phys_reg); 278 278 } 279 279 280 + // some operators are commutative but don't allow certain values in a given place. 281 + // Specifically, vsrc1 MUST be a VGPR, src0 can be anything (SGPR, VGPR, const). 282 + struct VOP2Operands { 283 + RDNA2Assembler::vsrc src0; 284 + uint8_t src1; 285 + }; 286 + VOP2Operands vop2_order(Compiler &cc, const Value &a, const Value &b) { 287 + auto& op0 = cc.mod.deref(a); 288 + auto& op1 = cc.mod.deref(b); 289 + 290 + bool op0_is_vgpr = !op0.meta.is_uniform && op0.op != gir::Op::Const; 291 + bool op1_is_vgpr = !op1.meta.is_uniform && op1.op != gir::Op::Const; 292 + 293 + if (!op0_is_vgpr && !op1_is_vgpr) { 294 + not_implemented("codegen: vop2_order requires at least one VGPR operand"); 295 + } 296 + 297 + if (op0_is_vgpr) { 298 + return VOP2Operands{get_vsrc(cc, b), (uint8_t)op0.meta.phys_reg}; 299 + } else { 300 + return VOP2Operands{get_vsrc(cc, a), (uint8_t)op1.meta.phys_reg}; 301 + } 302 + } 303 + 280 304 void codegen(Compiler &cc) { 281 305 for (auto &inst : cc.mod.insts) { 282 306 switch (inst.op) { ··· 366 390 ); 367 391 } else { 368 392 allocate_vgpr(cc, inst); 369 - // vsrc1 MUST be a VGPR, src0 can be anything (SGPR, VGPR, const) 370 - auto& op0 = cc.mod.deref(inst.operands[0]); 371 - auto& op1 = cc.mod.deref(inst.operands[1]); 393 + auto ops = vop2_order(cc, inst.operands[0], inst.operands[1]); 372 394 373 - // Ensure VGPR is in vsrc1 position by swapping if needed 374 - bool op0_is_vgpr = !op0.meta.is_uniform && op0.op != gir::Op::Const; 375 - bool op1_is_vgpr = !op1.meta.is_uniform && op1.op != gir::Op::Const; 376 - 377 - if (!op0_is_vgpr && !op1_is_vgpr) { 378 - not_implemented("codegen: v_add_nc_u32 requires at least one VGPR operand"); 379 - } 380 - 381 - // Swap so VGPR is always in vsrc1 position 382 - if (op0_is_vgpr && !op1_is_vgpr) { 383 - cc.as.vop2( 384 - RDNA2Assembler::vop2_opcode::v_add_nc_u32, 385 - inst.meta.phys_reg, 
386 - get_vsrc(cc, inst.operands[1]), // src0: can be const/sgpr 387 - op0.meta.phys_reg // vsrc1: VGPR 388 - ); 389 - } else { 390 - cc.as.vop2( 391 - RDNA2Assembler::vop2_opcode::v_add_nc_u32, 392 - inst.meta.phys_reg, 393 - get_vsrc(cc, inst.operands[0]), // src0: can be const/sgpr/vgpr 394 - op1.meta.phys_reg // vsrc1: VGPR 395 - ); 396 - } 395 + cc.as.vop2( 396 + RDNA2Assembler::vop2_opcode::v_add_nc_u32, 397 + inst.meta.phys_reg, 398 + ops.src0, 399 + ops.src1 400 + ); 397 401 } 398 402 } else if (inst.type == gir::Type::Ptr) { 399 403 not_implemented("codegen: pointer addition (64-bit) not yet implemented");