Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: gfx12.1 trap handler instruction fixup for VOP3PX

A trap may occur in the middle of a VOP3PX instruction co-issue.
The PC would be restored incorrectly if left unmodified.

Identify this case by examining the instruction opcode and
rewind the PC 8 bytes if it occurs.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Reviewed-by: Vladimir Indic <vladimir.indic@amd.com>
Cc: Shweta Khatri <shweta.khatri@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Jay Cornwall and committed by Alex Deucher
05762d9c 1197366c

+123 -103
+105 -96
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
··· 4587 4587 }; 4588 4588 4589 4589 static const uint32_t cwsr_trap_gfx12_1_0_hex[] = { 4590 - 0xbfa00001, 0xbfa003ac, 4590 + 0xbfa00001, 0xbfa003be, 4591 4591 0xb0804009, 0xb8f8f804, 4592 4592 0x9178ff78, 0x00008c00, 4593 4593 0xb8fbf811, 0x8b6eff78, 4594 4594 0x00004000, 0xbfa10008, 4595 4595 0x8b6eff7b, 0x00000080, 4596 4596 0xbfa20018, 0x8b6ea07b, 4597 - 0xbfa200d1, 0xbf830010, 4597 + 0xbfa200da, 0xbf830010, 4598 4598 0xb8fbf811, 0xbfa0fffb, 4599 4599 0x8b6eff7b, 0x00000bd0, 4600 4600 0xbfa20010, 0xb8eef812, ··· 4605 4605 0xf0000000, 0xbfa20005, 4606 4606 0x8b6fff6f, 0x00000200, 4607 4607 0xbfa20002, 0x8b6ea07b, 4608 - 0xbfa200bb, 0x9177ff77, 4608 + 0xbfa200c4, 0x9177ff77, 4609 4609 0x007fc000, 0xb8fa04a1, 4610 4610 0x847a967a, 0x8c777a77, 4611 4611 0xb8fa0421, 0x847a957a, ··· 4632 4632 0xbfa00002, 0x806c846c, 4633 4633 0x826d806d, 0x8b6dff6d, 4634 4634 0x01ffffff, 0xb8fbf811, 4635 - 0xbf0d847b, 0xbfa20078, 4635 + 0xbf0d847b, 0xbfa20081, 4636 4636 0xf4003eb6, 0xf8000000, 4637 4637 0xbfc70000, 0xf4003bb6, 4638 4638 0xf8000008, 0x8b76ff7a, 4639 4639 0x80000000, 0xbfa20027, 4640 4640 0x9376ff7a, 0x00060019, 4641 4641 0x81f9a376, 0xbf0b8179, 4642 - 0xbfa20068, 0x81f9ac76, 4643 - 0xbf0b8179, 0xbfa20062, 4642 + 0xbfa2006e, 0x81f9ac76, 4643 + 0xbf0b8179, 0xbfa20068, 4644 4644 0x81f9b776, 0xbf0b8179, 4645 - 0xbfa2005f, 0x8b76ff7a, 4645 + 0xbfa20065, 0x8b76ff7a, 4646 4646 0x000001ff, 0xbf06ff76, 4647 - 0x000000fe, 0xbfa2005d, 4647 + 0x000000fe, 0xbfa20063, 4648 4648 0xbf06ff76, 0x000000ff, 4649 - 0xbfa20057, 0xbf06ff76, 4650 - 0x000000fa, 0xbfa20054, 4649 + 0xbfa2005d, 0xbf06ff76, 4650 + 0x000000fa, 0xbfa2005a, 4651 4651 0x81f9ff76, 0x000000e9, 4652 - 0xbf0b8179, 0xbfa20050, 4652 + 0xbf0b8179, 0xbfa20056, 4653 4653 0x8b76ff7b, 0xffff0000, 4654 4654 0xbf06ff76, 0xbf860000, 4655 - 0xbfa10051, 0x9376ff7b, 4655 + 0xbfa1005a, 0x9376ff7b, 4656 4656 0x0002000e, 0x8b79ff7b, 4657 4657 0x00003f00, 0x85798679, 4658 4658 0x8c767976, 0xb9763b01, 4659 - 0xbfa00049, 0x8b76ff7a, 4659 
+ 0xbfa00052, 0x8b76ff7a, 4660 4660 0xfc000000, 0xbf06ff76, 4661 - 0xd4000000, 0xbfa20013, 4661 + 0xd4000000, 0xbfa20019, 4662 4662 0xbf06ff76, 0xc8000000, 4663 - 0xbfa20027, 0x8b76ff7a, 4663 + 0xbfa2002d, 0x8b76ff7a, 4664 4664 0xff000000, 0xbf06ff76, 4665 - 0xcf000000, 0xbfa20039, 4665 + 0xcf000000, 0xbfa2003f, 4666 4666 0x8b79ff7a, 0xffff0000, 4667 + 0xbf06ff79, 0xcc330000, 4668 + 0xbfa2003d, 0xbf06ff79, 4669 + 0xcc880000, 0xbfa2003a, 4667 4670 0xbf06ff79, 0xcc350000, 4668 - 0xbfa20037, 0xbf06ff79, 4669 - 0xcc3a0000, 0xbfa20034, 4671 + 0xbfa2003a, 0xbf06ff79, 4672 + 0xcc3a0000, 0xbfa20037, 4670 4673 0xbf06ff76, 0xcc000000, 4671 - 0xbfa10031, 0x8b76ff7b, 4674 + 0xbfa10034, 0x8b76ff7b, 4672 4675 0x000001ff, 0xbf06ff76, 4673 4676 0x000000ff, 0xbfa20029, 4674 4677 0xbf06ff76, 0x000000fa, ··· 4694 4691 0x000001ff, 0xbf06ff76, 4695 4692 0x000000ff, 0xbfa20003, 4696 4693 0xbfc70000, 0xbefb006e, 4697 - 0xbfa0ffad, 0xbfc70000, 4698 - 0xbefb006f, 0xbfa0ffaa, 4699 - 0xbfc70000, 0x857a9677, 4700 - 0xb97a04a1, 0x857a9577, 4701 - 0xb97a0421, 0x857a8e77, 4702 - 0xb97a3021, 0x8bfe7e7e, 4703 - 0x8bea6a6a, 0x85788978, 4704 - 0xb9783244, 0xbe804a6c, 4705 - 0xb8faf802, 0xbf0d987a, 4706 - 0xbfa10001, 0xbfb00000, 4707 - 0x8b6dff6d, 0x01ffffff, 4708 - 0xbefa0080, 0xb97a0151, 4709 - 0x9177ff77, 0x007fc000, 4710 - 0xb8fa04a1, 0x847a967a, 4711 - 0x8c777a77, 0xb8fa0421, 4712 - 0x847a957a, 0x8c777a77, 4713 - 0xb8fa3021, 0x847a8e7a, 4714 - 0x8c777a77, 0xb980f821, 4715 - 0x00000000, 0xbf0d847b, 4716 - 0xbfa20078, 0xf4003eb6, 4717 - 0xf8000000, 0xbfc70000, 4718 - 0xf4003bb6, 0xf8000008, 4719 - 0x8b76ff7a, 0x80000000, 4720 - 0xbfa20027, 0x9376ff7a, 4721 - 0x00060019, 0x81f9a376, 4694 + 0xbfa0ffa7, 0xbfc70000, 4695 + 0xbefb006f, 0xbfa0ffa4, 4696 + 0x80ec886c, 0x82ed806d, 4697 + 0xbfa0fff7, 0xbfc70000, 4698 + 0x857a9677, 0xb97a04a1, 4699 + 0x857a9577, 0xb97a0421, 4700 + 0x857a8e77, 0xb97a3021, 4701 + 0x8bfe7e7e, 0x8bea6a6a, 4702 + 0x85788978, 0xb9783244, 4703 + 0xbe804a6c, 0xb8faf802, 4704 + 
0xbf0d987a, 0xbfa10001, 4705 + 0xbfb00000, 0x8b6dff6d, 4706 + 0x01ffffff, 0xbefa0080, 4707 + 0xb97a0151, 0x9177ff77, 4708 + 0x007fc000, 0xb8fa04a1, 4709 + 0x847a967a, 0x8c777a77, 4710 + 0xb8fa0421, 0x847a957a, 4711 + 0x8c777a77, 0xb8fa3021, 4712 + 0x847a8e7a, 0x8c777a77, 4713 + 0xb980f821, 0x00000000, 4714 + 0xbf0d847b, 0xbfa20081, 4715 + 0xf4003eb6, 0xf8000000, 4716 + 0xbfc70000, 0xf4003bb6, 4717 + 0xf8000008, 0x8b76ff7a, 4718 + 0x80000000, 0xbfa20027, 4719 + 0x9376ff7a, 0x00060019, 4720 + 0x81f9a376, 0xbf0b8179, 4721 + 0xbfa2006e, 0x81f9ac76, 4722 4722 0xbf0b8179, 0xbfa20068, 4723 - 0x81f9ac76, 0xbf0b8179, 4724 - 0xbfa20062, 0x81f9b776, 4725 - 0xbf0b8179, 0xbfa2005f, 4726 - 0x8b76ff7a, 0x000001ff, 4727 - 0xbf06ff76, 0x000000fe, 4728 - 0xbfa2005d, 0xbf06ff76, 4729 - 0x000000ff, 0xbfa20057, 4730 - 0xbf06ff76, 0x000000fa, 4731 - 0xbfa20054, 0x81f9ff76, 4732 - 0x000000e9, 0xbf0b8179, 4733 - 0xbfa20050, 0x8b76ff7b, 4734 - 0xffff0000, 0xbf06ff76, 4735 - 0xbf860000, 0xbfa10051, 4736 - 0x9376ff7b, 0x0002000e, 4737 - 0x8b79ff7b, 0x00003f00, 4738 - 0x85798679, 0x8c767976, 4739 - 0xb9763b01, 0xbfa00049, 4740 - 0x8b76ff7a, 0xfc000000, 4741 - 0xbf06ff76, 0xd4000000, 4742 - 0xbfa20013, 0xbf06ff76, 4743 - 0xc8000000, 0xbfa20027, 4744 - 0x8b76ff7a, 0xff000000, 4745 - 0xbf06ff76, 0xcf000000, 4746 - 0xbfa20039, 0x8b79ff7a, 4747 - 0xffff0000, 0xbf06ff79, 4748 - 0xcc350000, 0xbfa20037, 4749 - 0xbf06ff79, 0xcc3a0000, 4750 - 0xbfa20034, 0xbf06ff76, 4751 - 0xcc000000, 0xbfa10031, 4752 - 0x8b76ff7b, 0x000001ff, 4753 - 0xbf06ff76, 0x000000ff, 4754 - 0xbfa20029, 0xbf06ff76, 4755 - 0x000000fa, 0xbfa20026, 4756 - 0x81f6ff76, 0x000000e9, 4757 - 0xbf0b8176, 0xbfa20022, 4758 - 0x8b76ff7b, 0x0003fe00, 4759 - 0xbf06ff76, 0x0001fe00, 4760 - 0xbfa2001d, 0x8b76ff7b, 4761 - 0x07fc0000, 0xbf06ff76, 4762 - 0x03fc0000, 0xbfa20018, 4763 - 0xbfa00014, 0x9376ff7a, 4764 - 0x00040016, 0x81f68176, 4765 - 0xbf0b8176, 0xbfa20012, 4766 - 0x9376ff7a, 0x00050011, 4767 - 0x81f68176, 0xbf0b8176, 4768 - 0xbfa2000d, 
0x8b76ff7a, 4723 + 0x81f9b776, 0xbf0b8179, 4724 + 0xbfa20065, 0x8b76ff7a, 4769 4725 0x000001ff, 0xbf06ff76, 4770 - 0x000000ff, 0xbfa20008, 4771 - 0x8b76ff7b, 0x000001ff, 4726 + 0x000000fe, 0xbfa20063, 4772 4727 0xbf06ff76, 0x000000ff, 4773 - 0xbfa20003, 0xbfc70000, 4774 - 0xbefb006e, 0xbfa0ffad, 4775 - 0xbfc70000, 0xbefb006f, 4776 - 0xbfa0ffaa, 0xbfc70000, 4728 + 0xbfa2005d, 0xbf06ff76, 4729 + 0x000000fa, 0xbfa2005a, 4730 + 0x81f9ff76, 0x000000e9, 4731 + 0xbf0b8179, 0xbfa20056, 4732 + 0x8b76ff7b, 0xffff0000, 4733 + 0xbf06ff76, 0xbf860000, 4734 + 0xbfa1005a, 0x9376ff7b, 4735 + 0x0002000e, 0x8b79ff7b, 4736 + 0x00003f00, 0x85798679, 4737 + 0x8c767976, 0xb9763b01, 4738 + 0xbfa00052, 0x8b76ff7a, 4739 + 0xfc000000, 0xbf06ff76, 4740 + 0xd4000000, 0xbfa20019, 4741 + 0xbf06ff76, 0xc8000000, 4742 + 0xbfa2002d, 0x8b76ff7a, 4743 + 0xff000000, 0xbf06ff76, 4744 + 0xcf000000, 0xbfa2003f, 4745 + 0x8b79ff7a, 0xffff0000, 4746 + 0xbf06ff79, 0xcc330000, 4747 + 0xbfa2003d, 0xbf06ff79, 4748 + 0xcc880000, 0xbfa2003a, 4749 + 0xbf06ff79, 0xcc350000, 4750 + 0xbfa2003a, 0xbf06ff79, 4751 + 0xcc3a0000, 0xbfa20037, 4752 + 0xbf06ff76, 0xcc000000, 4753 + 0xbfa10034, 0x8b76ff7b, 4754 + 0x000001ff, 0xbf06ff76, 4755 + 0x000000ff, 0xbfa20029, 4756 + 0xbf06ff76, 0x000000fa, 4757 + 0xbfa20026, 0x81f6ff76, 4758 + 0x000000e9, 0xbf0b8176, 4759 + 0xbfa20022, 0x8b76ff7b, 4760 + 0x0003fe00, 0xbf06ff76, 4761 + 0x0001fe00, 0xbfa2001d, 4762 + 0x8b76ff7b, 0x07fc0000, 4763 + 0xbf06ff76, 0x03fc0000, 4764 + 0xbfa20018, 0xbfa00014, 4765 + 0x9376ff7a, 0x00040016, 4766 + 0x81f68176, 0xbf0b8176, 4767 + 0xbfa20012, 0x9376ff7a, 4768 + 0x00050011, 0x81f68176, 4769 + 0xbf0b8176, 0xbfa2000d, 4770 + 0x8b76ff7a, 0x000001ff, 4771 + 0xbf06ff76, 0x000000ff, 4772 + 0xbfa20008, 0x8b76ff7b, 4773 + 0x000001ff, 0xbf06ff76, 4774 + 0x000000ff, 0xbfa20003, 4775 + 0xbfc70000, 0xbefb006e, 4776 + 0xbfa0ffa7, 0xbfc70000, 4777 + 0xbefb006f, 0xbfa0ffa4, 4778 + 0x80ec886c, 0x82ed806d, 4779 + 0xbfa0fff7, 0xbfc70000, 4777 4780 0xbeee007e, 
0xbeef007f, 4778 4781 0xbefe0180, 0xbefe4d84, 4779 4782 0xbf8a0000, 0x8b7aff7f,
+18 -7
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
··· 37 37 #define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3) 38 38 #define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3) 39 39 #define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12) 40 + #define HAVE_INSTRUCTION_FIXUP (ASIC_FAMILY == CHIP_GC_12_0_3) 40 41 41 42 #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised 42 43 #define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12) ··· 376 375 L_EXIT_TRAP: 377 376 s_and_b32 ttmp1, ttmp1, ADDRESS_HI32_MASK 378 377 379 - #if HAVE_BANKED_VGPRS 378 + #if HAVE_INSTRUCTION_FIXUP 380 379 s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) 381 - fixup_vgpr_bank_selection() 380 + fixup_instruction() 382 381 #endif 383 382 384 383 #if HAVE_XNACK ··· 419 418 save_and_clear_xnack_state_priv(s_save_tmp) 420 419 #endif 421 420 422 - #if HAVE_BANKED_VGPRS 423 - fixup_vgpr_bank_selection() 421 + #if HAVE_INSTRUCTION_FIXUP 422 + fixup_instruction() 424 423 #endif 425 424 426 425 /* inform SPI the readiness and wait for SPI's go signal */ ··· 1401 1400 L_BARRIER_RESTORE_DONE: 1402 1401 end 1403 1402 1404 - #if HAVE_BANKED_VGPRS 1405 - function fixup_vgpr_bank_selection 1403 + #if HAVE_INSTRUCTION_FIXUP 1404 + function fixup_instruction 1406 1405 // PC read may fault if memory violation has been asserted. 1407 1406 // In this case no further progress is expected so fixup is not needed. 1408 1407 s_bitcmp1_b32 s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_SHIFT ··· 1481 1480 s_cmp_eq_u32 ttmp10, 0xcf000000 // If 31:24 = 0xcf, this is VOPD3 1482 1481 s_cbranch_scc1 L_FIXUP_THREE_DWORD // If VOPD3, 3 DWORD inst 1483 1482 // Not VOP1, VOP2, VOPC, VOP3, VOP3SD, VOPD, or VOPD3. 1484 - // Might be in VOP3P, but we must ensure we are not VOP3PX2 1483 + // Check if we are in the middle of VOP3PX. 
1485 1484 s_and_b32 ttmp13, ttmp14, 0xffff0000 // Bits 31:16 1485 + s_cmp_eq_u32 ttmp13, 0xcc330000 // If 31:16 = 0xcc33, this is 8 bytes past VOP3PX 1486 + s_cbranch_scc1 L_FIXUP_VOP3PX_MIDDLE 1487 + s_cmp_eq_u32 ttmp13, 0xcc880000 // If 31:16 = 0xcc88, this is 8 bytes past VOP3PX 1488 + s_cbranch_scc1 L_FIXUP_VOP3PX_MIDDLE 1489 + // Might be in VOP3P, but we must ensure we are not VOP3PX2 1486 1490 s_cmp_eq_u32 ttmp13, 0xcc350000 // If 31:16 = 0xcc35, this is VOP3PX2 1487 1491 s_cbranch_scc1 L_FIXUP_DONE // If VOP3PX2, no fixup needed 1488 1492 s_cmp_eq_u32 ttmp13, 0xcc3a0000 // If 31:16 = 0xcc3a, this is VOP3PX2 ··· 1547 1541 s_wait_kmcnt 0 // Wait for PC+2 and PC+3 to arrive in ttmp2 and ttmp3 1548 1542 s_mov_b32 ttmp15, ttmp3 // Move possible S_SET_VGPR_MSB into ttmp15 1549 1543 s_branch L_FIXUP_ONE_DWORD // Go to common logic that checks if it is S_SET_VGPR_MSB 1544 + 1545 + L_FIXUP_VOP3PX_MIDDLE: 1546 + s_sub_co_u32 ttmp0, ttmp0, 8 // Rewind PC 8 bytes to beginning of instruction 1547 + s_sub_co_ci_u32 ttmp1, ttmp1, 0 1548 + s_branch L_FIXUP_TWO_DWORD // 2 DWORD inst (2nd half of a 4 DWORD inst) 1550 1549 1551 1550 L_FIXUP_DONE: 1552 1551 s_wait_kmcnt 0 // Ensure load of ttmp2 and ttmp3 is done