Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: Add interrupt handling for GFX 12.1.0

Add interrupt handling for GFX 12.1.0 similar to what is done
for GFX 9.4.3.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Alex Sierra <alex.sierra@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Mukul Joshi and committed by
Alex Deucher
2c0c485d 01bbc4a4

+416 -4
+1
drivers/gpu/drm/amd/amdkfd/Makefile
··· 58 58 $(AMDKFD_PATH)/kfd_int_process_v9.o \ 59 59 $(AMDKFD_PATH)/kfd_int_process_v10.o \ 60 60 $(AMDKFD_PATH)/kfd_int_process_v11.o \ 61 + $(AMDKFD_PATH)/kfd_int_process_v12_1.o \ 61 62 $(AMDKFD_PATH)/kfd_smi_events.o \ 62 63 $(AMDKFD_PATH)/kfd_crat.o \ 63 64 $(AMDKFD_PATH)/kfd_debug.o
+21 -3
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 171 171 /* GFX12_TODO: Change to v12 version. */ 172 172 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; 173 173 break; 174 + case IP_VERSION(12, 1, 0): 175 + kfd->device_info.event_interrupt_class = 176 + &event_interrupt_class_v12_1; 177 + break; 174 178 default: 175 179 dev_warn(kfd_device, "v9 event interrupt handler is set due to " 176 180 "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version); ··· 671 667 struct amdgpu_device *adev = node->adev; 672 668 uint32_t xcc_mask = node->xcc_mask; 673 669 uint32_t xcc, mapped_xcc; 670 + uint32_t bitmap; 674 671 /* 675 672 * Interrupt bitmap is setup for processing interrupts from 676 673 * different XCDs and AIDs. ··· 693 688 * - AND VMID reported in the interrupt lies within the 694 689 * VMID range of the node. 695 690 */ 696 - for_each_inst(xcc, xcc_mask) { 697 - mapped_xcc = GET_INST(GC, xcc); 698 - node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2)); 691 + switch (KFD_GC_VERSION(node)) { 692 + case IP_VERSION(12, 1, 0): 693 + for_each_inst(xcc, xcc_mask) { 694 + mapped_xcc = GET_INST(GC, xcc); 695 + bitmap = 0x2 | (0x4 << (mapped_xcc % 4)); 696 + if (mapped_xcc/4) 697 + bitmap = bitmap << 8; 698 + node->interrupt_bitmap |= bitmap; 699 + } 700 + break; 701 + default: 702 + for_each_inst(xcc, xcc_mask) { 703 + mapped_xcc = GET_INST(GC, xcc); 704 + node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2)); 705 + } 706 + break; 699 707 } 700 708 dev_info(kfd_device, "Node: %d, interrupt_bitmap: %x\n", kfd_node_idx, 701 709 node->interrupt_bitmap);
+391
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v12_1.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR MIT 2 + /* 3 + * Copyright 2025 Advanced Micro Devices, Inc. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the "Software"), 7 + * to deal in the Software without restriction, including without limitation 8 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 + * and/or sell copies of the Software, and to permit persons to whom the 10 + * Software is furnished to do so, subject to the following conditions: 11 + * 12 + * The above copyright notice and this permission notice shall be included in 13 + * all copies or substantial portions of the Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 + * OTHER DEALINGS IN THE SOFTWARE. 22 + */ 23 + 24 + #include "kfd_priv.h" 25 + #include "kfd_events.h" 26 + #include "soc15_int.h" 27 + #include "kfd_device_queue_manager.h" 28 + #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" 29 + #include "kfd_smi_events.h" 30 + #include "kfd_debug.h" 31 + 32 + /* 33 + * GFX12.1 SQ Interrupts 34 + * 35 + * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit 36 + * packet to the Interrupt Handler: 37 + * Auto - Generated by the SQG (various cmd overflows, timestamps etc) 38 + * Wave - Generated by S_SENDMSG through a shader program 39 + * Error - HW generated errors (Illegal instructions, Memviols, EDC etc) 40 + * 41 + * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus 42 + * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such: 43 + * 44 + * - context_id1[7:6] 45 + * Encoding type (0 = Auto, 1 = Wave, 2 = Error) 46 + * 47 + * - context_id0[26] 48 + * PRIV bit indicates that Wave S_SEND or error occurred within trap 49 + * 50 + * - context_id0[24:0] 51 + * 25-bit data with the following layout per encoding type: 52 + * Auto - only context_id0[8:0] is used, which reports various interrupts 53 + * generated by SQG. The rest is 0. 54 + * Wave - user data sent from m0 via S_SENDMSG (context_id0[23:0]) 55 + * Error - Error Type (context_id0[24:21]), Error Details (context_id0[20:0]) 56 + * 57 + * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave 58 + * S_SENDMSG and Errors. These are 0 for Auto. 59 + */ 60 + 61 + enum SQ_INTERRUPT_WORD_ENCODING { 62 + SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, 63 + SQ_INTERRUPT_WORD_ENCODING_INST, 64 + SQ_INTERRUPT_WORD_ENCODING_ERROR, 65 + }; 66 + 67 + enum SQ_INTERRUPT_ERROR_TYPE { 68 + SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0, 69 + SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST, 70 + SQ_INTERRUPT_ERROR_TYPE_MEMVIOL, 71 + SQ_INTERRUPT_ERROR_TYPE_EDC_FED, 72 + }; 73 + 74 + /* SQ_INTERRUPT_WORD_AUTO_CTXID */ 75 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT 0 76 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT 1 77 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF0_FULL__SHIFT 2 78 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF1_FULL__SHIFT 3 79 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT 8 80 + #define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT 6 81 + 82 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK 0x00000001 83 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK 0x00000002 84 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF0_FULL_MASK 0x00000004 85 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF1_FULL_MASK 0x00000008 86 + #define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK 0x00000100 87 + #define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK 0x000000c0 88 + 89 + /* SQ_INTERRUPT_WORD_WAVE_CTXID */ 90 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT 0 91 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__SA_ID__SHIFT 25 92 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT 26 93 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT 27 94 + #define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID__SHIFT 0 95 + #define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT 2 96 + #define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT 6 97 + 98 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK 0x00ffffff /* [23:0] */ 99 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__SA_ID_MASK 0x02000000 /* [25] */ 100 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK 0x04000000 /* [26] */ 101 + #define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */ 102 + #define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */ 103 + #define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */ 104 + #define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */ 105 + 106 + /* SQ_INTERRUPT_WORD_ERROR_CTXID */ 107 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL__SHIFT 0 108 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__MEM_VIOL__SHIFT 19 109 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE__SHIFT 21 110 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__SA_ID__SHIFT 25 111 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV__SHIFT 26 112 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID__SHIFT 27 113 + #define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID__SHIFT 0 114 + #define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID__SHIFT 2 115 + #define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING__SHIFT 6 116 + 117 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL_MASK 0x0007ffff /* [18:0] */ 118 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__MEM_VIOL_MASK 0x00180000 /* [20:19] */ 119 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE_MASK 0x01e00000 /* [24:21] */ 120 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__SA_ID_MASK 0x02000000 /* [25] */ 121 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV_MASK 0x04000000 /* [26] */ 122 + #define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */ 123 + #define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */ 124 + #define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */ 125 + #define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */ 126 + 127 + /* 128 + * The debugger will send user data(m0) with PRIV=1 to indicate it requires 129 + * notification from the KFD with the following queue id (DOORBELL_ID) and 130 + * trap code (TRAP_CODE). 131 + */ 132 + #define KFD_CTXID0_TRAP_CODE_SHIFT 10 133 + #define KFD_CTXID0_TRAP_CODE_MASK 0xfffc00 134 + #define KFD_CTXID0_CP_BAD_OP_ECODE_MASK 0x3ffffff 135 + #define KFD_CTXID0_DOORBELL_ID_MASK 0x0003ff 136 + 137 + #define KFD_CTXID0_TRAP_CODE(ctxid0) (((ctxid0) & \ 138 + KFD_CTXID0_TRAP_CODE_MASK) >> \ 139 + KFD_CTXID0_TRAP_CODE_SHIFT) 140 + #define KFD_CTXID0_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \ 141 + KFD_CTXID0_CP_BAD_OP_ECODE_MASK) >> \ 142 + KFD_CTXID0_TRAP_CODE_SHIFT) 143 + #define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \ 144 + KFD_CTXID0_DOORBELL_ID_MASK) 145 + 146 + static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) 147 + { 148 + pr_debug_ratelimited( 149 + "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf0_full %d, ttrace_buf1_full %d ttrace_utc_err %d\n", 150 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE), 151 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT), 152 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF0_FULL), 153 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF1_FULL), 154 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR)); 155 + } 156 + 157 + static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) 158 + { 159 + pr_debug_ratelimited( 160 + "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", 161 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA), 162 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SA_ID), 163 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV), 164 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID), 165 + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID), 166 + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID)); 167 + } 168 + 169 + static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) 170 + { 171 + pr_debug_ratelimited( 172 + "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", 173 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL), 174 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE), 175 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SA_ID), 176 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV), 177 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID), 178 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID), 179 + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID)); 180 + } 181 + 182 + static void event_interrupt_poison_consumption_v12_1(struct kfd_node *node, 183 + uint16_t pasid, uint16_t source_id) 184 + { 185 + enum amdgpu_ras_block block = 0; 186 + int ret = -EINVAL; 187 + uint32_t reset = 0; 188 + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL); 189 + 190 + if (!p) 191 + return; 192 + 193 + /* all queues of a process will be unmapped in one time */ 194 + if (atomic_read(&p->poison)) { 195 + kfd_unref_process(p); 196 + return; 197 + } 198 + 199 + atomic_set(&p->poison, 1); 200 + kfd_unref_process(p); 201 + 202 + switch (source_id) { 203 + case SOC15_INTSRC_SQ_INTERRUPT_MSG: 204 + if (node->dqm->ops.reset_queues) 205 + ret = node->dqm->ops.reset_queues(node->dqm, pasid); 206 + block = AMDGPU_RAS_BLOCK__GFX; 207 + if (ret) 208 + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; 209 + break; 210 + case SOC21_INTSRC_SDMA_ECC: 211 + default: 212 + block = AMDGPU_RAS_BLOCK__GFX; 213 + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; 214 + break; 215 + } 216 + 217 + kfd_signal_poison_consumed_event(node, pasid); 218 + 219 + /* 220 + * resetting queue passes, do page retirement without gpu reset 221 + * resetting queue fails, fallback to gpu reset solution 222 + */ 223 + amdgpu_amdkfd_ras_poison_consumption_handler(node->adev, block, reset); 224 + } 225 + 226 + static bool event_interrupt_isr_v12_1(struct kfd_node *node, 227 + const uint32_t *ih_ring_entry, 228 + uint32_t *patched_ihre, 229 + bool *patched_flag) 230 + { 231 + uint16_t source_id, client_id, pasid, vmid, node_id; 232 + const uint32_t *data = ih_ring_entry; 233 + uint32_t context_id0; 234 + 235 + node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); 236 + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); 237 + 238 + if (!kfd_irq_is_from_node(node, node_id, vmid)) { 239 + pr_debug("Interrupt not for Node, node_id: %d, vmid: %d\n", node_id, vmid); 240 + return false; 241 + } 242 + 243 + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); 244 + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); 245 + 246 + /* Only handle interrupts from KFD VMIDs */ 247 + if (!KFD_IRQ_IS_FENCE(client_id, source_id) && 248 + (vmid < node->vm_info.first_vmid_kfd || 249 + vmid > node->vm_info.last_vmid_kfd)) 250 + return false; 251 + 252 + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); 253 + context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); 254 + 255 + if ((source_id == SOC15_INTSRC_CP_END_OF_PIPE) && 256 + (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG)) 257 + return false; 258 + 259 + pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", 260 + client_id, source_id, vmid, pasid); 261 + pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", 262 + data[0], data[1], data[2], data[3], 263 + data[4], data[5], data[6], data[7]); 264 + 265 + /* If there is no valid PASID, it's likely a bug */ 266 + if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) 267 + return false; 268 + 269 + /* Interrupt types we care about: various signals and faults. 270 + * They will be forwarded to a work queue (see below). 271 + */ 272 + return source_id == SOC15_INTSRC_CP_END_OF_PIPE || 273 + source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || 274 + source_id == SOC15_INTSRC_CP_BAD_OPCODE || 275 + source_id == SOC21_INTSRC_SDMA_TRAP || 276 + KFD_IRQ_IS_FENCE(client_id, source_id) || 277 + ((client_id == SOC21_IH_CLIENTID_VMC || 278 + client_id == SOC21_IH_CLIENTID_UTCL2) && 279 + !amdgpu_no_queue_eviction_on_vm_fault); 280 + } 281 + 282 + static void event_interrupt_wq_v12_1(struct kfd_node *node, 283 + const uint32_t *ih_ring_entry) 284 + { 285 + uint16_t source_id, client_id, ring_id, pasid, vmid; 286 + uint32_t context_id0, context_id1; 287 + uint8_t sq_int_enc, sq_int_priv, sq_int_errtype; 288 + struct kfd_vm_fault_info info = {0}; 289 + struct kfd_hsa_memory_exception_data exception_data; 290 + 291 + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); 292 + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); 293 + ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); 294 + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); 295 + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); 296 + context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); 297 + context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry); 298 + 299 + /* VMC, UTCL2 */ 300 + if (client_id == SOC21_IH_CLIENTID_VMC || 301 + client_id == SOC21_IH_CLIENTID_UTCL2) { 302 + info.vmid = vmid; 303 + info.mc_id = client_id; 304 + info.page_addr = ih_ring_entry[4] | 305 + (uint64_t)(ih_ring_entry[5] & 0xf) << 32; 306 + info.prot_valid = ring_id & 0x08; 307 + info.prot_read = ring_id & 0x10; 308 + info.prot_write = ring_id & 0x20; 309 + 310 + memset(&exception_data, 0, sizeof(exception_data)); 311 + exception_data.gpu_id = node->id; 312 + exception_data.va = (info.page_addr) << PAGE_SHIFT; 313 + exception_data.failure.NotPresent = info.prot_valid ? 1 : 0; 314 + exception_data.failure.NoExecute = info.prot_exec ? 1 : 0; 315 + exception_data.failure.ReadOnly = info.prot_write ? 1 : 0; 316 + exception_data.failure.imprecise = 0; 317 + 318 + kfd_set_dbg_ev_from_interrupt(node, pasid, -1, 319 + KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION), 320 + &exception_data, sizeof(exception_data)); 321 + kfd_smi_event_update_vmfault(node, pasid); 322 + 323 + /* GRBM, SDMA, SE, PMM */ 324 + } else if (client_id == SOC21_IH_CLIENTID_GRBM_CP || 325 + client_id == SOC21_IH_CLIENTID_GFX) { 326 + 327 + /* CP */ 328 + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) 329 + kfd_signal_event_interrupt(pasid, context_id0, 32); 330 + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && 331 + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { 332 + u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); 333 + 334 + kfd_set_dbg_ev_from_interrupt(node, pasid, doorbell_id, 335 + KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), 336 + NULL, 0); 337 + kfd_dqm_suspend_bad_queue_mes(node, pasid, doorbell_id); 338 + } 339 + 340 + /* SDMA */ 341 + else if (source_id == SOC21_INTSRC_SDMA_TRAP) 342 + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); 343 + else if (source_id == SOC21_INTSRC_SDMA_ECC) { 344 + event_interrupt_poison_consumption_v12_1(node, pasid, source_id); 345 + return; 346 + } 347 + 348 + /* SQ */ 349 + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { 350 + sq_int_enc = REG_GET_FIELD(context_id1, 351 + SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); 352 + switch (sq_int_enc) { 353 + case SQ_INTERRUPT_WORD_ENCODING_AUTO: 354 + print_sq_intr_info_auto(context_id0, context_id1); 355 + break; 356 + case SQ_INTERRUPT_WORD_ENCODING_INST: 357 + print_sq_intr_info_inst(context_id0, context_id1); 358 + sq_int_priv = REG_GET_FIELD(context_id0, 359 + SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV); 360 + if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(node, pasid, 361 + KFD_CTXID0_DOORBELL_ID(context_id0), 362 + KFD_CTXID0_TRAP_CODE(context_id0), 363 + NULL, 0))) 364 + return; 365 + break; 366 + case SQ_INTERRUPT_WORD_ENCODING_ERROR: 367 + print_sq_intr_info_error(context_id0, context_id1); 368 + sq_int_errtype = REG_GET_FIELD(context_id0, 369 + SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE); 370 + if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && 371 + sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { 372 + event_interrupt_poison_consumption_v12_1( 373 + node, pasid, source_id); 374 + return; 375 + } 376 + break; 377 + default: 378 + break; 379 + } 380 + kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); 381 + } 382 + 383 + } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) { 384 + kfd_process_close_interrupt_drain(pasid); 385 + } 386 + } 387 + 388 + const struct kfd_event_interrupt_class event_interrupt_class_v12_1 = { 389 + .interrupt_isr = event_interrupt_isr_v12_1, 390 + .interrupt_wq = event_interrupt_wq_v12_1, 391 + };
+1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 1508 1508 extern const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3; 1509 1509 extern const struct kfd_event_interrupt_class event_interrupt_class_v10; 1510 1510 extern const struct kfd_event_interrupt_class event_interrupt_class_v11; 1511 + extern const struct kfd_event_interrupt_class event_interrupt_class_v12_1; 1511 1512 1512 1513 extern const struct kfd_device_global_init_class device_global_init_class_cik; 1513 1514
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 2279 2279 */ 2280 2280 if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) || 2281 2281 KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) || 2282 - KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) { 2282 + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0) || 2283 + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(12, 1, 0)) { 2283 2284 node_id = ffs(pdd->dev->interrupt_bitmap) - 1; 2284 2285 irq_drain_fence[3] |= node_id << 16; 2285 2286 }