Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/xe: Drop registration of guc_submit_wedged_fini from xe_guc_submit_wedge()

xe_guc_submit_wedge() runs in the DMA-fence signaling path, where
GFP_KERNEL memory allocations are not permitted. However, registering
guc_submit_wedged_fini via devm_add_action_or_reset() triggers such an
allocation.

Avoid this by moving the logic from guc_submit_wedged_fini() into
guc_submit_fini(), where wedged exec queue references are dropped during
normal teardown.

Fixes: 8ed9aaae39f3 ("drm/xe: Force wedged state and block GT reset upon any GPU hang")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/20260326210116.202585-3-matthew.brost@intel.com
(cherry picked from commit 4a706bd93c4fb156a13477e26ffdf2e633edeb10)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

authored by

Matthew Brost and committed by
Rodrigo Vivi
a0fc362f 254f4963

+9 -24
+9 -24
drivers/gpu/drm/xe/xe_guc_submit.c
··· 261 261 static void guc_submit_fini(void *arg) 262 262 { 263 263 struct xe_guc *guc = arg; 264 - 265 - /* Forcefully kill any remaining exec queues */ 266 - xe_guc_ct_stop(&guc->ct); 267 - guc_submit_reset_prepare(guc); 268 - xe_guc_softreset(guc); 269 - xe_guc_submit_stop(guc); 270 - xe_uc_fw_sanitize(&guc->fw); 271 - xe_guc_submit_pause_abort(guc); 272 - } 273 - 274 - static void guc_submit_wedged_fini(void *arg) 275 - { 276 - struct xe_guc *guc = arg; 277 264 struct xe_exec_queue *q; 278 265 unsigned long index; 279 266 267 + /* Drop any wedged queue refs */ 280 268 mutex_lock(&guc->submission_state.lock); 281 269 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 282 270 if (exec_queue_wedged(q)) { ··· 274 286 } 275 287 } 276 288 mutex_unlock(&guc->submission_state.lock); 289 + 290 + /* Forcefully kill any remaining exec queues */ 291 + xe_guc_ct_stop(&guc->ct); 292 + guc_submit_reset_prepare(guc); 293 + xe_guc_softreset(guc); 294 + xe_guc_submit_stop(guc); 295 + xe_uc_fw_sanitize(&guc->fw); 296 + xe_guc_submit_pause_abort(guc); 277 297 } 278 298 279 299 static const struct xe_exec_queue_ops guc_exec_queue_ops; ··· 1316 1320 void xe_guc_submit_wedge(struct xe_guc *guc) 1317 1321 { 1318 1322 struct xe_device *xe = guc_to_xe(guc); 1319 - struct xe_gt *gt = guc_to_gt(guc); 1320 1323 struct xe_exec_queue *q; 1321 1324 unsigned long index; 1322 - int err; 1323 1325 1324 1326 xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); 1325 1327 ··· 1329 1335 return; 1330 1336 1331 1337 if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { 1332 - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, 1333 - guc_submit_wedged_fini, guc); 1334 - if (err) { 1335 - xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; " 1336 - "Although device is wedged.\n", 1337 - xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); 1338 - return; 1339 - } 1340 - 1341 1338 mutex_lock(&guc->submission_state.lock); 1342 1339 
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 1343 1340 if (xe_exec_queue_get_unless_zero(q))