Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

accel/amdxdna: Fix runtime suspend deadlock when there is a pending job

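The runtime suspend callback drains the running job workqueue before
suspending the device. If a job is still executing and calls
pm_runtime_resume_and_get(), it can deadlock with the runtime suspend
path: the suspend callback waits for the job to finish, while the job
waits for the in-progress suspend to complete before it can resume the
device.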

Fix this by moving pm_runtime_resume_and_get() from the job execution
routine to the job submission routine, ensuring the device is resumed
before the job is queued and avoiding the deadlock during runtime
suspend.

Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260310180058.336348-1-lizhi.hou@amd.com

Lizhi Hou 6b13cb8f 59bdbabc

+12 -12
+2 -12
drivers/accel/amdxdna/aie2_ctx.c
··· 165 165 166 166 trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); 167 167 168 - amdxdna_pm_suspend_put(job->hwctx->client->xdna); 169 168 job->hwctx->priv->completed++; 170 169 dma_fence_signal(fence); 171 170 ··· 289 290 struct dma_fence *fence; 290 291 int ret; 291 292 292 - ret = amdxdna_pm_resume_get(hwctx->client->xdna); 293 - if (ret) 293 + if (!hwctx->priv->mbox_chann) 294 294 return NULL; 295 295 296 - if (!hwctx->priv->mbox_chann) { 297 - amdxdna_pm_suspend_put(hwctx->client->xdna); 298 - return NULL; 299 - } 300 - 301 - if (!mmget_not_zero(job->mm)) { 302 - amdxdna_pm_suspend_put(hwctx->client->xdna); 296 + if (!mmget_not_zero(job->mm)) 303 297 return ERR_PTR(-ESRCH); 304 - } 305 298 306 299 kref_get(&job->refcnt); 307 300 fence = dma_fence_get(job->fence); ··· 324 333 325 334 out: 326 335 if (ret) { 327 - amdxdna_pm_suspend_put(hwctx->client->xdna); 328 336 dma_fence_put(job->fence); 329 337 aie2_job_put(job); 330 338 mmput(job->mm);
+10
drivers/accel/amdxdna/amdxdna_ctx.c
··· 17 17 #include "amdxdna_ctx.h" 18 18 #include "amdxdna_gem.h" 19 19 #include "amdxdna_pci_drv.h" 20 + #include "amdxdna_pm.h" 20 21 21 22 #define MAX_HWCTX_ID 255 22 23 #define MAX_ARG_COUNT 4095 ··· 446 445 void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) 447 446 { 448 447 trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); 448 + amdxdna_pm_suspend_put(job->hwctx->client->xdna); 449 449 amdxdna_arg_bos_put(job); 450 450 amdxdna_gem_put_obj(job->cmd_bo); 451 451 dma_fence_put(job->fence); ··· 482 480 if (ret) { 483 481 XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret); 484 482 goto cmd_put; 483 + } 484 + 485 + ret = amdxdna_pm_resume_get(xdna); 486 + if (ret) { 487 + XDNA_ERR(xdna, "Resume failed, ret %d", ret); 488 + goto put_bos; 485 489 } 486 490 487 491 idx = srcu_read_lock(&client->hwctx_srcu); ··· 530 522 dma_fence_put(job->fence); 531 523 unlock_srcu: 532 524 srcu_read_unlock(&client->hwctx_srcu, idx); 525 + amdxdna_pm_suspend_put(xdna); 526 + put_bos: 533 527 amdxdna_arg_bos_put(job); 534 528 cmd_put: 535 529 amdxdna_gem_put_obj(job->cmd_bo);