Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

accel/amdxdna: Fix race condition when checking rpm_on

When autosuspend is triggered, the driver's rpm_on flag is set to indicate
that a suspend/resume is already in progress. However, if a userspace
application submits a command during this narrow window,
amdxdna_pm_resume_get() may incorrectly skip the resume operation because
the rpm_on flag is still set. This results in commands being submitted
while the device has not actually resumed, causing unexpected behavior.

set_dpm() is called by suspend/resume; it relied on the rpm_on flag to
avoid calling into rpm suspend/resume recursively. To fix this, remove
the use of the rpm_on flag entirely and instead introduce
aie2_pm_set_dpm(), which explicitly resumes the device before invoking
set_dpm(). With this change, set_dpm() is called directly when already
inside the suspend or resume execution path; otherwise, callers go
through aie2_pm_set_dpm().

Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20251208165356.1549237-1-lizhi.hou@amd.com

Lizhi Hou 00ffe45e 0823bd89

+24 -47
-1
drivers/accel/amdxdna/aie2_message.c
··· 39 39 if (!ndev->mgmt_chann) 40 40 return -ENODEV; 41 41 42 - drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock)); 43 42 ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); 44 43 if (ret == -ETIME) { 45 44 xdna_mailbox_stop_channel(ndev->mgmt_chann);
+1 -1
drivers/accel/amdxdna/aie2_pci.c
··· 321 321 if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level) 322 322 return 0; 323 323 324 - return ndev->priv->hw_ops.set_dpm(ndev, dpm_level); 324 + return aie2_pm_set_dpm(ndev, dpm_level); 325 325 } 326 326 327 327 static struct xrs_action_ops aie2_xrs_actions = {
+1
drivers/accel/amdxdna/aie2_pci.h
··· 286 286 /* aie2_pm.c */ 287 287 int aie2_pm_init(struct amdxdna_dev_hdl *ndev); 288 288 int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); 289 + int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 289 290 290 291 /* aie2_psp.c */ 291 292 struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
+16 -1
drivers/accel/amdxdna/aie2_pm.c
··· 10 10 11 11 #include "aie2_pci.h" 12 12 #include "amdxdna_pci_drv.h" 13 + #include "amdxdna_pm.h" 13 14 14 15 #define AIE2_CLK_GATING_ENABLE 1 15 16 #define AIE2_CLK_GATING_DISABLE 0 ··· 25 24 26 25 ndev->clk_gating = val; 27 26 return 0; 27 + } 28 + 29 + int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) 30 + { 31 + int ret; 32 + 33 + ret = amdxdna_pm_resume_get(ndev->xdna); 34 + if (ret) 35 + return ret; 36 + 37 + ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); 38 + amdxdna_pm_suspend_put(ndev->xdna); 39 + 40 + return ret; 28 41 } 29 42 30 43 int aie2_pm_init(struct amdxdna_dev_hdl *ndev) ··· 109 94 return -EOPNOTSUPP; 110 95 } 111 96 112 - ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); 97 + ret = aie2_pm_set_dpm(ndev, dpm_level); 113 98 if (ret) 114 99 return ret; 115 100
+4 -23
drivers/accel/amdxdna/aie2_smu.c
··· 11 11 12 12 #include "aie2_pci.h" 13 13 #include "amdxdna_pci_drv.h" 14 - #include "amdxdna_pm.h" 15 14 16 15 #define SMU_RESULT_OK 1 17 16 ··· 66 67 u32 freq; 67 68 int ret; 68 69 69 - ret = amdxdna_pm_resume_get(ndev->xdna); 70 - if (ret) 71 - return ret; 72 - 73 70 ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, 74 71 ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); 75 72 if (ret) { 76 73 XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", 77 74 ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); 78 - goto suspend_put; 75 + return ret; 79 76 } 80 77 ndev->npuclk_freq = freq; 81 78 ··· 80 85 if (ret) { 81 86 XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", 82 87 ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); 83 - goto suspend_put; 88 + return ret; 84 89 } 85 90 86 - amdxdna_pm_suspend_put(ndev->xdna); 87 91 ndev->hclk_freq = freq; 88 92 ndev->dpm_level = dpm_level; 89 93 ndev->max_tops = 2 * ndev->total_col; ··· 92 98 ndev->npuclk_freq, ndev->hclk_freq); 93 99 94 100 return 0; 95 - 96 - suspend_put: 97 - amdxdna_pm_suspend_put(ndev->xdna); 98 - return ret; 99 101 } 100 102 101 103 int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) 102 104 { 103 105 int ret; 104 106 105 - ret = amdxdna_pm_resume_get(ndev->xdna); 106 - if (ret) 107 - return ret; 108 - 109 107 ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); 110 108 if (ret) { 111 109 XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", 112 110 dpm_level, ret); 113 - goto suspend_put; 111 + return ret; 114 112 } 115 113 116 114 ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); 117 115 if (ret) { 118 116 XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d", 119 117 dpm_level, ret); 120 - goto suspend_put; 118 + return ret; 121 119 } 122 120 123 - amdxdna_pm_suspend_put(ndev->xdna); 124 121 ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; 125 122 ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; 
126 123 ndev->dpm_level = dpm_level; ··· 122 137 ndev->npuclk_freq, ndev->hclk_freq); 123 138 124 139 return 0; 125 - 126 - suspend_put: 127 - amdxdna_pm_suspend_put(ndev->xdna); 128 - return ret; 129 140 } 130 141 131 142 int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
-1
drivers/accel/amdxdna/amdxdna_pci_drv.h
··· 101 101 struct amdxdna_fw_ver fw_ver; 102 102 struct rw_semaphore notifier_lock; /* for mmu notifier*/ 103 103 struct workqueue_struct *notifier_wq; 104 - bool rpm_on; 105 104 }; 106 105 107 106 /*
+2 -20
drivers/accel/amdxdna/amdxdna_pm.c
··· 15 15 { 16 16 struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); 17 17 int ret = -EOPNOTSUPP; 18 - bool rpm; 19 18 20 - if (xdna->dev_info->ops->suspend) { 21 - rpm = xdna->rpm_on; 22 - xdna->rpm_on = false; 19 + if (xdna->dev_info->ops->suspend) 23 20 ret = xdna->dev_info->ops->suspend(xdna); 24 - xdna->rpm_on = rpm; 25 - } 26 21 27 22 XDNA_DBG(xdna, "Suspend done ret %d", ret); 28 23 return ret; ··· 27 32 { 28 33 struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); 29 34 int ret = -EOPNOTSUPP; 30 - bool rpm; 31 35 32 - if (xdna->dev_info->ops->resume) { 33 - rpm = xdna->rpm_on; 34 - xdna->rpm_on = false; 36 + if (xdna->dev_info->ops->resume) 35 37 ret = xdna->dev_info->ops->resume(xdna); 36 - xdna->rpm_on = rpm; 37 - } 38 38 39 39 XDNA_DBG(xdna, "Resume done ret %d", ret); 40 40 return ret; ··· 39 49 { 40 50 struct device *dev = xdna->ddev.dev; 41 51 int ret; 42 - 43 - if (!xdna->rpm_on) 44 - return 0; 45 52 46 53 ret = pm_runtime_resume_and_get(dev); 47 54 if (ret) { ··· 53 66 { 54 67 struct device *dev = xdna->ddev.dev; 55 68 56 - if (!xdna->rpm_on) 57 - return; 58 - 59 69 pm_runtime_put_autosuspend(dev); 60 70 } 61 71 ··· 65 81 pm_runtime_use_autosuspend(dev); 66 82 pm_runtime_allow(dev); 67 83 pm_runtime_put_autosuspend(dev); 68 - xdna->rpm_on = true; 69 84 } 70 85 71 86 void amdxdna_pm_fini(struct amdxdna_dev *xdna) 72 87 { 73 88 struct device *dev = xdna->ddev.dev; 74 89 75 - xdna->rpm_on = false; 76 90 pm_runtime_get_noresume(dev); 77 91 pm_runtime_forbid(dev); 78 92 }