Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: Create an option to disable soft recovery

Create a module option to disable soft recoveries on amdgpu, making
every recovery go through the device reset path. This option makes it
easier to force device resets for testing and debugging purposes.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

André Almeida and committed by
Alex Deucher
ffde7210 887db1e4

+13 -1
+1
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 1102 1102 /* Debug */ 1103 1103 bool debug_vm; 1104 1104 bool debug_largebar; 1105 + bool debug_disable_soft_recovery; 1105 1106 }; 1106 1107 1107 1108 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+7
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 124 124 enum AMDGPU_DEBUG_MASK { 125 125 AMDGPU_DEBUG_VM = BIT(0), 126 126 AMDGPU_DEBUG_LARGEBAR = BIT(1), 127 + AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), 127 128 }; 128 129 129 130 unsigned int amdgpu_vram_limit = UINT_MAX; ··· 946 945 * - 0x2: Enable simulating large-bar capability on non-large bar system. This 947 946 * limits the VRAM size reported to ROCm applications to the visible 948 947 * size, usually 256MB. 948 + * - 0x4: Disable GPU soft recovery, always do a full reset 949 949 */ 950 950 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); 951 951 module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); ··· 2065 2063 if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) { 2066 2064 pr_info("debug: enabled simulating large-bar capability on non-large bar system\n"); 2067 2065 adev->debug_largebar = true; 2066 + } 2067 + 2068 + if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) { 2069 + pr_info("debug: soft reset for GPU recovery disabled\n"); 2070 + adev->debug_disable_soft_recovery = true; 2068 2071 } 2069 2072 } 2070 2073
+5 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
··· 434 434 struct dma_fence *fence) 435 435 { 436 436 unsigned long flags; 437 + ktime_t deadline; 437 438 438 - ktime_t deadline = ktime_add_us(ktime_get(), 10000); 439 + if (unlikely(ring->adev->debug_disable_soft_recovery)) 440 + return false; 441 + 442 + deadline = ktime_add_us(ktime_get(), 10000); 439 443 440 444 if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) 441 445 return false;