Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: reduce reset time

In the multi-container use case, reset time is important, so skip the
ring tests and the CP halt wait while suspending IPs for a reset, as
they are going to fail anyway and only add to the reset time.

v2: add a hang flag to indicate that the reset comes from a job timeout;
skip the ring test and the CP halt wait in this case

v3: move hang flag to adev

Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Victor Zhao and committed by
Alex Deucher
194eb174 72fadb13

+11 -3
+1
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 1065 1065 struct work_struct reset_work; 1066 1066 1067 1067 uint32_t amdgpu_reset_level_mask; 1068 + bool job_hang; 1068 1069 }; 1069 1070 1070 1071 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 477 477 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], 478 478 RESET_QUEUES, 0, 0); 479 479 480 - if (adev->gfx.kiq.ring.sched.ready) 480 + if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang) 481 481 r = amdgpu_ring_test_helper(kiq_ring); 482 482 spin_unlock(&adev->gfx.kiq.ring_lock); 483 483
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 49 49 } 50 50 51 51 memset(&ti, 0, sizeof(struct amdgpu_task_info)); 52 + adev->job_hang = true; 52 53 53 54 if (amdgpu_gpu_recovery && 54 55 amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { ··· 84 83 } 85 84 86 85 exit: 86 + adev->job_hang = false; 87 87 drm_dev_exit(idx); 88 88 return DRM_GPU_SCHED_STAT_NOMINAL; 89 89 }
+7 -2
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 5971 5971 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); 5972 5972 } 5973 5973 5974 + if (adev->job_hang && !enable) 5975 + return 0; 5976 + 5974 5977 for (i = 0; i < adev->usec_timeout; i++) { 5975 5978 if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) 5976 5979 break; ··· 7572 7569 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7573 7570 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], 7574 7571 PREEMPT_QUEUES, 0, 0); 7575 - 7576 - return amdgpu_ring_test_helper(kiq_ring); 7572 + if (!adev->job_hang) 7573 + return amdgpu_ring_test_helper(kiq_ring); 7574 + else 7575 + return 0; 7577 7576 } 7578 7577 #endif 7579 7578