Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: don't reemit ring contents more than once

If we cancel a bad job and reemit the ring contents, and then
hit another timeout, cancel everything rather than reemitting a
second time. The wptr markers are only valid for the original emit;
once the ring contents have been reemitted, they are no longer correct.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+19 -5
+17 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
··· 709 709 struct amdgpu_ring *ring = af->ring; 710 710 unsigned long flags; 711 711 u32 seq, last_seq; 712 + bool reemitted = false; 712 713 713 714 last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; 714 715 seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; ··· 727 726 if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { 728 727 fence = container_of(unprocessed, struct amdgpu_fence, base); 729 728 730 - if (fence == af) 729 + if (fence->reemitted > 1) 730 + reemitted = true; 731 + else if (fence == af) 731 732 dma_fence_set_error(&fence->base, -ETIME); 732 733 else if (fence->context == af->context) 733 734 dma_fence_set_error(&fence->base, -ECANCELED); ··· 737 734 rcu_read_unlock(); 738 735 } while (last_seq != seq); 739 736 spin_unlock_irqrestore(&ring->fence_drv.lock, flags); 740 - /* signal the guilty fence */ 741 - amdgpu_fence_write(ring, (u32)af->base.seqno); 742 - amdgpu_fence_process(ring); 737 + 738 + if (reemitted) { 739 + /* if we've already reemitted once then just cancel everything */ 740 + amdgpu_fence_driver_force_completion(af->ring); 741 + af->ring->ring_backup_entries_to_copy = 0; 742 + } else { 743 + /* signal the guilty fence */ 744 + amdgpu_fence_write(ring, (u32)af->base.seqno); 745 + amdgpu_fence_process(ring); 746 + } 743 747 } 744 748 745 749 void amdgpu_fence_save_wptr(struct amdgpu_fence *af) ··· 794 784 /* save everything if the ring is not guilty, otherwise 795 785 * just save the content from other contexts. 796 786 */ 797 - if (!guilty_fence || (fence->context != guilty_fence->context)) 787 + if (!fence->reemitted && 788 + (!guilty_fence || (fence->context != guilty_fence->context))) 798 789 amdgpu_ring_backup_unprocessed_command(ring, wptr, 799 790 fence->wptr); 800 791 wptr = fence->wptr; 792 + fence->reemitted++; 801 793 } 802 794 rcu_read_unlock(); 803 795 } while (last_seq != seq);
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
··· 150 150 u64 wptr; 151 151 /* fence context for resets */ 152 152 u64 context; 153 + /* has this fence been reemitted */ 154 + unsigned int reemitted; 153 155 }; 154 156 155 157 extern const struct drm_sched_backend_ops amdgpu_sched_ops;