Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: rework ring reset backup and reemit v9

Store the start wptr and IB size in the IB fence. On queue
reset, save the ring contents of all IBs.

For reemit, reemit the entire IB state for non-guilty contexts.
For guilty contexts, replace the IB submission with nops, but reemit
the rest. Split the reemit per fence and when we reemit, update the
wptr with the new values from reemit. This allows us to reemit jobs
repeatedly as the wptrs get properly updated each time.

v2: further simplify the logic
v3: reemit vm state, not just vm fence
v4: just nop the IB and possibly the VM portion of the submission
v5: simplify the vm fence check
v6: split the vm and ib fences
v7: fix commit message
v8: use wptr rather than count_dw to calculate offsets
v9: fix missing documentation update spotted by the kernel test robot

Reviewed-by: Jesse Zhang <jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+74 -117
+37 -62
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
··· 89 89 return seq; 90 90 } 91 91 92 - static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af) 93 - { 94 - af->fence_wptr_start = af->ring->wptr; 95 - } 96 - 97 - static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) 98 - { 99 - af->fence_wptr_end = af->ring->wptr; 100 - } 101 - 102 92 /** 103 93 * amdgpu_fence_emit - emit a fence on the requested ring 104 94 * ··· 114 124 &ring->fence_drv.lock, 115 125 adev->fence_context + ring->idx, seq); 116 126 117 - amdgpu_fence_save_fence_wptr_start(af); 118 127 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 119 128 seq, flags | AMDGPU_FENCE_FLAG_INT); 120 - amdgpu_fence_save_fence_wptr_end(af); 121 - amdgpu_fence_save_wptr(af); 129 + 122 130 pm_runtime_get_noresume(adev_to_drm(adev)->dev); 123 131 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; 124 132 if (unlikely(rcu_dereference_protected(*ptr, 1))) { ··· 228 240 229 241 do { 230 242 struct dma_fence *fence, **ptr; 231 - struct amdgpu_fence *am_fence; 232 243 233 244 ++last_seq; 234 245 last_seq &= drv->num_fences_mask; ··· 240 253 if (!fence) 241 254 continue; 242 255 243 - /* Save the wptr in the fence driver so we know what the last processed 244 - * wptr was. This is required for re-emitting the ring state for 245 - * queues that are reset but are not guilty and thus have no guilty fence. 
246 - */ 247 - am_fence = container_of(fence, struct amdgpu_fence, base); 248 - drv->signalled_wptr = am_fence->wptr; 249 256 dma_fence_signal(fence); 250 257 dma_fence_put(fence); 251 258 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); ··· 688 707 */ 689 708 690 709 /** 691 - * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors 710 + * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit 692 711 * 693 - * @af: fence of the ring to update 712 + * @ring: the ring to operate on 713 + * @guilty_fence: fence of the ring to update 694 714 * 695 715 */ 696 - void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af) 716 + void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring, 717 + struct amdgpu_fence *guilty_fence) 697 718 { 698 719 struct dma_fence *unprocessed; 699 720 struct dma_fence __rcu **ptr; 700 721 struct amdgpu_fence *fence; 701 - struct amdgpu_ring *ring = af->ring; 702 722 unsigned long flags; 703 723 u32 seq, last_seq; 704 - bool reemitted = false; 724 + unsigned int i; 725 + bool is_guilty_fence; 726 + bool is_guilty_context; 705 727 706 728 last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; 707 729 seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; 708 730 709 - /* mark all fences from the guilty context with an error */ 731 + ring->reemit = true; 732 + amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy); 710 733 spin_lock_irqsave(&ring->fence_drv.lock, flags); 711 734 do { 712 735 last_seq++; ··· 722 737 723 738 if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { 724 739 fence = container_of(unprocessed, struct amdgpu_fence, base); 740 + is_guilty_fence = fence == guilty_fence; 741 + is_guilty_context = fence->context == guilty_fence->context; 725 742 726 - if (fence->reemitted > 1) 727 - reemitted = true; 728 - else if (fence == af) 743 + /* mark all fences from the guilty context with an error */ 744 + if 
(is_guilty_fence) 729 745 dma_fence_set_error(&fence->base, -ETIME); 730 - else if (fence->context == af->context) 746 + else if (is_guilty_context) 731 747 dma_fence_set_error(&fence->base, -ECANCELED); 748 + 749 + /* reemit the packet stream and update wptrs */ 750 + fence->ib_wptr = ring->wptr; 751 + for (i = 0; i < fence->ib_dw_size; i++) { 752 + /* Skip the IB(s) for the guilty context. */ 753 + if (is_guilty_context && 754 + i >= fence->skip_ib_dw_start_offset && 755 + i < fence->skip_ib_dw_end_offset) 756 + amdgpu_ring_write(ring, ring->funcs->nop); 757 + else 758 + amdgpu_ring_write(ring, 759 + ring->ring_backup[fence->backup_idx + i]); 760 + } 732 761 } 733 762 rcu_read_unlock(); 734 763 } while (last_seq != seq); 735 764 spin_unlock_irqrestore(&ring->fence_drv.lock, flags); 736 - 737 - if (reemitted) { 738 - /* if we've already reemitted once then just cancel everything */ 739 - amdgpu_fence_driver_force_completion(af->ring); 740 - af->ring->ring_backup_entries_to_copy = 0; 741 - } 742 - } 743 - 744 - void amdgpu_fence_save_wptr(struct amdgpu_fence *af) 745 - { 746 - af->wptr = af->ring->wptr; 765 + amdgpu_ring_commit(ring); 766 + ring->reemit = false; 747 767 } 748 768 749 769 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, 750 - u64 start_wptr, u64 end_wptr) 770 + struct amdgpu_fence *af) 751 771 { 752 - unsigned int first_idx = start_wptr & ring->buf_mask; 753 - unsigned int last_idx = end_wptr & ring->buf_mask; 772 + unsigned int first_idx = af->ib_wptr & ring->buf_mask; 773 + unsigned int dw_size = af->ib_dw_size; 754 774 unsigned int i; 755 775 776 + af->backup_idx = ring->ring_backup_entries_to_copy; 756 777 /* Backup the contents of the ring buffer. 
*/ 757 - for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask) 778 + for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size) 758 779 ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i]; 759 780 } 760 781 ··· 770 779 struct dma_fence *unprocessed; 771 780 struct dma_fence __rcu **ptr; 772 781 struct amdgpu_fence *fence; 773 - u64 wptr; 774 782 u32 seq, last_seq; 775 783 776 784 last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; 777 785 seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; 778 - wptr = ring->fence_drv.signalled_wptr; 779 786 ring->ring_backup_entries_to_copy = 0; 780 787 781 788 do { ··· 787 798 if (unprocessed && !dma_fence_is_signaled(unprocessed)) { 788 799 fence = container_of(unprocessed, struct amdgpu_fence, base); 789 800 790 - /* save everything if the ring is not guilty, otherwise 791 - * just save the content from other contexts. 792 - */ 793 - if (!fence->reemitted && 794 - (!guilty_fence || (fence->context != guilty_fence->context))) { 795 - amdgpu_ring_backup_unprocessed_command(ring, wptr, 796 - fence->wptr); 797 - } else if (!fence->reemitted) { 798 - /* always save the fence */ 799 - amdgpu_ring_backup_unprocessed_command(ring, 800 - fence->fence_wptr_start, 801 - fence->fence_wptr_end); 802 - } 803 - wptr = fence->wptr; 804 - fence->reemitted++; 801 + amdgpu_ring_backup_unprocessed_command(ring, fence); 805 802 } 806 803 rcu_read_unlock(); 807 804 } while (last_seq != seq);
+18 -8
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
··· 129 129 struct amdgpu_ib *ib = &ibs[0]; 130 130 struct dma_fence *tmp = NULL; 131 131 struct amdgpu_fence *af; 132 + struct amdgpu_fence *vm_af; 132 133 bool need_ctx_switch; 133 134 struct amdgpu_vm *vm; 134 135 uint64_t fence_ctx; ··· 216 215 dma_fence_put(tmp); 217 216 } 218 217 219 - if (job) 218 + if (job) { 219 + vm_af = job->hw_vm_fence; 220 + /* VM sequence */ 221 + vm_af->ib_wptr = ring->wptr; 220 222 amdgpu_vm_flush(ring, job, need_pipe_sync); 223 + vm_af->ib_dw_size = 224 + amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr); 225 + } 221 226 227 + /* IB sequence */ 228 + af->ib_wptr = ring->wptr; 222 229 amdgpu_ring_ib_begin(ring); 223 230 224 231 if (ring->funcs->insert_start) ··· 247 238 cond_exec = amdgpu_ring_init_cond_exec(ring, 248 239 ring->cond_exe_gpu_addr); 249 240 241 + /* Skip the IB for guilty contexts */ 242 + af->skip_ib_dw_start_offset = 243 + amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); 250 244 amdgpu_device_flush_hdp(adev, ring); 251 245 252 246 if (need_ctx_switch) ··· 288 276 amdgpu_ring_emit_frame_cntl(ring, false, secure); 289 277 290 278 amdgpu_device_invalidate_hdp(adev, ring); 279 + /* Skip the IB for guilty contexts */ 280 + af->skip_ib_dw_end_offset = 281 + amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); 291 282 292 283 if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) 293 284 fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; ··· 327 312 ring->funcs->emit_wave_limit(ring, false); 328 313 329 314 amdgpu_ring_ib_end(ring); 330 - /* Save the wptr associated with this fence. 331 - * This must be last for resets to work properly 332 - * as we need to save the wptr associated with this 333 - * fence so we know what rings contents to backup 334 - * after we reset the queue. 335 - */ 336 - amdgpu_fence_save_wptr(af); 315 + 316 + af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); 337 317 338 318 amdgpu_ring_commit(ring); 339 319
+9 -37
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
··· 90 90 ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; 91 91 92 92 /* Make sure we aren't trying to allocate more space 93 - * than the maximum for one submission 93 + * than the maximum for one submission. Skip for reemit 94 + * since we may be reemitting several submissions. 94 95 */ 95 - if (WARN_ON_ONCE(ndw > ring->max_dw)) 96 - return -ENOMEM; 96 + if (!ring->reemit) { 97 + if (WARN_ON_ONCE(ndw > ring->max_dw)) 98 + return -ENOMEM; 99 + } 97 100 98 101 ring->count_dw = ndw; 99 102 ring->wptr_old = ring->wptr; ··· 105 102 ring->funcs->begin_use(ring); 106 103 107 104 return 0; 108 - } 109 - 110 - /** 111 - * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit 112 - * 113 - * @ring: amdgpu_ring structure holding ring information 114 - * @ndw: number of dwords to allocate in the ring buffer 115 - * 116 - * Allocate @ndw dwords in the ring buffer (all asics). 117 - * doesn't check the max_dw limit as we may be reemitting 118 - * several submissions. 
119 - */ 120 - static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) 121 - { 122 - /* Align requested size with padding so unlock_commit can 123 - * pad safely */ 124 - ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; 125 - 126 - ring->count_dw = ndw; 127 - ring->wptr_old = ring->wptr; 128 - 129 - if (ring->funcs->begin_use) 130 - ring->funcs->begin_use(ring); 131 105 } 132 106 133 107 /** ··· 855 875 int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, 856 876 struct amdgpu_fence *guilty_fence) 857 877 { 858 - unsigned int i; 859 878 int r; 860 879 861 880 /* verify that the ring is functional */ ··· 862 883 if (r) 863 884 return r; 864 885 865 - /* set an error on all fences from the context */ 866 - if (guilty_fence) 867 - amdgpu_fence_driver_update_timedout_fence_state(guilty_fence); 868 - /* Re-emit the non-guilty commands */ 869 - if (ring->ring_backup_entries_to_copy) { 870 - amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy); 871 - for (i = 0; i < ring->ring_backup_entries_to_copy; i++) 872 - amdgpu_ring_write(ring, ring->ring_backup[i]); 873 - amdgpu_ring_commit(ring); 874 - } 886 + /* set an error on all fences from the context and reemit */ 887 + amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence); 888 + 875 889 return 0; 876 890 } 877 891
+10 -10
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
··· 121 121 /* sync_seq is protected by ring emission lock */ 122 122 uint32_t sync_seq; 123 123 atomic_t last_seq; 124 - u64 signalled_wptr; 125 124 bool initialized; 126 125 struct amdgpu_irq_src *irq_src; 127 126 unsigned irq_type; ··· 145 146 struct amdgpu_ring *ring; 146 147 ktime_t start_timestamp; 147 148 148 - /* wptr for the total submission for resets */ 149 - u64 wptr; 149 + /* location and size of the IB */ 150 + u64 ib_wptr; 151 + unsigned int ib_dw_size; 152 + unsigned int skip_ib_dw_start_offset; 153 + unsigned int skip_ib_dw_end_offset; 150 154 /* fence context for resets */ 151 155 u64 context; 152 - /* has this fence been reemitted */ 153 - unsigned int reemitted; 154 - /* wptr for the fence for the submission */ 155 - u64 fence_wptr_start; 156 - u64 fence_wptr_end; 156 + /* idx for ring backups */ 157 + unsigned int backup_idx; 157 158 }; 158 159 159 160 extern const struct drm_sched_backend_ops amdgpu_sched_ops; 160 161 161 162 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); 162 163 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); 163 - void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af); 164 - void amdgpu_fence_save_wptr(struct amdgpu_fence *af); 164 + void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring, 165 + struct amdgpu_fence *guilty_fence); 165 166 166 167 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); 167 168 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ··· 312 313 /* backups for resets */ 313 314 uint32_t *ring_backup; 314 315 unsigned int ring_backup_entries_to_copy; 316 + bool reemit; 315 317 unsigned rptr_offs; 316 318 u64 rptr_gpu_addr; 317 319 u32 *rptr_cpu_addr;