drm/amdgpu: rework TLB flushing · tjh.dev/kernel@5255e14

+4 -4

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

··· 810 810 if (r) 811 811 return r; 812 812 813 - r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); 813 + r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update); 814 814 if (r) 815 815 return r; 816 816 ··· 821 821 if (r) 822 822 return r; 823 823 824 - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); 824 + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); 825 825 if (r) 826 826 return r; 827 827 } ··· 840 840 if (r) 841 841 return r; 842 842 843 - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); 843 + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); 844 844 if (r) 845 845 return r; 846 846 } ··· 853 853 if (r) 854 854 return r; 855 855 856 - r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); 856 + r = amdgpu_sync_fence(&p->job->sync, vm->last_update); 857 857 if (r) 858 858 return r; 859 859

+3 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

··· 277 277 unsigned vmhub = ring->funcs->vmhub; 278 278 uint64_t fence_context = adev->fence_context + ring->idx; 279 279 bool needs_flush = vm->use_cpu_for_update; 280 - uint64_t updates = sync->last_vm_update; 280 + uint64_t updates = amdgpu_vm_tlb_seq(vm); 281 281 int r; 282 282 283 283 *id = vm->reserved_vmid[vmhub]; ··· 338 338 unsigned vmhub = ring->funcs->vmhub; 339 339 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 340 340 uint64_t fence_context = adev->fence_context + ring->idx; 341 - uint64_t updates = sync->last_vm_update; 341 + uint64_t updates = amdgpu_vm_tlb_seq(vm); 342 342 int r; 343 343 344 344 job->vm_needs_flush = vm->use_cpu_for_update; ··· 426 426 if (r) 427 427 goto error; 428 428 429 - id->flushed_updates = sync->last_vm_update; 429 + id->flushed_updates = amdgpu_vm_tlb_seq(vm); 430 430 job->vm_needs_flush = true; 431 431 } 432 432

-20

drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

··· 51 51 void amdgpu_sync_create(struct amdgpu_sync *sync) 52 52 { 53 53 hash_init(sync->fences); 54 - sync->last_vm_update = 0; 55 54 } 56 55 57 56 /** ··· 168 169 hash_add(sync->fences, &e->node, f->context); 169 170 e->fence = dma_fence_get(f); 170 171 return 0; 171 - } 172 - 173 - /** 174 - * amdgpu_sync_vm_fence - remember to sync to this VM fence 175 - * 176 - * @sync: sync object to add fence to 177 - * @fence: the VM fence to add 178 - * 179 - * Add the fence to the sync object and remember it as VM update. 180 - */ 181 - int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) 182 - { 183 - if (!fence) 184 - return 0; 185 - 186 - sync->last_vm_update = max(sync->last_vm_update, fence->seqno); 187 - return amdgpu_sync_fence(sync, fence); 188 172 } 189 173 190 174 /* Determine based on the owner and mode if we should sync to a fence or not */ ··· 357 375 kmem_cache_free(amdgpu_sync_slab, e); 358 376 } 359 377 } 360 - 361 - clone->last_vm_update = source->last_vm_update; 362 378 363 379 return 0; 364 380 }

-2

drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h

··· 43 43 */ 44 44 struct amdgpu_sync { 45 45 DECLARE_HASHTABLE(fences, 4); 46 - uint64_t last_vm_update; 47 46 }; 48 47 49 48 void amdgpu_sync_create(struct amdgpu_sync *sync); 50 49 int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); 51 - int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); 52 50 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, 53 51 struct dma_resv *resv, enum amdgpu_sync_mode mode, 54 52 void *owner);

+54 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

··· 89 89 }; 90 90 91 91 /** 92 + * amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence 93 + */ 94 + struct amdgpu_vm_tlb_seq_cb { 95 + /** 96 + * @vm: pointer to the amdgpu_vm structure to set the fence sequence on 97 + */ 98 + struct amdgpu_vm *vm; 99 + 100 + /** 101 + * @cb: callback 102 + */ 103 + struct dma_fence_cb cb; 104 + }; 105 + 106 + /** 92 107 * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping 93 108 * 94 109 * @adev: amdgpu_device pointer ··· 776 761 } 777 762 778 763 /** 764 + * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence 765 + * @fence: unused 766 + * @cb: the callback structure 767 + * 768 + * Increments the tlb sequence to make sure that future CS execute a VM flush. 769 + */ 770 + static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, 771 + struct dma_fence_cb *cb) 772 + { 773 + struct amdgpu_vm_tlb_seq_cb *tlb_cb; 774 + 775 + tlb_cb = container_of(cb, typeof(*tlb_cb), cb); 776 + atomic64_inc(&tlb_cb->vm->tlb_seq); 777 + kfree(tlb_cb); 778 + } 779 + 780 + /** 779 781 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table 780 782 * 781 783 * @adev: amdgpu_device pointer of the VM ··· 827 795 bool *table_freed) 828 796 { 829 797 struct amdgpu_vm_update_params params; 798 + struct amdgpu_vm_tlb_seq_cb *tlb_cb; 830 799 struct amdgpu_res_cursor cursor; 831 800 enum amdgpu_sync_mode sync_mode; 832 801 int r, idx; 833 802 834 803 if (!drm_dev_enter(adev_to_drm(adev), &idx)) 835 804 return -ENODEV; 805 + 806 + tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); 807 + if (!tlb_cb) { 808 + r = -ENOMEM; 809 + goto error_unlock; 810 + } 836 811 837 812 memset(&params, 0, sizeof(params)); 838 813 params.adev = adev; ··· 859 820 amdgpu_vm_eviction_lock(vm); 860 821 if (vm->evicting) { 861 822 r = -EBUSY; 862 - goto error_unlock; 823 + goto error_free; 863 824 } 864 825 865 826 if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { ··· 872 833 873 834 r = vm->update_funcs->prepare(&params, resv, sync_mode); 
874 835 if (r) 875 - goto error_unlock; 836 + goto error_free; 876 837 877 838 amdgpu_res_first(pages_addr ? NULL : res, offset, 878 839 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor); ··· 921 882 tmp = start + num_entries; 922 883 r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags); 923 884 if (r) 924 - goto error_unlock; 885 + goto error_free; 925 886 926 887 amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); 927 888 start = tmp; ··· 929 890 930 891 r = vm->update_funcs->commit(&params, fence); 931 892 893 + if (!unlocked && (!(flags & AMDGPU_PTE_VALID) || params.table_freed)) { 894 + tlb_cb->vm = vm; 895 + if (!fence || !*fence || 896 + dma_fence_add_callback(*fence, &tlb_cb->cb, 897 + amdgpu_vm_tlb_seq_cb)) 898 + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); 899 + tlb_cb = NULL; 900 + } 901 + 932 902 if (table_freed) 933 903 *table_freed = *table_freed || params.table_freed; 904 + 905 + error_free: 906 + kfree(tlb_cb); 934 907 935 908 error_unlock: 936 909 amdgpu_vm_eviction_unlock(vm);

+15

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

··· 284 284 struct drm_sched_entity immediate; 285 285 struct drm_sched_entity delayed; 286 286 287 + /* Last finished delayed update */ 288 + atomic64_t tlb_seq; 289 + 287 290 /* Last unlocked submission to the scheduler entities */ 288 291 struct dma_fence *last_unlocked; 289 292 ··· 480 477 #if defined(CONFIG_DEBUG_FS) 481 478 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); 482 479 #endif 480 + 481 + /** 482 + * amdgpu_vm_tlb_seq - return tlb flush sequence number 483 + * @vm: the amdgpu_vm structure to query 484 + * 485 + * Returns the tlb flush sequence number which indicates that the VM TLBs need 486 + * to be invalidated whenever the sequence number changes. 487 + */ 488 + static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm) 489 + { 490 + return atomic64_read(&vm->tlb_seq); 491 + } 483 492 484 493 #endif

Configure Feed

Configure Feed