Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/msm: Defer VMA unmap for fb unpins

With the conversion to drm_gpuvm, we lost the lazy VMA cleanup, which
means that fb cleanup/unpin when pageflipping to new scanout buffers
immediately unmaps the scanout buffer. This is costly (with tlbinv,
it can be 4-6ms for a 1080p scanout buffer, and more for higher
resolutions)!

To avoid this, introduce a vma_ref, which is incremented whenever
userspace has a GEM handle or dma-buf fd. When unpinning, if the
vm is the kms->vm, we defer tearing down the VMA until the vma_ref
drops to zero. If the buffer is still part of a flip-chain then
userspace will be holding some sort of reference to the BO, either
via a GEM handle and/or dma-buf fd. So this avoids unmapping the VMA
when there is a strong possibility that it will be needed again.

Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
Tested-by: Antonino Maniscalco <antomani103@gmail.com>
Reviewed-by: Antonino Maniscalco <antomani103@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/661538/

Rob Clark 3bebfd53 8d4c2171

+123 -26
+1
drivers/gpu/drm/msm/msm_drv.c
··· 837 837 .postclose = msm_postclose, 838 838 .dumb_create = msm_gem_dumb_create, 839 839 .dumb_map_offset = msm_gem_dumb_map_offset, 840 + .gem_prime_import = msm_gem_prime_import, 840 841 .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, 841 842 #ifdef CONFIG_DEBUG_FS 842 843 .debugfs_init = msm_debugfs_init,
+1
drivers/gpu/drm/msm/msm_drv.h
··· 269 269 struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); 270 270 int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map); 271 271 void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map); 272 + struct drm_gem_object *msm_gem_prime_import(struct drm_device *dev, struct dma_buf *buf); 272 273 struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, 273 274 struct dma_buf_attachment *attach, struct sg_table *sg); 274 275 struct dma_buf *msm_gem_prime_export(struct drm_gem_object *obj, int flags);
+4 -1
drivers/gpu/drm/msm/msm_fb.c
··· 89 89 return 0; 90 90 91 91 for (i = 0; i < n; i++) { 92 + msm_gem_vma_get(fb->obj[i]); 92 93 ret = msm_gem_get_and_pin_iova(fb->obj[i], vm, &msm_fb->iova[i]); 93 94 drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n", 94 95 fb->base.id, i, msm_fb->iova[i], ret); ··· 115 114 116 115 memset(msm_fb->iova, 0, sizeof(msm_fb->iova)); 117 116 118 - for (i = 0; i < n; i++) 117 + for (i = 0; i < n; i++) { 119 118 msm_gem_unpin_iova(fb->obj[i], vm); 119 + msm_gem_vma_put(fb->obj[i]); 120 + } 120 121 } 121 122 122 123 uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, int plane)
+37 -23
drivers/gpu/drm/msm/msm_gem.c
··· 19 19 #include "msm_drv.h" 20 20 #include "msm_gem.h" 21 21 #include "msm_gpu.h" 22 + #include "msm_kms.h" 22 23 23 24 static void update_device_mem(struct msm_drm_private *priv, ssize_t size) 24 25 { ··· 40 39 41 40 static int msm_gem_open(struct drm_gem_object *obj, struct drm_file *file) 42 41 { 42 + msm_gem_vma_get(obj); 43 43 update_ctx_mem(file, obj->size); 44 44 return 0; 45 45 } ··· 48 46 static void put_iova_spaces(struct drm_gem_object *obj, struct drm_gpuvm *vm, 49 47 bool close, const char *reason); 50 48 51 - static void detach_vm(struct drm_gem_object *obj, struct drm_gpuvm *vm) 52 - { 53 - msm_gem_assert_locked(obj); 54 - drm_gpuvm_resv_assert_held(vm); 55 - 56 - struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_find(vm, obj); 57 - if (vm_bo) { 58 - struct drm_gpuva *vma; 59 - 60 - drm_gpuvm_bo_for_each_va (vma, vm_bo) { 61 - if (vma->vm != vm) 62 - continue; 63 - msm_gem_vma_unmap(vma, "detach"); 64 - msm_gem_vma_close(vma); 65 - break; 66 - } 67 - 68 - drm_gpuvm_bo_put(vm_bo); 69 - } 70 - } 71 - 72 49 static void msm_gem_close(struct drm_gem_object *obj, struct drm_file *file) 73 50 { 74 51 struct msm_context *ctx = file->driver_priv; 75 52 struct drm_exec exec; 76 53 77 54 update_ctx_mem(file, -obj->size); 55 + msm_gem_vma_put(obj); 78 56 79 57 /* 80 58 * If VM isn't created yet, nothing to cleanup. 
And in fact calling ··· 81 99 82 100 msm_gem_lock_vm_and_obj(&exec, obj, ctx->vm); 83 101 put_iova_spaces(obj, ctx->vm, true, "close"); 84 - detach_vm(obj, ctx->vm); 102 + drm_exec_fini(&exec); /* drop locks */ 103 + } 104 + 105 + /* 106 + * Get/put for kms->vm VMA 107 + */ 108 + 109 + void msm_gem_vma_get(struct drm_gem_object *obj) 110 + { 111 + atomic_inc(&to_msm_bo(obj)->vma_ref); 112 + } 113 + 114 + void msm_gem_vma_put(struct drm_gem_object *obj) 115 + { 116 + struct msm_drm_private *priv = obj->dev->dev_private; 117 + struct drm_exec exec; 118 + 119 + if (atomic_dec_return(&to_msm_bo(obj)->vma_ref)) 120 + return; 121 + 122 + if (!priv->kms) 123 + return; 124 + 125 + msm_gem_lock_vm_and_obj(&exec, obj, priv->kms->vm); 126 + put_iova_spaces(obj, priv->kms->vm, true, "vma_put"); 85 127 drm_exec_fini(&exec); /* drop locks */ 86 128 } 87 129 ··· 662 656 return ret; 663 657 } 664 658 659 + static bool is_kms_vm(struct drm_gpuvm *vm) 660 + { 661 + struct msm_drm_private *priv = vm->drm->dev_private; 662 + 663 + return priv->kms && (priv->kms->vm == vm); 664 + } 665 + 665 666 /* 666 667 * Unpin a iova by updating the reference counts. The memory isn't actually 667 668 * purged until something else (shrinker, mm_notifier, destroy, etc) decides ··· 684 671 if (vma) { 685 672 msm_gem_unpin_locked(obj); 686 673 } 687 - detach_vm(obj, vm); 674 + if (!is_kms_vm(vm)) 675 + put_iova_spaces(obj, vm, true, "close"); 688 676 drm_exec_fini(&exec); /* drop locks */ 689 677 } 690 678
+28
drivers/gpu/drm/msm/msm_gem.h
··· 211 211 * Protected by LRU lock. 212 212 */ 213 213 int pin_count; 214 + 215 + /** 216 + * @vma_ref: Reference count of VMA users. 217 + * 218 + * With the vm_bo/vma holding a reference to the GEM object, we'd 219 + * otherwise have to actively tear down a VMA when, for example, 220 + * a buffer is unpinned for scanout, vs. the pre-drm_gpuvm approach 221 + * where a VMA did not hold a reference to the BO, but instead was 222 + * implicitly torn down when the BO was freed. 223 + * 224 + * To regain the lazy VMA teardown, we use the @vma_ref. It is 225 + * incremented for any of the following: 226 + * 227 + * 1) the BO is exported as a dma_buf 228 + * 2) the BO has an open userspace handle 229 + * 230 + * All of those conditions will hold a reference to the BO, 231 + * preventing it from being freed. So lazily keeping around the 232 + * VMA will not prevent the BO from being freed. (Or rather, the 233 + * reference loop is harmless in this case.) 234 + * 235 + * When the @vma_ref drops to zero, then kms->vm VMA will be 236 + * torn down. 237 + */ 238 + atomic_t vma_ref; 214 239 }; 215 240 #define to_msm_bo(x) container_of(x, struct msm_gem_object, base) 241 + 242 + void msm_gem_vma_get(struct drm_gem_object *obj); 243 + void msm_gem_vma_put(struct drm_gem_object *obj); 216 244 217 245 uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); 218 246 int msm_gem_prot(struct drm_gem_object *obj);
+52 -2
drivers/gpu/drm/msm/msm_gem_prime.c
··· 6 6 7 7 #include <linux/dma-buf.h> 8 8 9 + #include <drm/drm_drv.h> 9 10 #include <drm/drm_prime.h> 10 11 11 12 #include "msm_drv.h" ··· 43 42 msm_gem_put_vaddr_locked(obj); 44 43 } 45 44 45 + static void msm_gem_dmabuf_release(struct dma_buf *dma_buf) 46 + { 47 + struct drm_gem_object *obj = dma_buf->priv; 48 + 49 + msm_gem_vma_put(obj); 50 + drm_gem_dmabuf_release(dma_buf); 51 + } 52 + 53 + static const struct dma_buf_ops msm_gem_prime_dmabuf_ops = { 54 + .attach = drm_gem_map_attach, 55 + .detach = drm_gem_map_detach, 56 + .map_dma_buf = drm_gem_map_dma_buf, 57 + .unmap_dma_buf = drm_gem_unmap_dma_buf, 58 + .release = msm_gem_dmabuf_release, 59 + .mmap = drm_gem_dmabuf_mmap, 60 + .vmap = drm_gem_dmabuf_vmap, 61 + .vunmap = drm_gem_dmabuf_vunmap, 62 + }; 63 + 64 + struct drm_gem_object *msm_gem_prime_import(struct drm_device *dev, 65 + struct dma_buf *buf) 66 + { 67 + if (buf->ops == &msm_gem_prime_dmabuf_ops) { 68 + struct drm_gem_object *obj = buf->priv; 69 + if (obj->dev == dev) { 70 + /* 71 + * Importing dmabuf exported from our own gem increases 72 + * refcount on gem itself instead of f_count of dmabuf. 
73 + */ 74 + drm_gem_object_get(obj); 75 + return obj; 76 + } 77 + } 78 + 79 + return drm_gem_prime_import(dev, buf); 80 + } 81 + 46 82 struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, 47 83 struct dma_buf_attachment *attach, struct sg_table *sg) 48 84 { 49 85 return msm_gem_import(dev, attach->dmabuf, sg); 50 86 } 51 87 52 - 53 88 struct dma_buf *msm_gem_prime_export(struct drm_gem_object *obj, int flags) 54 89 { 55 90 if (to_msm_bo(obj)->flags & MSM_BO_NO_SHARE) 56 91 return ERR_PTR(-EPERM); 57 92 58 - return drm_gem_prime_export(obj, flags); 93 + msm_gem_vma_get(obj); 94 + 95 + struct drm_device *dev = obj->dev; 96 + struct dma_buf_export_info exp_info = { 97 + .exp_name = KBUILD_MODNAME, /* white lie for debug */ 98 + .owner = dev->driver->fops->owner, 99 + .ops = &msm_gem_prime_dmabuf_ops, 100 + .size = obj->size, 101 + .flags = flags, 102 + .priv = obj, 103 + .resv = obj->resv, 104 + }; 105 + 106 + return drm_gem_dmabuf_export(dev, &exp_info); 59 107 } 60 108 61 109 int msm_gem_prime_pin(struct drm_gem_object *obj)