drm/msm: Add VM_BIND throttling

A large number of (unsorted or separate) small (<2MB) mappings can cause
a lot of, probably unnecessary, prealloc pages. For example, a single 4k
page size mapping will pre-allocate 3 pages (for levels 2-4) for the
pagetable, which can chew up a large amount of unneeded memory. So add
a mechanism to put an upper bound on the # of pre-alloc pages.

Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
Tested-by: Antonino Maniscalco <antomani103@gmail.com>
Reviewed-by: Antonino Maniscalco <antomani103@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/661529/

Rob Clark 11 months ago b74fae54 3bebfd53

+46 -2

2 changed files

expand all

drivers

gpu

drm

msm

msm_gem.h

msm_gem_vma.c

+20

drivers/gpu/drm/msm/msm_gem.h

··· 76 76 struct drm_gpu_scheduler sched; 77 77 78 78 /** 79 + * @prealloc_throttle: Used to throttle VM_BIND ops if too much pre- 80 + * allocated memory is in flight. 81 + * 82 + * Because we have to pre-allocate pgtable pages for the worst case 83 + * (ie. new mappings do not share any PTEs with existing mappings) 84 + * we could end up consuming a lot of resources transiently. The 85 + * prealloc_throttle puts an upper bound on that. 86 + */ 87 + struct { 88 + /** @wait: Notified when preallocated resources are released */ 89 + wait_queue_head_t wait; 90 + 91 + /** 92 + * @in_flight: The # of preallocated pgtable pages in-flight 93 + * for queued VM_BIND jobs. 94 + */ 95 + atomic_t in_flight; 96 + } prealloc_throttle; 97 + 98 + /** 79 99 * @mm: Memory management for kernel managed VA allocations 80 100 * 81 101 * Only used for kernel managed VMs, unused for user managed VMs.

+26 -2

drivers/gpu/drm/msm/msm_gem_vma.c

··· 705 705 706 706 vm->mmu->funcs->prealloc_cleanup(vm->mmu, &job->prealloc); 707 707 708 + atomic_sub(job->prealloc.count, &vm->prealloc_throttle.in_flight); 709 + 708 710 drm_sched_job_cleanup(_job); 709 711 710 712 job_foreach_bo (obj, job) ··· 722 720 list_del(&op->node); 723 721 kfree(op); 724 722 } 723 + 724 + wake_up(&vm->prealloc_throttle.wait); 725 725 726 726 kfree(job); 727 727 } ··· 787 783 ret = drm_sched_init(&vm->sched, &args); 788 784 if (ret) 789 785 goto err_free_dummy; 786 + 787 + init_waitqueue_head(&vm->prealloc_throttle.wait); 790 788 } 791 789 792 790 drm_gpuvm_init(&vm->base, name, flags, drm, dummy_gem, ··· 1096 1090 * them as a single mapping. Otherwise the prealloc_count() will not realize 1097 1091 * they can share pagetable pages and vastly overcount. 1098 1092 */ 1099 - static void 1093 + static int 1100 1094 vm_bind_prealloc_count(struct msm_vm_bind_job *job) 1101 1095 { 1102 1096 struct msm_vm_bind_op *first = NULL, *last = NULL; 1097 + struct msm_gem_vm *vm = to_msm_vm(job->vm); 1098 + int ret; 1103 1099 1104 1100 for (int i = 0; i < job->nr_ops; i++) { 1105 1101 struct msm_vm_bind_op *op = &job->ops[i]; ··· 1130 1122 1131 1123 /* Flush the remaining range: */ 1132 1124 prealloc_count(job, first, last); 1125 + 1126 + /* 1127 + * Now that we know the needed amount to pre-alloc, throttle on pending 1128 + * VM_BIND jobs if we already have too much pre-alloc memory in flight 1129 + */ 1130 + ret = wait_event_interruptible( 1131 + vm->prealloc_throttle.wait, 1132 + atomic_read(&vm->prealloc_throttle.in_flight) <= 1024); 1133 + if (ret) 1134 + return ret; 1135 + 1136 + atomic_add(job->prealloc.count, &vm->prealloc_throttle.in_flight); 1137 + 1138 + return 0; 1133 1139 } 1134 1140 1135 1141 /* ··· 1434 1412 if (ret) 1435 1413 goto out_unlock; 1436 1414 1437 - vm_bind_prealloc_count(job); 1415 + ret = vm_bind_prealloc_count(job); 1416 + if (ret) 1417 + goto out_unlock; 1438 1418 1439 1419 struct drm_exec exec; 1440 1420 unsigned flags = 
DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT;

Configure Feed

Configure Feed