Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/panthor: Add flag to map GEM object Write-Back Cacheable

This flag will be used by the UMD (user-mode driver) to optimize CPU
accesses to buffers that are frequently read by the CPU, or on which
the access pattern makes non-cacheable mappings inefficient.

Mapping buffers CPU-cached implies taking care of the CPU
cache maintenance in the UMD, unless the GPU is IO coherent.

v2:
- Add more to the commit message
- Tweak the doc
- Make sure we sync the section of the BO pointing to the CS
syncobj before we read its seqno

v3:
- Fix formatting/spelling issues

v4:
- Add Steve's R-b

v5:
- Drop Steve's R-b (changes in the ioctl semantics requiring
new review)

v6:
- Fix the uAPI doc
- Fix inverted logic in some comment

v7:
- No changes

v8:
- Collect R-b

Signed-off-by: Loïc Molinari <loic.molinari@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patch.msgid.link/20251208100841.730527-7-boris.brezillon@collabora.com
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

Authored by Loïc Molinari and committed by Boris Brezillon
cd2c9c30 c146c82f

+66 -5
+6 -1
drivers/gpu/drm/panthor/panthor_drv.c
··· 902 902 return panthor_vm_pool_destroy_vm(pfile->vms, args->id); 903 903 } 904 904 905 - #define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP 905 + #define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \ 906 + DRM_PANTHOR_BO_WB_MMAP) 906 907 907 908 static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, 908 909 struct drm_file *file) ··· 921 920 ret = -EINVAL; 922 921 goto out_dev_exit; 923 922 } 923 + 924 + if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) && 925 + (args->flags & DRM_PANTHOR_BO_WB_MMAP)) 926 + return -EINVAL; 924 927 925 928 if (args->exclusive_vm_id) { 926 929 vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id);
+35 -2
drivers/gpu/drm/panthor/panthor_gem.c
··· 77 77 static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {} 78 78 #endif 79 79 80 + static bool 81 + should_map_wc(struct panthor_gem_object *bo, struct panthor_vm *exclusive_vm) 82 + { 83 + struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base); 84 + 85 + /* We can't do uncached mappings if the device is coherent, 86 + * because the zeroing done by the shmem layer at page allocation 87 + * time happens on a cached mapping which isn't CPU-flushed (at least 88 + * not on Arm64 where the flush is deferred to PTE setup time, and 89 + * only done conditionally based on the mapping permissions). We can't 90 + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush 91 + * those, because they are NOPed if dma_dev_coherent() returns true. 92 + * 93 + * FIXME: Note that this problem is going to pop up again when we 94 + * decide to support mapping buffers with the NO_MMAP flag as 95 + * non-shareable (AKA buffers accessed only by the GPU), because we 96 + * need the same CPU flush to happen after page allocation, otherwise 97 + * there's a risk of data leak or late corruption caused by a dirty 98 + * cacheline being evicted. At this point we'll need a way to force 99 + * CPU cache maintenance regardless of whether the device is coherent 100 + * or not. 101 + */ 102 + if (ptdev->coherent) 103 + return false; 104 + 105 + /* Cached mappings are explicitly requested, so no write-combine. */ 106 + if (bo->flags & DRM_PANTHOR_BO_WB_MMAP) 107 + return false; 108 + 109 + /* The default is write-combine. 
*/ 110 + return true; 111 + } 112 + 80 113 static void panthor_gem_free_object(struct drm_gem_object *obj) 81 114 { 82 115 struct panthor_gem_object *bo = to_panthor_bo(obj); ··· 196 163 bo = to_panthor_bo(&obj->base); 197 164 kbo->obj = &obj->base; 198 165 bo->flags = bo_flags; 166 + bo->base.map_wc = should_map_wc(bo, vm); 199 167 bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); 200 168 drm_gem_object_get(bo->exclusive_vm_root_gem); 201 169 bo->base.base.resv = bo->exclusive_vm_root_gem->resv; ··· 397 363 */ 398 364 struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) 399 365 { 400 - struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); 401 366 struct panthor_gem_object *obj; 402 367 403 368 obj = kzalloc(sizeof(*obj), GFP_KERNEL); ··· 404 371 return ERR_PTR(-ENOMEM); 405 372 406 373 obj->base.base.funcs = &panthor_gem_funcs; 407 - obj->base.map_wc = !ptdev->coherent; 408 374 mutex_init(&obj->label.lock); 409 375 410 376 panthor_gem_debugfs_bo_init(obj); ··· 438 406 439 407 bo = to_panthor_bo(&shmem->base); 440 408 bo->flags = flags; 409 + bo->base.map_wc = should_map_wc(bo, exclusive_vm); 441 410 442 411 if (exclusive_vm) { 443 412 bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm);
+16 -2
drivers/gpu/drm/panthor/panthor_sched.c
··· 863 863 struct iosys_map map; 864 864 int ret; 865 865 866 - if (queue->syncwait.kmap) 867 - return queue->syncwait.kmap + queue->syncwait.offset; 866 + if (queue->syncwait.kmap) { 867 + bo = container_of(queue->syncwait.obj, 868 + struct panthor_gem_object, base.base); 869 + goto out_sync; 870 + } 868 871 869 872 bo = panthor_vm_get_bo_for_va(group->vm, 870 873 queue->syncwait.gpu_va, ··· 883 880 queue->syncwait.kmap = map.vaddr; 884 881 if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) 885 882 goto err_put_syncwait_obj; 883 + 884 + out_sync: 885 + /* Make sure the CPU caches are invalidated before the seqno is read. 886 + * drm_gem_shmem_sync() is a NOP if map_wc=true, so no need to check 887 + * it here. 888 + */ 889 + panthor_gem_sync(&bo->base.base, queue->syncwait.offset, 890 + queue->syncwait.sync64 ? 891 + sizeof(struct panthor_syncobj_64b) : 892 + sizeof(struct panthor_syncobj_32b), 893 + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE); 886 894 887 895 return queue->syncwait.kmap + queue->syncwait.offset; 888 896
+9
include/uapi/drm/panthor_drm.h
··· 681 681 enum drm_panthor_bo_flags { 682 682 /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ 683 683 DRM_PANTHOR_BO_NO_MMAP = (1 << 0), 684 + 685 + /** 686 + * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping. 687 + * 688 + * CPU map the buffer object in userspace by forcing the "Write-Back 689 + * Cacheable" cacheability attribute. The mapping otherwise uses the 690 + * "Non-Cacheable" attribute if the GPU is not IO coherent. 691 + */ 692 + DRM_PANTHOR_BO_WB_MMAP = (1 << 1), 684 693 }; 685 694 686 695 /**