Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-xe-next-2026-03-26-1' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Hi Dave and Sima,

Here goes our late, final drm-xe-next PR towards 7.1. We just got the
purgeable BO uAPI in today, hence the late pull.

Among the big things we have:
- Add support for purgeable buffer objects

Thanks,
Matt

UAPI Changes:
- Add support for purgeable buffer objects (Arvind, Himal)

Driver Changes:
- Remove useless comment (Maarten)
- Issue GGTT invalidation under lock in ggtt_node_remove (Brost, Fixes)
- Fix mismatched include guards in header files (Shuicheng)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/acX4fWxPkZrrfwnT@gsse-cloud1.jf.intel.com

+814 -68
+181 -13
drivers/gpu/drm/xe/xe_bo.c
··· 838 838 return 0; 839 839 } 840 840 841 + /** 842 + * xe_bo_set_purgeable_shrinker() - Update shrinker accounting for purgeable state 843 + * @bo: Buffer object 844 + * @new_state: New purgeable state being set 845 + * 846 + * Transfers pages between shrinkable and purgeable buckets when the BO 847 + * purgeable state changes. Called automatically from xe_bo_set_purgeable_state(). 848 + */ 849 + static void xe_bo_set_purgeable_shrinker(struct xe_bo *bo, 850 + enum xe_madv_purgeable_state new_state) 851 + { 852 + struct ttm_buffer_object *ttm_bo = &bo->ttm; 853 + struct ttm_tt *tt = ttm_bo->ttm; 854 + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); 855 + struct xe_ttm_tt *xe_tt; 856 + long tt_pages; 857 + 858 + xe_bo_assert_held(bo); 859 + 860 + if (!tt || !ttm_tt_is_populated(tt)) 861 + return; 862 + 863 + xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 864 + tt_pages = tt->num_pages; 865 + 866 + if (!xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_DONTNEED) { 867 + xe_tt->purgeable = true; 868 + /* Transfer pages from shrinkable to purgeable count */ 869 + xe_shrinker_mod_pages(xe->mem.shrinker, -tt_pages, tt_pages); 870 + } else if (xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_WILLNEED) { 871 + xe_tt->purgeable = false; 872 + /* Transfer pages from purgeable to shrinkable count */ 873 + xe_shrinker_mod_pages(xe->mem.shrinker, tt_pages, -tt_pages); 874 + } 875 + } 876 + 877 + /** 878 + * xe_bo_set_purgeable_state() - Set BO purgeable state with validation 879 + * @bo: Buffer object 880 + * @new_state: New purgeable state 881 + * 882 + * Sets the purgeable state with lockdep assertions and validates state 883 + * transitions. Once a BO is PURGED, it cannot transition to any other state. 884 + * Invalid transitions are caught with xe_assert(). Shrinker page accounting 885 + * is updated automatically. 
886 + */ 887 + void xe_bo_set_purgeable_state(struct xe_bo *bo, 888 + enum xe_madv_purgeable_state new_state) 889 + { 890 + struct xe_device *xe = xe_bo_device(bo); 891 + 892 + xe_bo_assert_held(bo); 893 + 894 + /* Validate state is one of the known values */ 895 + xe_assert(xe, new_state == XE_MADV_PURGEABLE_WILLNEED || 896 + new_state == XE_MADV_PURGEABLE_DONTNEED || 897 + new_state == XE_MADV_PURGEABLE_PURGED); 898 + 899 + /* Once purged, always purged - cannot transition out */ 900 + xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED && 901 + new_state != XE_MADV_PURGEABLE_PURGED)); 902 + 903 + bo->madv_purgeable = new_state; 904 + xe_bo_set_purgeable_shrinker(bo, new_state); 905 + } 906 + 907 + /** 908 + * xe_ttm_bo_purge() - Purge buffer object backing store 909 + * @ttm_bo: The TTM buffer object to purge 910 + * @ctx: TTM operation context 911 + * 912 + * This function purges the backing store of a BO marked as DONTNEED and 913 + * triggers rebind to invalidate stale GPU mappings. For fault-mode VMs, 914 + * this zaps the PTEs. The next GPU access will trigger a page fault and 915 + * perform NULL rebind (scratch pages or clear PTEs based on VM config). 916 + * 917 + * Return: 0 on success, negative error code on failure 918 + */ 919 + static int xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx) 920 + { 921 + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); 922 + struct ttm_placement place = {}; 923 + int ret; 924 + 925 + xe_bo_assert_held(bo); 926 + 927 + if (!ttm_bo->ttm) 928 + return 0; 929 + 930 + if (!xe_bo_madv_is_dontneed(bo)) 931 + return 0; 932 + 933 + /* 934 + * Use the standard pre-move hook so we share the same cleanup/invalidate 935 + * path as migrations: drop any CPU vmap and schedule the necessary GPU 936 + * unbind/rebind work. 937 + * 938 + * This must be called before ttm_bo_validate() frees the pages. 939 + * May fail in no-wait contexts (fault/shrinker) or if the BO is 940 + * pinned. 
Keep state unchanged on failure so we don't end up "PURGED" 941 + * with stale mappings. 942 + */ 943 + ret = xe_bo_move_notify(bo, ctx); 944 + if (ret) 945 + return ret; 946 + 947 + ret = ttm_bo_validate(ttm_bo, &place, ctx); 948 + if (ret) 949 + return ret; 950 + 951 + /* Commit the state transition only once invalidation was queued */ 952 + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_PURGED); 953 + 954 + return 0; 955 + } 956 + 841 957 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, 842 958 struct ttm_operation_ctx *ctx, 843 959 struct ttm_resource *new_mem, ··· 972 856 bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && 973 857 ttm && ttm_tt_is_populated(ttm)) ? true : false; 974 858 int ret = 0; 859 + 860 + /* 861 + * Purge only non-shared BOs explicitly marked DONTNEED by userspace. 862 + * The move_notify callback will handle invalidation asynchronously. 863 + */ 864 + if (evict && xe_bo_madv_is_dontneed(bo)) { 865 + ret = xe_ttm_bo_purge(ttm_bo, ctx); 866 + if (ret) 867 + return ret; 868 + 869 + /* Free the unused eviction destination resource */ 870 + ttm_resource_free(ttm_bo, &new_mem); 871 + return 0; 872 + } 975 873 976 874 /* Bo creation path, moving to system or TT. 
*/ 977 875 if ((!old_mem && ttm) && !handle_system_ccs) { ··· 1284 1154 lret = xe_bo_move_notify(xe_bo, ctx); 1285 1155 if (!lret) 1286 1156 lret = xe_bo_shrink_purge(ctx, bo, scanned); 1157 + if (lret > 0 && xe_bo_madv_is_dontneed(xe_bo)) 1158 + xe_bo_set_purgeable_state(xe_bo, 1159 + XE_MADV_PURGEABLE_PURGED); 1287 1160 goto out_unref; 1288 1161 } 1289 1162 ··· 1739 1606 } 1740 1607 } 1741 1608 1742 - static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx) 1743 - { 1744 - struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); 1745 - 1746 - if (ttm_bo->ttm) { 1747 - struct ttm_placement place = {}; 1748 - int ret = ttm_bo_validate(ttm_bo, &place, ctx); 1749 - 1750 - drm_WARN_ON(&xe->drm, ret); 1751 - } 1752 - } 1753 - 1754 1609 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) 1755 1610 { 1756 1611 struct ttm_operation_ctx ctx = { ··· 2023 1902 if (!dma_resv_trylock(tbo->base.resv)) 2024 1903 goto out_validation; 2025 1904 1905 + /* 1906 + * Reject CPU faults to purgeable BOs. DONTNEED BOs can be purged 1907 + * at any time, and purged BOs have no backing store. Either case 1908 + * is undefined behavior for CPU access. 1909 + */ 1910 + if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) { 1911 + ret = VM_FAULT_SIGBUS; 1912 + goto out_unlock; 1913 + } 1914 + 2026 1915 if (xe_ttm_bo_is_imported(tbo)) { 2027 1916 ret = VM_FAULT_SIGBUS; 2028 1917 drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); ··· 2123 1992 if (err) 2124 1993 break; 2125 1994 1995 + /* 1996 + * Reject CPU faults to purgeable BOs. DONTNEED BOs can be 1997 + * purged at any time, and purged BOs have no backing store. 
1998 + */ 1999 + if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) { 2000 + err = -EFAULT; 2001 + break; 2002 + } 2003 + 2126 2004 if (xe_ttm_bo_is_imported(tbo)) { 2127 2005 err = -EFAULT; 2128 2006 drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); ··· 2209 2069 .access = xe_bo_vm_access, 2210 2070 }; 2211 2071 2072 + static int xe_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) 2073 + { 2074 + struct xe_bo *bo = gem_to_xe_bo(obj); 2075 + int err = 0; 2076 + 2077 + /* 2078 + * Reject mmap of purgeable BOs. DONTNEED BOs can be purged 2079 + * at any time, making CPU access undefined behavior. Purged BOs have 2080 + * no backing store and are permanently invalid. 2081 + */ 2082 + err = xe_bo_lock(bo, true); 2083 + if (err) 2084 + return err; 2085 + 2086 + if (xe_bo_madv_is_dontneed(bo)) 2087 + err = -EBUSY; 2088 + else if (xe_bo_is_purged(bo)) 2089 + err = -EINVAL; 2090 + xe_bo_unlock(bo); 2091 + if (err) 2092 + return err; 2093 + 2094 + return drm_gem_ttm_mmap(obj, vma); 2095 + } 2096 + 2212 2097 static const struct drm_gem_object_funcs xe_gem_object_funcs = { 2213 2098 .free = xe_gem_object_free, 2214 2099 .close = xe_gem_object_close, 2215 - .mmap = drm_gem_ttm_mmap, 2100 + .mmap = xe_gem_object_mmap, 2216 2101 .export = xe_gem_prime_export, 2217 2102 .vm_ops = &xe_gem_vm_ops, 2218 2103 }; ··· 2362 2197 INIT_LIST_HEAD(&bo->client_link); 2363 2198 #endif 2364 2199 INIT_LIST_HEAD(&bo->vram_userfault_link); 2200 + 2201 + /* Initialize purge advisory state */ 2202 + bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED; 2365 2203 2366 2204 drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); 2367 2205
+58
drivers/gpu/drm/xe/xe_bo.h
··· 87 87 88 88 #define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << XE_PTE_SHIFT) 89 89 90 + /** 91 + * enum xe_madv_purgeable_state - Buffer object purgeable state enumeration 92 + * 93 + * This enum defines the possible purgeable states for a buffer object, 94 + * allowing userspace to provide memory usage hints to the kernel for 95 + * better memory management under pressure. 96 + * 97 + * @XE_MADV_PURGEABLE_WILLNEED: The buffer object is needed and should not be purged. 98 + * This is the default state. 99 + * @XE_MADV_PURGEABLE_DONTNEED: The buffer object is not currently needed and can be 100 + * purged by the kernel under memory pressure. 101 + * @XE_MADV_PURGEABLE_PURGED: The buffer object has been purged by the kernel. 102 + * 103 + * Accessing a purged buffer will result in an error. Per i915 semantics, 104 + * once purged, a BO remains permanently invalid and must be destroyed and recreated. 105 + */ 106 + enum xe_madv_purgeable_state { 107 + XE_MADV_PURGEABLE_WILLNEED, 108 + XE_MADV_PURGEABLE_DONTNEED, 109 + XE_MADV_PURGEABLE_PURGED, 110 + }; 111 + 90 112 struct sg_table; 91 113 92 114 struct xe_bo *xe_bo_alloc(void); ··· 236 214 { 237 215 return bo->pxp_key_instance; 238 216 } 217 + 218 + /** 219 + * xe_bo_is_purged() - Check if buffer object has been purged 220 + * @bo: The buffer object to check 221 + * 222 + * Checks if the buffer object's backing store has been discarded by the 223 + * kernel due to memory pressure after being marked as purgeable (DONTNEED). 224 + * Once purged, the BO cannot be restored and any attempt to use it will fail. 
225 + * 226 + * Context: Caller must hold the BO's dma-resv lock 227 + * Return: true if the BO has been purged, false otherwise 228 + */ 229 + static inline bool xe_bo_is_purged(struct xe_bo *bo) 230 + { 231 + xe_bo_assert_held(bo); 232 + return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED; 233 + } 234 + 235 + /** 236 + * xe_bo_madv_is_dontneed() - Check if BO is marked as DONTNEED 237 + * @bo: The buffer object to check 238 + * 239 + * Checks if userspace has marked this BO as DONTNEED (i.e., its contents 240 + * are not currently needed and can be discarded under memory pressure). 241 + * This is used internally to decide whether a BO is eligible for purging. 242 + * 243 + * Context: Caller must hold the BO's dma-resv lock 244 + * Return: true if the BO is marked DONTNEED, false otherwise 245 + */ 246 + static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo) 247 + { 248 + xe_bo_assert_held(bo); 249 + return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED; 250 + } 251 + 252 + void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state); 239 253 240 254 static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) 241 255 {
+6
drivers/gpu/drm/xe/xe_bo_types.h
··· 108 108 * from default 109 109 */ 110 110 u64 min_align; 111 + 112 + /** 113 + * @madv_purgeable: user space advise on BO purgeability, protected 114 + * by BO's dma-resv lock. 115 + */ 116 + u32 madv_purgeable; 111 117 }; 112 118 113 119 #endif
-3
drivers/gpu/drm/xe/xe_device.c
··· 390 390 } 391 391 392 392 static struct drm_driver driver = { 393 - /* Don't use MTRRs here; the Xserver or userspace app should 394 - * deal with them for Intel hardware. 395 - */ 396 393 .driver_features = 397 394 DRIVER_GEM | 398 395 DRIVER_RENDER | DRIVER_SYNCOBJ |
+24
drivers/gpu/drm/xe/xe_dma_buf.c
··· 223 223 if (bo->vm) 224 224 return ERR_PTR(-EPERM); 225 225 226 + /* 227 + * Reject exporting purgeable BOs. DONTNEED BOs can be purged 228 + * at any time, making the exported dma-buf unusable. Purged BOs 229 + * have no backing store and are permanently invalid. 230 + */ 231 + ret = xe_bo_lock(bo, true); 232 + if (ret) 233 + return ERR_PTR(ret); 234 + 235 + if (xe_bo_madv_is_dontneed(bo)) { 236 + ret = -EBUSY; 237 + goto out_unlock; 238 + } 239 + 240 + if (xe_bo_is_purged(bo)) { 241 + ret = -EINVAL; 242 + goto out_unlock; 243 + } 244 + xe_bo_unlock(bo); 245 + 226 246 ret = ttm_bo_setup_export(&bo->ttm, &ctx); 227 247 if (ret) 228 248 return ERR_PTR(ret); ··· 252 232 buf->ops = &xe_dmabuf_ops; 253 233 254 234 return buf; 235 + 236 + out_unlock: 237 + xe_bo_unlock(bo); 238 + return ERR_PTR(ret); 255 239 } 256 240 257 241 static struct drm_gem_object *
+2 -7
drivers/gpu/drm/xe/xe_ggtt.c
··· 481 481 xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node)); 482 482 drm_mm_remove_node(&node->base); 483 483 node->base.size = 0; 484 + if (bound && node->invalidate_on_remove) 485 + xe_ggtt_invalidate(ggtt); 484 486 mutex_unlock(&ggtt->lock); 485 487 486 - if (!bound) 487 - goto free_node; 488 - 489 - if (node->invalidate_on_remove) 490 - xe_ggtt_invalidate(ggtt); 491 - 492 - free_node: 493 488 ggtt_node_fini(node); 494 489 } 495 490
+3 -3
drivers/gpu/drm/xe/xe_gt_idle_types.h
··· 3 3 * Copyright © 2023 Intel Corporation 4 4 */ 5 5 6 - #ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ 7 - #define _XE_GT_IDLE_SYSFS_TYPES_H_ 6 + #ifndef _XE_GT_IDLE_TYPES_H_ 7 + #define _XE_GT_IDLE_TYPES_H_ 8 8 9 9 #include <linux/spinlock.h> 10 10 #include <linux/types.h> ··· 40 40 u64 (*idle_residency)(struct xe_guc_pc *pc); 41 41 }; 42 42 43 - #endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */ 43 + #endif /* _XE_GT_IDLE_TYPES_H_ */
+2 -2
drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
··· 3 3 * Copyright © 2022 Intel Corporation 4 4 */ 5 5 6 - #ifndef _XE_GUC_ENGINE_TYPES_H_ 7 - #define _XE_GUC_ENGINE_TYPES_H_ 6 + #ifndef _XE_GUC_EXEC_QUEUE_TYPES_H_ 7 + #define _XE_GUC_EXEC_QUEUE_TYPES_H_ 8 8 9 9 #include <linux/spinlock.h> 10 10 #include <linux/workqueue.h>
+3 -3
drivers/gpu/drm/xe/xe_heci_gsc.h
··· 2 2 /* 3 3 * Copyright(c) 2023, Intel Corporation. All rights reserved. 4 4 */ 5 - #ifndef __XE_HECI_GSC_DEV_H__ 6 - #define __XE_HECI_GSC_DEV_H__ 5 + #ifndef _XE_HECI_GSC_H_ 6 + #define _XE_HECI_GSC_H_ 7 7 8 8 #include <linux/types.h> 9 9 ··· 37 37 void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir); 38 38 void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir); 39 39 40 - #endif /* __XE_HECI_GSC_DEV_H__ */ 40 + #endif /* _XE_HECI_GSC_H_ */
+2 -2
drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
··· 3 3 * Copyright © 2023 Intel Corporation 4 4 */ 5 5 6 - #ifndef _XE_ENGINE_CLASS_SYSFS_H_ 7 - #define _XE_ENGINE_CLASS_SYSFS_H_ 6 + #ifndef _XE_HW_ENGINE_CLASS_SYSFS_H_ 7 + #define _XE_HW_ENGINE_CLASS_SYSFS_H_ 8 8 9 9 #include <linux/kobject.h> 10 10
+2 -2
drivers/gpu/drm/xe/xe_late_bind_fw_types.h
··· 3 3 * Copyright © 2025 Intel Corporation 4 4 */ 5 5 6 - #ifndef _XE_LATE_BIND_TYPES_H_ 7 - #define _XE_LATE_BIND_TYPES_H_ 6 + #ifndef _XE_LATE_BIND_FW_TYPES_H_ 7 + #define _XE_LATE_BIND_FW_TYPES_H_ 8 8 9 9 #include <linux/iosys-map.h> 10 10 #include <linux/mutex.h>
+14 -1
drivers/gpu/drm/xe/xe_pagefault.c
··· 59 59 if (!bo) 60 60 return 0; 61 61 62 + /* 63 + * Skip validate/migrate for DONTNEED/purged BOs - repopulating 64 + * their pages would prevent the shrinker from reclaiming them. 65 + * For non-scratch VMs there is no safe fallback so fail the fault. 66 + * For scratch VMs let xe_vma_rebind() run normally; it will install 67 + * scratch PTEs so the GPU gets safe zero reads instead of faulting. 68 + */ 69 + if (unlikely(xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo))) { 70 + if (!xe_vm_has_scratch(vm)) 71 + return -EACCES; 72 + return 0; 73 + } 74 + 62 75 return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : 63 76 xe_bo_validate(bo, vm, true, exec); 64 77 } ··· 158 145 159 146 down_read(&xe->usm.lock); 160 147 vm = xa_load(&xe->usm.asid_to_vm, asid); 161 - if (vm && xe_vm_in_fault_mode(vm)) 148 + if (vm && (xe_vm_in_fault_mode(vm) || xe_vm_has_scratch(vm))) 162 149 xe_vm_get(vm); 163 150 else 164 151 vm = ERR_PTR(-EINVAL);
+2 -2
drivers/gpu/drm/xe/xe_platform_types.h
··· 3 3 * Copyright © 2022 Intel Corporation 4 4 */ 5 5 6 - #ifndef _XE_PLATFORM_INFO_TYPES_H_ 7 - #define _XE_PLATFORM_INFO_TYPES_H_ 6 + #ifndef _XE_PLATFORM_TYPES_H_ 7 + #define _XE_PLATFORM_TYPES_H_ 8 8 9 9 /* 10 10 * Keep this in graphics version based order and chronological order within a
+33 -7
drivers/gpu/drm/xe/xe_pt.c
··· 531 531 /* Is this a leaf entry ?*/ 532 532 if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { 533 533 struct xe_res_cursor *curs = xe_walk->curs; 534 - bool is_null = xe_vma_is_null(xe_walk->vma); 535 - bool is_vram = is_null ? false : xe_res_is_vram(curs); 534 + struct xe_bo *bo = xe_vma_bo(xe_walk->vma); 535 + bool is_null_or_purged = xe_vma_is_null(xe_walk->vma) || 536 + (bo && xe_bo_is_purged(bo)); 537 + bool is_vram = is_null_or_purged ? false : xe_res_is_vram(curs); 536 538 537 539 XE_WARN_ON(xe_walk->va_curs_start != addr); 538 540 539 541 if (xe_walk->clear_pt) { 540 542 pte = 0; 541 543 } else { 542 - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : 544 + /* 545 + * For purged BOs, treat like null VMAs - pass address 0. 546 + * The pte_encode_vma will set XE_PTE_NULL flag for scratch mapping. 547 + */ 548 + pte = vm->pt_ops->pte_encode_vma(is_null_or_purged ? 0 : 543 549 xe_res_dma(curs) + 544 550 xe_walk->dma_offset, 545 551 xe_walk->vma, 546 552 pat_index, level); 547 - if (!is_null) 553 + if (!is_null_or_purged) 548 554 pte |= is_vram ? xe_walk->default_vram_pte : 549 555 xe_walk->default_system_pte; 550 556 ··· 574 568 if (unlikely(ret)) 575 569 return ret; 576 570 577 - if (!is_null && !xe_walk->clear_pt) 571 + if (!is_null_or_purged && !xe_walk->clear_pt) 578 572 xe_res_next(curs, next - addr); 579 573 xe_walk->va_curs_start = next; 580 574 xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); ··· 727 721 }; 728 722 struct xe_pt *pt = vm->pt_root[tile->id]; 729 723 int ret; 724 + bool is_purged = false; 725 + 726 + /* 727 + * Check if BO is purged: 728 + * - Scratch VMs: Use scratch PTEs (XE_PTE_NULL) for safe zero reads 729 + * - Non-scratch VMs: Clear PTEs to zero (non-present) to avoid mapping to phys addr 0 730 + * 731 + * For non-scratch VMs, we force clear_pt=true so leaf PTEs become completely 732 + * zero instead of creating a PRESENT mapping to physical address 0. 
733 + */ 734 + if (bo && xe_bo_is_purged(bo)) { 735 + is_purged = true; 736 + 737 + /* 738 + * For non-scratch VMs, a NULL rebind should use zero PTEs 739 + * (non-present), not a present PTE to phys 0. 740 + */ 741 + if (!xe_vm_has_scratch(vm)) 742 + xe_walk.clear_pt = true; 743 + } 730 744 731 745 if (range) { 732 746 /* Move this entire thing to xe_svm.c? */ ··· 782 756 } 783 757 784 758 xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM; 785 - xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; 759 + xe_walk.dma_offset = (bo && !is_purged) ? vram_region_gpu_offset(bo->ttm.resource) : 0; 786 760 if (!range) 787 761 xe_bo_assert_held(bo); 788 762 789 - if (!xe_vma_is_null(vma) && !range) { 763 + if (!xe_vma_is_null(vma) && !range && !is_purged) { 790 764 if (xe_vma_is_userptr(vma)) 791 765 xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0, 792 766 xe_vma_size(vma), &curs);
+2
drivers/gpu/drm/xe/xe_query.c
··· 342 342 DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY; 343 343 config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= 344 344 DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX; 345 + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= 346 + DRM_XE_QUERY_CONFIG_FLAG_HAS_PURGING_SUPPORT; 345 347 config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = 346 348 xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; 347 349 config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
+1
drivers/gpu/drm/xe/xe_svm.c
··· 322 322 .preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 323 323 .pat_index = vma->attr.default_pat_index, 324 324 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 325 + .purgeable_state = XE_MADV_PURGEABLE_WILLNEED, 325 326 }; 326 327 327 328 xe_vma_mem_attr_copy(&vma->attr, &default_attr);
+2 -2
drivers/gpu/drm/xe/xe_tile_printk.h
··· 3 3 * Copyright © 2025 Intel Corporation 4 4 */ 5 5 6 - #ifndef _xe_tile_printk_H_ 7 - #define _xe_tile_printk_H_ 6 + #ifndef _XE_TILE_PRINTK_H_ 7 + #define _XE_TILE_PRINTK_H_ 8 8 9 9 #include "xe_printk.h" 10 10
+95 -17
drivers/gpu/drm/xe/xe_vm.c
··· 40 40 #include "xe_tile.h" 41 41 #include "xe_tlb_inval.h" 42 42 #include "xe_trace_bo.h" 43 + #include "xe_vm_madvise.h" 43 44 #include "xe_wa.h" 44 45 45 46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) ··· 328 327 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 329 328 { 330 329 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); 330 + struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj); 331 331 struct drm_gpuva *gpuva; 332 332 int ret; 333 333 ··· 337 335 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, 338 336 &vm->rebind_list); 339 337 338 + /* Skip re-populating purged BOs, rebind maps scratch pages. */ 339 + if (xe_bo_is_purged(bo)) { 340 + vm_bo->evicted = false; 341 + return 0; 342 + } 343 + 340 344 if (!try_wait_for_completion(&vm->xe->pm_block)) 341 345 return -EAGAIN; 342 346 343 - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); 347 + ret = xe_bo_validate(bo, vm, false, exec); 344 348 if (ret) 345 349 return ret; 346 350 ··· 1155 1147 static void xe_vma_destroy_late(struct xe_vma *vma) 1156 1148 { 1157 1149 struct xe_vm *vm = xe_vma_vm(vma); 1150 + struct xe_bo *bo = xe_vma_bo(vma); 1158 1151 1159 1152 if (vma->ufence) { 1160 1153 xe_sync_ufence_put(vma->ufence); ··· 1170 1161 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1171 1162 xe_vm_put(vm); 1172 1163 } else { 1173 - xe_bo_put(xe_vma_bo(vma)); 1164 + xe_bo_put(bo); 1174 1165 } 1175 1166 1176 1167 xe_vma_free(vma); ··· 1196 1187 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) 1197 1188 { 1198 1189 struct xe_vm *vm = xe_vma_vm(vma); 1190 + struct xe_bo *bo = xe_vma_bo(vma); 1199 1191 1200 1192 lockdep_assert_held_write(&vm->lock); 1201 1193 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); ··· 1205 1195 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1206 1196 xe_userptr_destroy(to_userptr_vma(vma)); 1207 1197 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) 
{ 1208 - xe_bo_assert_held(xe_vma_bo(vma)); 1198 + xe_bo_assert_held(bo); 1209 1199 1210 1200 drm_gpuva_unlink(&vma->gpuva); 1201 + xe_bo_recompute_purgeable_state(bo); 1211 1202 } 1212 1203 1213 1204 xe_vm_assert_held(vm); ··· 1438 1427 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1439 1428 u16 pat_index, u32 pt_level) 1440 1429 { 1430 + struct xe_bo *bo = xe_vma_bo(vma); 1431 + struct xe_vm *vm = xe_vma_vm(vma); 1432 + 1441 1433 pte |= XE_PAGE_PRESENT; 1442 1434 1443 1435 if (likely(!xe_vma_read_only(vma))) ··· 1449 1435 pte |= pte_encode_pat_index(pat_index, pt_level); 1450 1436 pte |= pte_encode_ps(pt_level); 1451 1437 1452 - if (unlikely(xe_vma_is_null(vma))) 1438 + /* 1439 + * NULL PTEs redirect to scratch page (return zeros on read). 1440 + * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs. 1441 + * Never set NULL flag without scratch page - causes undefined behavior. 1442 + */ 1443 + if (unlikely(xe_vma_is_null(vma) || 1444 + (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm)))) 1453 1445 pte |= XE_PTE_NULL; 1454 1446 1455 1447 return pte; ··· 2771 2751 .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 2772 2752 .default_pat_index = op->map.pat_index, 2773 2753 .pat_index = op->map.pat_index, 2754 + .purgeable_state = XE_MADV_PURGEABLE_WILLNEED, 2774 2755 }; 2775 2756 2776 2757 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; ··· 3011 2990 } 3012 2991 } 3013 2992 2993 + /** 2994 + * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate() 2995 + * @res_evict: Allow evicting resources during validation 2996 + * @validate: Perform BO validation 2997 + * @request_decompress: Request BO decompression 2998 + * @check_purged: Reject operation if BO is purged 2999 + */ 3000 + struct xe_vma_lock_and_validate_flags { 3001 + u32 res_evict : 1; 3002 + u32 validate : 1; 3003 + u32 request_decompress : 1; 3004 + u32 check_purged : 1; 3005 + }; 3006 + 3014 3007 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma 
*vma, 3015 - bool res_evict, bool validate, bool request_decompress) 3008 + struct xe_vma_lock_and_validate_flags flags) 3016 3009 { 3017 3010 struct xe_bo *bo = xe_vma_bo(vma); 3018 3011 struct xe_vm *vm = xe_vma_vm(vma); ··· 3035 3000 if (bo) { 3036 3001 if (!bo->vm) 3037 3002 err = drm_exec_lock_obj(exec, &bo->ttm.base); 3038 - if (!err && validate) 3003 + 3004 + /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */ 3005 + if (!err && flags.check_purged) { 3006 + if (xe_bo_madv_is_dontneed(bo)) 3007 + err = -EBUSY; /* BO marked purgeable */ 3008 + else if (xe_bo_is_purged(bo)) 3009 + err = -EINVAL; /* BO already purged */ 3010 + } 3011 + 3012 + if (!err && flags.validate) 3039 3013 err = xe_bo_validate(bo, vm, 3040 3014 xe_vm_allow_vm_eviction(vm) && 3041 - res_evict, exec); 3015 + flags.res_evict, exec); 3042 3016 3043 3017 if (err) 3044 3018 return err; 3045 3019 3046 - if (request_decompress) 3020 + if (flags.request_decompress) 3047 3021 err = xe_bo_decompress(bo); 3048 3022 } 3049 3023 ··· 3146 3102 case DRM_GPUVA_OP_MAP: 3147 3103 if (!op->map.invalidate_on_bind) 3148 3104 err = vma_lock_and_validate(exec, op->map.vma, 3149 - res_evict, 3150 - !xe_vm_in_fault_mode(vm) || 3151 - op->map.immediate, 3152 - op->map.request_decompress); 3105 + (struct xe_vma_lock_and_validate_flags) { 3106 + .res_evict = res_evict, 3107 + .validate = !xe_vm_in_fault_mode(vm) || 3108 + op->map.immediate, 3109 + .request_decompress = 3110 + op->map.request_decompress, 3111 + .check_purged = true, 3112 + }); 3153 3113 break; 3154 3114 case DRM_GPUVA_OP_REMAP: 3155 3115 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); ··· 3162 3114 3163 3115 err = vma_lock_and_validate(exec, 3164 3116 gpuva_to_vma(op->base.remap.unmap->va), 3165 - res_evict, false, false); 3117 + (struct xe_vma_lock_and_validate_flags) { 3118 + .res_evict = res_evict, 3119 + .validate = false, 3120 + .request_decompress = false, 3121 + .check_purged = false, 3122 + }); 3166 3123 if 
(!err && op->remap.prev) 3167 3124 err = vma_lock_and_validate(exec, op->remap.prev, 3168 - res_evict, true, false); 3125 + (struct xe_vma_lock_and_validate_flags) { 3126 + .res_evict = res_evict, 3127 + .validate = true, 3128 + .request_decompress = false, 3129 + .check_purged = true, 3130 + }); 3169 3131 if (!err && op->remap.next) 3170 3132 err = vma_lock_and_validate(exec, op->remap.next, 3171 - res_evict, true, false); 3133 + (struct xe_vma_lock_and_validate_flags) { 3134 + .res_evict = res_evict, 3135 + .validate = true, 3136 + .request_decompress = false, 3137 + .check_purged = true, 3138 + }); 3172 3139 break; 3173 3140 case DRM_GPUVA_OP_UNMAP: 3174 3141 err = check_ufence(gpuva_to_vma(op->base.unmap.va)); ··· 3192 3129 3193 3130 err = vma_lock_and_validate(exec, 3194 3131 gpuva_to_vma(op->base.unmap.va), 3195 - res_evict, false, false); 3132 + (struct xe_vma_lock_and_validate_flags) { 3133 + .res_evict = res_evict, 3134 + .validate = false, 3135 + .request_decompress = false, 3136 + .check_purged = false, 3137 + }); 3196 3138 break; 3197 3139 case DRM_GPUVA_OP_PREFETCH: 3198 3140 { ··· 3210 3142 region <= ARRAY_SIZE(region_to_mem_type)); 3211 3143 } 3212 3144 3145 + /* 3146 + * Prefetch attempts to migrate BO's backing store without 3147 + * repopulating it first. Purged BOs have no backing store 3148 + * to migrate, so reject the operation. 3149 + */ 3213 3150 err = vma_lock_and_validate(exec, 3214 3151 gpuva_to_vma(op->base.prefetch.va), 3215 - res_evict, false, false); 3152 + (struct xe_vma_lock_and_validate_flags) { 3153 + .res_evict = res_evict, 3154 + .validate = false, 3155 + .request_decompress = false, 3156 + .check_purged = true, 3157 + }); 3216 3158 if (!err && !xe_vma_has_no_bo(vma)) 3217 3159 err = xe_bo_migrate(xe_vma_bo(vma), 3218 3160 region_to_mem_type[region],
+299 -4
drivers/gpu/drm/xe/xe_vm_madvise.c
··· 13 13 #include "xe_pt.h" 14 14 #include "xe_svm.h" 15 15 #include "xe_tlb_inval.h" 16 + #include "xe_vm.h" 16 17 17 18 struct xe_vmas_in_madvise_range { 18 19 u64 addr; ··· 27 26 /** 28 27 * struct xe_madvise_details - Argument to madvise_funcs 29 28 * @dpagemap: Reference-counted pointer to a struct drm_pagemap. 29 + * @has_purged_bo: Track if any BO was purged (for purgeable state) 30 + * @retained_ptr: User pointer for retained value (for purgeable state) 30 31 * 31 32 * The madvise IOCTL handler may, in addition to the user-space 32 33 * args, have additional info to pass into the madvise_func that ··· 37 34 */ 38 35 struct xe_madvise_details { 39 36 struct drm_pagemap *dpagemap; 37 + bool has_purged_bo; 38 + u64 retained_ptr; 40 39 }; 41 40 42 41 static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) ··· 185 180 } 186 181 } 187 182 183 + /** 184 + * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf 185 + * @bo: Buffer object 186 + * 187 + * Prevent marking imported or exported dma-bufs as purgeable. 188 + * For imported BOs, Xe doesn't own the backing store and cannot 189 + * safely reclaim pages (exporter or other devices may still be 190 + * using them). For exported BOs, external devices may have active 191 + * mappings we cannot track. 192 + * 193 + * Return: true if BO is imported or exported, false otherwise 194 + */ 195 + static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo) 196 + { 197 + struct drm_gem_object *obj = &bo->ttm.base; 198 + 199 + /* Imported: exporter owns backing store */ 200 + if (drm_gem_is_imported(obj)) 201 + return true; 202 + 203 + /* Exported: external devices may be accessing */ 204 + if (obj->dma_buf) 205 + return true; 206 + 207 + return false; 208 + } 209 + 210 + /** 211 + * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation 212 + * 213 + * Distinguishes whether a BO's VMAs are all DONTNEED, have at least 214 + * one WILLNEED, or have no VMAs at all. 
215 + * 216 + * Enum values align with XE_MADV_PURGEABLE_* states for consistency. 217 + */ 218 + enum xe_bo_vmas_purge_state { 219 + /** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */ 220 + XE_BO_VMAS_STATE_WILLNEED = 0, 221 + /** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */ 222 + XE_BO_VMAS_STATE_DONTNEED = 1, 223 + /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */ 224 + XE_BO_VMAS_STATE_NO_VMAS = 2, 225 + }; 226 + 227 + /* 228 + * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and 229 + * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across 230 + * both enums so the single-line cast is always valid. 231 + */ 232 + static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED, 233 + "VMA purge state WILLNEED must equal madv purgeable WILLNEED"); 234 + static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED, 235 + "VMA purge state DONTNEED must equal madv purgeable DONTNEED"); 236 + 237 + /** 238 + * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state 239 + * @bo: Buffer object 240 + * 241 + * Check all VMAs across all VMs to determine aggregate purgeable state. 242 + * Shared BOs require unanimous DONTNEED state from all mappings. 243 + * 244 + * Caller must hold BO dma-resv lock. 
245 + * 246 + * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED, 247 + * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED, 248 + * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs 249 + */ 250 + static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo) 251 + { 252 + struct drm_gpuvm_bo *vm_bo; 253 + struct drm_gpuva *gpuva; 254 + struct drm_gem_object *obj = &bo->ttm.base; 255 + bool has_vmas = false; 256 + 257 + xe_bo_assert_held(bo); 258 + 259 + /* Shared dma-bufs cannot be purgeable */ 260 + if (xe_bo_is_dmabuf_shared(bo)) 261 + return XE_BO_VMAS_STATE_WILLNEED; 262 + 263 + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { 264 + drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { 265 + struct xe_vma *vma = gpuva_to_vma(gpuva); 266 + 267 + has_vmas = true; 268 + 269 + /* Any non-DONTNEED VMA prevents purging */ 270 + if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED) 271 + return XE_BO_VMAS_STATE_WILLNEED; 272 + } 273 + } 274 + 275 + /* 276 + * No VMAs => preserve existing BO purgeable state. 277 + * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped. 278 + */ 279 + if (!has_vmas) 280 + return XE_BO_VMAS_STATE_NO_VMAS; 281 + 282 + return XE_BO_VMAS_STATE_DONTNEED; 283 + } 284 + 285 + /** 286 + * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs 287 + * @bo: Buffer object 288 + * 289 + * Walk all VMAs to determine if BO should be purgeable or not. 290 + * Shared BOs require unanimous DONTNEED state from all mappings. 291 + * If the BO has no VMAs the existing state is preserved. 292 + * 293 + * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists, 294 + * VM lock must also be held (write) to prevent concurrent VMA modifications. 
295 + * This is satisfied at both call sites: 296 + * - xe_vma_destroy(): holds vm->lock write 297 + * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path) 298 + * 299 + * Return: nothing 300 + */ 301 + void xe_bo_recompute_purgeable_state(struct xe_bo *bo) 302 + { 303 + enum xe_bo_vmas_purge_state vma_state; 304 + 305 + if (!bo) 306 + return; 307 + 308 + xe_bo_assert_held(bo); 309 + 310 + /* 311 + * Once purged, always purged. Cannot transition back to WILLNEED. 312 + * This matches i915 semantics where purged BOs are permanently invalid. 313 + */ 314 + if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED) 315 + return; 316 + 317 + vma_state = xe_bo_all_vmas_dontneed(bo); 318 + 319 + if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable && 320 + vma_state != XE_BO_VMAS_STATE_NO_VMAS) 321 + xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state); 322 + } 323 + 324 + /** 325 + * madvise_purgeable - Handle purgeable buffer object advice 326 + * @xe: XE device 327 + * @vm: VM 328 + * @vmas: Array of VMAs 329 + * @num_vmas: Number of VMAs 330 + * @op: Madvise operation 331 + * @details: Madvise details for return values 332 + * 333 + * Handles DONTNEED/WILLNEED/PURGED states. Tracks if any BO was purged 334 + * in details->has_purged_bo for later copy to userspace. 
335 + */ 336 + static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, 337 + struct xe_vma **vmas, int num_vmas, 338 + struct drm_xe_madvise *op, 339 + struct xe_madvise_details *details) 340 + { 341 + int i; 342 + 343 + xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE); 344 + 345 + for (i = 0; i < num_vmas; i++) { 346 + struct xe_bo *bo = xe_vma_bo(vmas[i]); 347 + 348 + if (!bo) { 349 + /* Purgeable state applies to BOs only, skip non-BO VMAs */ 350 + vmas[i]->skip_invalidation = true; 351 + continue; 352 + } 353 + 354 + /* BO must be locked before modifying madv state */ 355 + xe_bo_assert_held(bo); 356 + 357 + /* Skip shared dma-bufs - no PTEs to zap */ 358 + if (xe_bo_is_dmabuf_shared(bo)) { 359 + vmas[i]->skip_invalidation = true; 360 + continue; 361 + } 362 + 363 + /* 364 + * Once purged, always purged. Cannot transition back to WILLNEED. 365 + * This matches i915 semantics where purged BOs are permanently invalid. 366 + */ 367 + if (xe_bo_is_purged(bo)) { 368 + details->has_purged_bo = true; 369 + vmas[i]->skip_invalidation = true; 370 + continue; 371 + } 372 + 373 + switch (op->purge_state_val.val) { 374 + case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED: 375 + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; 376 + vmas[i]->skip_invalidation = true; 377 + 378 + xe_bo_recompute_purgeable_state(bo); 379 + break; 380 + case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED: 381 + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; 382 + /* 383 + * Don't zap PTEs at DONTNEED time -- pages are still 384 + * alive. The zap happens in xe_bo_move_notify() right 385 + * before the shrinker frees them. 
386 + */ 387 + vmas[i]->skip_invalidation = true; 388 + 389 + xe_bo_recompute_purgeable_state(bo); 390 + break; 391 + default: 392 + /* Should never hit - values validated in madvise_args_are_sane() */ 393 + xe_assert(vm->xe, 0); 394 + return; 395 + } 396 + } 397 + } 398 + 188 399 typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, 189 400 struct xe_vma **vmas, int num_vmas, 190 401 struct drm_xe_madvise *op, ··· 410 189 [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, 411 190 [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic, 412 191 [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index, 192 + [DRM_XE_VMA_ATTR_PURGEABLE_STATE] = madvise_purgeable, 413 193 }; 414 194 415 195 static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) ··· 541 319 return false; 542 320 break; 543 321 } 322 + case DRM_XE_VMA_ATTR_PURGEABLE_STATE: 323 + { 324 + u32 val = args->purge_state_val.val; 325 + 326 + if (XE_IOCTL_DBG(xe, !(val == DRM_XE_VMA_PURGEABLE_STATE_WILLNEED || 327 + val == DRM_XE_VMA_PURGEABLE_STATE_DONTNEED))) 328 + return false; 329 + 330 + if (XE_IOCTL_DBG(xe, args->purge_state_val.pad)) 331 + return false; 332 + 333 + break; 334 + } 544 335 default: 545 336 if (XE_IOCTL_DBG(xe, 1)) 546 337 return false; ··· 571 336 struct xe_device *xe = vm->xe; 572 337 573 338 memset(details, 0, sizeof(*details)); 339 + 340 + /* Store retained pointer for purgeable state */ 341 + if (args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE) { 342 + details->retained_ptr = args->purge_state_val.retained_ptr; 343 + return 0; 344 + } 574 345 575 346 if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) { 576 347 int fd = args->preferred_mem_loc.devmem_fd; ··· 604 363 static void xe_madvise_details_fini(struct xe_madvise_details *details) 605 364 { 606 365 drm_pagemap_put(details->dpagemap); 366 + } 367 + 368 + static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details *details) 369 + { 370 + u32 retained; 371 + 372 + if 
(!details->retained_ptr) 373 + return 0; 374 + 375 + retained = !details->has_purged_bo; 376 + 377 + if (put_user(retained, (u32 __user *)u64_to_user_ptr(details->retained_ptr))) 378 + return -EFAULT; 379 + 380 + return 0; 607 381 } 608 382 609 383 static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, ··· 672 416 struct xe_device *xe = to_xe_device(dev); 673 417 struct xe_file *xef = to_xe_file(file); 674 418 struct drm_xe_madvise *args = data; 675 - struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, 676 - .range = args->range, }; 419 + struct xe_vmas_in_madvise_range madvise_range = { 420 + /* 421 + * Userspace may pass canonical (sign-extended) addresses. 422 + * Strip the sign extension to get the internal non-canonical 423 + * form used by the GPUVM, matching xe_vm_bind_ioctl() behavior. 424 + */ 425 + .addr = xe_device_uncanonicalize_addr(xe, args->start), 426 + .range = args->range, 427 + }; 677 428 struct xe_madvise_details details; 678 429 u16 pat_index, coh_mode; 679 430 struct xe_vm *vm; 680 431 struct drm_exec exec; 681 432 int err, attr_type; 433 + bool do_retained; 682 434 683 435 vm = xe_vm_lookup(xef, args->vm_id); 684 436 if (XE_IOCTL_DBG(xe, !vm)) ··· 695 431 if (!madvise_args_are_sane(vm->xe, args)) { 696 432 err = -EINVAL; 697 433 goto put_vm; 434 + } 435 + 436 + /* Cache whether we need to write retained, and validate it's initialized to 0 */ 437 + do_retained = args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE && 438 + args->purge_state_val.retained_ptr; 439 + if (do_retained) { 440 + u32 retained; 441 + u32 __user *retained_ptr; 442 + 443 + retained_ptr = u64_to_user_ptr(args->purge_state_val.retained_ptr); 444 + if (get_user(retained, retained_ptr)) { 445 + err = -EFAULT; 446 + goto put_vm; 447 + } 448 + 449 + if (XE_IOCTL_DBG(xe, retained != 0)) { 450 + err = -EINVAL; 451 + goto put_vm; 452 + } 698 453 } 699 454 700 455 xe_svm_flush(vm); ··· 731 448 if (err) 732 449 goto unlock_vm; 733 450 734 - err = 
xe_vm_alloc_madvise_vma(vm, args->start, args->range); 451 + err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range); 735 452 if (err) 736 453 goto madv_fini; 737 454 ··· 793 510 } 794 511 795 512 attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); 513 + 514 + /* Ensure the madvise function exists for this type */ 515 + if (!madvise_funcs[attr_type]) { 516 + err = -EINVAL; 517 + goto err_fini; 518 + } 519 + 796 520 madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args, 797 521 &details); 798 522 799 - err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); 523 + err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr, 524 + madvise_range.addr + args->range); 800 525 801 526 if (madvise_range.has_svm_userptr_vmas) 802 527 xe_svm_notifier_unlock(vm); ··· 819 528 xe_madvise_details_fini(&details); 820 529 unlock_vm: 821 530 up_write(&vm->lock); 531 + 532 + /* Write retained value to user after releasing all locks */ 533 + if (!err && do_retained) 534 + err = xe_madvise_purgeable_retained_to_user(&details); 822 535 put_vm: 823 536 xe_vm_put(vm); 824 537 return err;
+3
drivers/gpu/drm/xe/xe_vm_madvise.h
··· 8 8 9 9 struct drm_device; 10 10 struct drm_file; 11 + struct xe_bo; 11 12 12 13 int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, 13 14 struct drm_file *file); 15 + 16 + void xe_bo_recompute_purgeable_state(struct xe_bo *bo); 14 17 15 18 #endif
+11
drivers/gpu/drm/xe/xe_vm_types.h
··· 96 96 * same as default_pat_index unless overwritten by madvise. 97 97 */ 98 98 u16 pat_index; 99 + 100 + /** 101 + * @purgeable_state: Purgeable hint for this VMA mapping 102 + * 103 + * Per-VMA purgeable state from madvise. Valid states are WILLNEED (0) 104 + * or DONTNEED (1). Shared BOs require all VMAs to be DONTNEED before 105 + * the BO can be purged. PURGED state exists only at BO level. 106 + * 107 + * Protected by BO dma-resv lock. Set via DRM_IOCTL_XE_MADVISE. 108 + */ 109 + u32 purgeable_state; 99 110 }; 100 111 101 112 struct xe_vma {
+69
include/uapi/drm/xe_drm.h
··· 432 432 #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) 433 433 #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3) 434 434 #define DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX (1 << 4) 435 + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_PURGING_SUPPORT (1 << 5) 435 436 #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 436 437 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 437 438 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 ··· 2172 2171 * - DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: Set preferred memory location. 2173 2172 * - DRM_XE_MEM_RANGE_ATTR_ATOMIC: Set atomic access policy. 2174 2173 * - DRM_XE_MEM_RANGE_ATTR_PAT: Set page attribute table index. 2174 + * - DRM_XE_VMA_ATTR_PURGEABLE_STATE: Set purgeable state for BOs. 2175 2175 * 2176 2176 * Example: 2177 2177 * ··· 2205 2203 #define DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC 0 2206 2204 #define DRM_XE_MEM_RANGE_ATTR_ATOMIC 1 2207 2205 #define DRM_XE_MEM_RANGE_ATTR_PAT 2 2206 + #define DRM_XE_VMA_ATTR_PURGEABLE_STATE 3 2208 2207 /** @type: type of attribute */ 2209 2208 __u32 type; 2210 2209 ··· 2296 2293 /** @pat_index.reserved: Reserved */ 2297 2294 __u64 reserved; 2298 2295 } pat_index; 2296 + 2297 + /** 2298 + * @purge_state_val: Purgeable state configuration 2299 + * 2300 + * Used when @type == DRM_XE_VMA_ATTR_PURGEABLE_STATE. 2301 + * 2302 + * Configures the purgeable state of buffer objects in the specified 2303 + * virtual address range. This allows applications to hint to the kernel 2304 + * about a BO's usage patterns for better memory management. 2305 + * 2306 + * By default all VMAs are in WILLNEED state. 2307 + * 2308 + * Supported values for @purge_state_val.val: 2309 + * - DRM_XE_VMA_PURGEABLE_STATE_WILLNEED (0): Marks BO as needed. 2310 + * If the BO was previously purged, the kernel sets the __u32 at 2311 + * @retained_ptr to 0 (backing store lost) so the application knows 2312 + * it must recreate the BO. 
2313 + * 2314 + * - DRM_XE_VMA_PURGEABLE_STATE_DONTNEED (1): Marks BO as not currently 2315 + * needed. Kernel may purge it under memory pressure to reclaim memory. 2316 + * Only applies to non-shared BOs. The kernel sets the __u32 at 2317 + * @retained_ptr to 1 if the backing store still exists (not yet purged), 2318 + * or 0 if it was already purged. 2319 + * 2320 + * Important: Once marked as DONTNEED, touching the BO's memory 2321 + * is undefined behavior. It may succeed temporarily (before the 2322 + * kernel purges the backing store) but will suddenly fail once 2323 + * the BO transitions to PURGED state. 2324 + * 2325 + * To transition back: use WILLNEED and check @retained_ptr — 2326 + * if 0, backing store was lost and the BO must be recreated. 2327 + * 2328 + * The following operations are blocked in DONTNEED state to 2329 + * prevent the BO from being re-mapped after madvise: 2330 + * - New mmap() calls: Fail with -EBUSY 2331 + * - VM_BIND operations: Fail with -EBUSY 2332 + * - New dma-buf exports: Fail with -EBUSY 2333 + * - CPU page faults (existing mmap): Fail with SIGBUS 2334 + * - GPU page faults (fault-mode VMs): Fail with -EACCES 2335 + */ 2336 + struct { 2337 + #define DRM_XE_VMA_PURGEABLE_STATE_WILLNEED 0 2338 + #define DRM_XE_VMA_PURGEABLE_STATE_DONTNEED 1 2339 + /** @purge_state_val.val: value for DRM_XE_VMA_ATTR_PURGEABLE_STATE */ 2340 + __u32 val; 2341 + 2342 + /** @purge_state_val.pad: MBZ */ 2343 + __u32 pad; 2344 + /** 2345 + * @purge_state_val.retained_ptr: Pointer to a __u32 output 2346 + * field for backing store status. 2347 + * 2348 + * Userspace must initialize the __u32 value at this address 2349 + * to 0 before the ioctl. Kernel writes a __u32 after the 2350 + * operation: 2351 + * - 1 if backing store exists (not purged) 2352 + * - 0 if backing store was purged 2353 + * 2354 + * If userspace fails to initialize to 0, ioctl returns -EINVAL. 
2355 + * This ensures a safe default (0 = assume purged) if kernel 2356 + * cannot write the result. 2357 + * 2358 + * Similar to i915's drm_i915_gem_madvise.retained field. 2359 + */ 2360 + __u64 retained_ptr; 2361 + } purge_state_val; 2299 2362 }; 2300 2363 2301 2364 /** @reserved: Reserved */