Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'drm-xe-fixes-2026-04-30' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

API Fixes:
- Add missing pad and extensions check (Jonathan)
- Reject unsafe PAT indices for CPU cached memory (Jia)

Driver Fixes:
- Drop registration of guc_submit_wedged_fini from xe_guc_submit_wedge (Brost)
- Xe3p tuning and workaround fixes (Roper, Gustavo)
- Use drm_mm instead of drm SA for CCS read/write (Satya)
- Fix leaks and null derefs (Shuicheng)
- Fix Wa_18022495364 (Tvrtko)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/afO05KvmFMn_7qcY@intel.com

+621 -105
+1
drivers/gpu/drm/xe/Makefile
···
 	xe_irq.o \
 	xe_late_bind_fw.o \
 	xe_lrc.o \
+	xe_mem_pool.o \
 	xe_migrate.o \
 	xe_mmio.o \
 	xe_mmio_gem.o \
+1 -1
drivers/gpu/drm/xe/regs/xe_gt_regs.h
···
 #define   DISABLE_128B_EVICTION_COMMAND_UDW		REG_BIT(36 - 32)
 #define   LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE	REG_BIT(35 - 32)
 
-#define ROW_CHICKEN5				XE_REG_MCR(0xe7f0)
+#define ROW_CHICKEN5				XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED)
 #define   CPSS_AWARE_DIS			REG_BIT(3)
 
 #define SARB_CHICKEN1				XE_REG_MCR(0xe90c)
+6 -2
drivers/gpu/drm/xe/xe_bo.c
···
 	}
 
 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
-	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) {
+		xe_bo_free(bo);
 		return ERR_PTR(-EINVAL);
+	}
 
 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
···
 		alignment = SZ_4K >> PAGE_SHIFT;
 	}
 
-	if (type == ttm_bo_type_device && aligned_size != size)
+	if (type == ttm_bo_type_device && aligned_size != size) {
+		xe_bo_free(bo);
 		return ERR_PTR(-EINVAL);
+	}
 
 	if (!bo) {
 		bo = xe_bo_alloc();
+2 -1
drivers/gpu/drm/xe/xe_bo_types.h
···
 #include "xe_ggtt_types.h"
 
 struct xe_device;
+struct xe_mem_pool_node;
 struct xe_vm;
 
 #define XE_BO_MAX_PLACEMENTS	3
···
 	bool ccs_cleared;
 
 	/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
-	struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
+	struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
 
 	/**
 	 * @cpu_caching: CPU caching mode. Currently only used for userspace
+18 -5
drivers/gpu/drm/xe/xe_dma_buf.c
···
 		return ERR_PTR(ret);
 	}
 
+/*
+ * Takes ownership of @storage: on success it is transferred to the returned
+ * drm_gem_object; on failure it is freed before returning the error.
+ * This matches the contract of xe_bo_init_locked() which frees @storage on
+ * its error paths, so callers need not (and must not) free @storage after
+ * this call.
+ */
 static struct drm_gem_object *
 xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 		    struct dma_buf *dma_buf)
···
 	int ret = 0;
 
 	dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
-	if (!dummy_obj)
+	if (!dummy_obj) {
+		xe_bo_free(storage);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	dummy_obj->resv = resv;
 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
···
 		if (ret)
 			break;
 
+		/* xe_bo_init_locked() frees storage on error */
 		bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
 				       0, /* Will require 1way or 2way for vm_bind */
 				       ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
···
 		goto out_err;
 	}
 
-	/* Errors here will take care of freeing the bo. */
+	/*
+	 * xe_dma_buf_init_obj() takes ownership of bo on both success
+	 * and failure, so we must not touch bo after this call.
+	 */
 	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
-	if (IS_ERR(obj))
+	if (IS_ERR(obj)) {
+		dma_buf_detach(dma_buf, attach);
 		return obj;
-
-
+	}
 	get_dma_buf(dma_buf);
 	obj->import_attach = attach;
 	return obj;
+2 -2
drivers/gpu/drm/xe/xe_eu_stall.c
···
 	struct xe_eu_stall_data_stream *stream = file->private_data;
 	struct xe_gt *gt = stream->gt;
 
-	drm_dev_put(&gt->tile->xe->drm);
-
 	mutex_lock(&gt->eu_stall->stream_lock);
 	xe_eu_stall_disable_locked(stream);
 	xe_eu_stall_data_buf_destroy(stream);
 	xe_eu_stall_stream_free(stream);
 	mutex_unlock(&gt->eu_stall->stream_lock);
+
+	drm_dev_put(&gt->tile->xe->drm);
 
 	return 0;
 }
+6 -3
drivers/gpu/drm/xe/xe_exec_queue.c
···
 		if (q->vm && q->hwe->hw_engine_group) {
 			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
 			if (err)
-				goto put_exec_queue;
+				goto kill_exec_queue;
 		}
 	}
 
···
 	/* user id alloc must always be last in ioctl to prevent UAF */
 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
 	if (err)
-		goto kill_exec_queue;
+		goto del_hw_engine_group;
 
 	args->exec_queue_id = id;
 
 	return 0;
 
+del_hw_engine_group:
+	if (q->vm && q->hwe && q->hwe->hw_engine_group)
+		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
 kill_exec_queue:
 	xe_exec_queue_kill(q);
 delete_queue_group:
···
 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
 						     unsigned int type)
 {
-	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+	xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
 
 	dma_fence_put(q->tlb_inval[type].last_fence);
+1 -1
drivers/gpu/drm/xe/xe_gsc.c
···
 			    &rd_offset);
 	if (err) {
 		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
-		return err;
+		goto out_bo;
 	}
 
 	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major);
+9 -24
drivers/gpu/drm/xe/xe_guc_submit.c
···
 static void guc_submit_fini(void *arg)
 {
 	struct xe_guc *guc = arg;
-
-	/* Forcefully kill any remaining exec queues */
-	xe_guc_ct_stop(&guc->ct);
-	guc_submit_reset_prepare(guc);
-	xe_guc_softreset(guc);
-	xe_guc_submit_stop(guc);
-	xe_uc_fw_sanitize(&guc->fw);
-	xe_guc_submit_pause_abort(guc);
-}
-
-static void guc_submit_wedged_fini(void *arg)
-{
-	struct xe_guc *guc = arg;
 	struct xe_exec_queue *q;
 	unsigned long index;
 
+	/* Drop any wedged queue refs */
 	mutex_lock(&guc->submission_state.lock);
 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
 		if (exec_queue_wedged(q)) {
···
 		}
 	}
 	mutex_unlock(&guc->submission_state.lock);
+
+	/* Forcefully kill any remaining exec queues */
+	xe_guc_ct_stop(&guc->ct);
+	guc_submit_reset_prepare(guc);
+	xe_guc_softreset(guc);
+	xe_guc_submit_stop(guc);
+	xe_uc_fw_sanitize(&guc->fw);
+	xe_guc_submit_pause_abort(guc);
 }
 
 static const struct xe_exec_queue_ops guc_exec_queue_ops;
···
 void xe_guc_submit_wedge(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_exec_queue *q;
 	unsigned long index;
-	int err;
 
 	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
 
···
 		return;
 
 	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
-		err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
-					       guc_submit_wedged_fini, guc);
-		if (err) {
-			xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
-				  "Although device is wedged.\n",
-				  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
-			return;
-		}
-
 		mutex_lock(&guc->submission_state.lock);
 		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
 			if (xe_exec_queue_get_unless_zero(q))
+1 -1
drivers/gpu/drm/xe/xe_lrc.c
···
 	if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
 		return -ENOSPC;
 
-	*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+	*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1);
 	*cmd++ = CS_DEBUG_MODE2(0).addr;
 	*cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
 
+403
drivers/gpu/drm/xe/xe_mem_pool.c
···
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+
+#include <drm/drm_managed.h>
+
+#include "instructions/xe_mi_commands.h"
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_map.h"
+#include "xe_mem_pool.h"
+#include "xe_mem_pool_types.h"
+#include "xe_tile_printk.h"
+
+/**
+ * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an
+ * XE tile.
+ *
+ * The XE memory pool is a DRM MM manager that provides sub-allocation of memory
+ * from a backing buffer object (BO) on a specific XE tile. It is designed to
+ * manage memory for GPU workloads, allowing for efficient allocation and
+ * deallocation of memory regions within the BO.
+ *
+ * The memory pool maintains a primary BO that is pinned in the GGTT and mapped
+ * into the CPU address space for direct access. Optionally, it can also maintain
+ * a shadow BO that can be used for atomic updates to the primary BO's contents.
+ *
+ * The API provided by the memory pool allows clients to allocate and free memory
+ * regions, retrieve GPU and CPU addresses, and synchronize data between the
+ * primary and shadow BOs as needed.
+ */
+struct xe_mem_pool {
+	/** @base: Range allocator over [0, @size) in bytes */
+	struct drm_mm base;
+	/** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */
+	struct xe_bo *bo;
+	/** @shadow: Shadow BO for atomic command updates. */
+	struct xe_bo *shadow;
+	/** @swap_guard: Guards swaps and updates of @bo and @shadow */
+	struct mutex swap_guard;
+	/** @cpu_addr: CPU virtual address of the active BO. */
+	void *cpu_addr;
+	/** @is_iomem: Indicates if the BO mapping is I/O memory. */
+	bool is_iomem;
+};
+
+static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node)
+{
+	return container_of(node->sa_node.mm, struct xe_mem_pool, base);
+}
+
+static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool)
+{
+	return pool->bo->tile;
+}
+
+static void fini_pool_action(struct drm_device *drm, void *arg)
+{
+	struct xe_mem_pool *pool = arg;
+
+	if (pool->is_iomem)
+		kvfree(pool->cpu_addr);
+
+	drm_mm_takedown(&pool->base);
+}
+
+static int pool_shadow_init(struct xe_mem_pool *pool)
+{
+	struct xe_tile *tile = pool->bo->tile;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_bo *shadow;
+	int ret;
+
+	xe_assert(xe, !pool->shadow);
+
+	ret = drmm_mutex_init(&xe->drm, &pool->swap_guard);
+	if (ret)
+		return ret;
+
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		fs_reclaim_acquire(GFP_KERNEL);
+		might_lock(&pool->swap_guard);
+		fs_reclaim_release(GFP_KERNEL);
+	}
+	shadow = xe_managed_bo_create_pin_map(xe, tile,
+					      xe_bo_size(pool->bo),
+					      XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+					      XE_BO_FLAG_GGTT |
+					      XE_BO_FLAG_GGTT_INVALIDATE |
+					      XE_BO_FLAG_PINNED_NORESTORE);
+	if (IS_ERR(shadow))
+		return PTR_ERR(shadow);
+
+	pool->shadow = shadow;
+
+	return 0;
+}
+
+/**
+ * xe_mem_pool_init() - Initialize memory pool.
+ * @tile: the &xe_tile to allocate the pool on.
+ * @size: number of bytes to allocate.
+ * @guard: the size of the guard region at the end of the BO that is not
+ *         sub-allocated, in bytes.
+ * @flags: flags to use to create the shadow pool.
+ *
+ * Initializes a memory pool for sub-allocating memory from a backing BO on the
+ * specified XE tile. The backing BO is pinned in the GGTT and mapped into
+ * the CPU address space for direct access. Optionally, a shadow BO can also be
+ * initialized for atomic updates to the primary BO's contents.
+ *
+ * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
+ */
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+				     u32 guard, int flags)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_mem_pool *pool;
+	struct xe_bo *bo;
+	u32 managed_size;
+	int ret;
+
+	xe_tile_assert(tile, size > guard);
+	managed_size = size - guard;
+
+	pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, size,
+					  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_INVALIDATE |
+					  XE_BO_FLAG_PINNED_NORESTORE);
+	if (IS_ERR(bo)) {
+		xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n",
+			    size / SZ_1K, bo);
+		return ERR_CAST(bo);
+	}
+	pool->bo = bo;
+	pool->is_iomem = bo->vmap.is_iomem;
+
+	if (pool->is_iomem) {
+		pool->cpu_addr = kvzalloc(size, GFP_KERNEL);
+		if (!pool->cpu_addr)
+			return ERR_PTR(-ENOMEM);
+	} else {
+		pool->cpu_addr = bo->vmap.vaddr;
+	}
+
+	if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) {
+		ret = pool_shadow_init(pool);
+
+		if (ret)
+			goto out_err;
+	}
+
+	drm_mm_init(&pool->base, 0, managed_size);
+	ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return pool;
+
+out_err:
+	if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY)
+		xe_tile_err(tile,
+			    "Failed to initialize shadow BO for mem pool (%d)\n", ret);
+	if (bo->vmap.is_iomem)
+		kvfree(pool->cpu_addr);
+	return ERR_PTR(ret);
+}
+
+/**
+ * xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Copies the entire contents of the primary pool to the shadow pool. This must
+ * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY
+ * flag to ensure that the shadow pool has the same initial contents as the primary
+ * pool. After this initial synchronization, clients can choose to synchronize the
+ * shadow pool with the primary pool on a node basis using
+ * xe_mem_pool_sync_shadow_locked() as needed.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync(struct xe_mem_pool *pool)
+{
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+
+	xe_tile_assert(tile, pool->shadow);
+
+	xe_map_memcpy_to(xe, &pool->shadow->vmap, 0,
+			 pool->cpu_addr, xe_bo_size(pool->bo));
+}
+
+/**
+ * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Swaps the primary buffer object with the shadow buffer object in the mem
+ * pool. This allows for atomic updates to the contents of the primary BO
+ * by first writing to the shadow BO and then swapping it with the primary BO.
+ * The swap_guard must be held to ensure synchronization with any concurrent
+ * swap operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool)
+{
+	struct xe_tile *tile = pool_to_tile(pool);
+
+	xe_tile_assert(tile, pool->shadow);
+	lockdep_assert_held(&pool->swap_guard);
+
+	swap(pool->bo, pool->shadow);
+	if (!pool->bo->vmap.is_iomem)
+		pool->cpu_addr = pool->bo->vmap.vaddr;
+}
+
+/**
+ * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool.
+ * @node: the node allocated in the memory pool.
+ *
+ * Copies the specified batch buffer from the primary pool to the shadow pool.
+ * The swap_guard must be held to ensure synchronization with any concurrent
+ * swap operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct drm_mm_node *sa_node = &node->sa_node;
+
+	xe_tile_assert(tile, pool->shadow);
+	lockdep_assert_held(&pool->swap_guard);
+
+	xe_map_memcpy_to(xe, &pool->shadow->vmap,
+			 sa_node->start,
+			 pool->cpu_addr + sa_node->start,
+			 sa_node->size);
+}
+
+/**
+ * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: GGTT address of the memory pool.
+ */
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool)
+{
+	return xe_bo_ggtt_addr(pool->bo);
+}
+
+/**
+ * xe_mem_pool_cpu_addr() - Retrieve CPU address of the memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: CPU virtual address of memory pool.
+ */
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool)
+{
+	return pool->cpu_addr;
+}
+
+/**
+ * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap
+ * operations on a memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: Swap guard mutex or NULL if shadow pool is not created.
+ */
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool)
+{
+	if (!pool->shadow)
+		return NULL;
+
+	return &pool->swap_guard;
+}
+
+/**
+ * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation
+ * to the GPU memory.
+ * @node: the node allocated in the memory pool to flush.
+ */
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct drm_mm_node *sa_node = &node->sa_node;
+
+	if (!pool->bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start,
+			 pool->cpu_addr + sa_node->start,
+			 sa_node->size);
+}
+
+/**
+ * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the
+ * sub-allocation.
+ * @node: the node allocated in the memory pool to read back.
+ */
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct drm_mm_node *sa_node = &node->sa_node;
+
+	if (!pool->bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start,
+			   &pool->bo->vmap, sa_node->start, sa_node->size);
+}
+
+/**
+ * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool.
+ *
+ * Returns: node structure or an ERR_PTR(-ENOMEM).
+ */
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void)
+{
+	struct xe_mem_pool_node *node = kzalloc_obj(*node);
+
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+
+	return node;
+}
+
+/**
+ * xe_mem_pool_insert_node() - Insert a node into the memory pool.
+ * @pool: the memory pool to insert into
+ * @node: the node to insert
+ * @size: the size of the node to be allocated in bytes.
+ *
+ * Inserts a node into the specified memory pool using drm_mm for
+ * allocation.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+			    struct xe_mem_pool_node *node, u32 size)
+{
+	if (!pool)
+		return -EINVAL;
+
+	return drm_mm_insert_node(&pool->base, &node->sa_node, size);
+}
+
+/**
+ * xe_mem_pool_free_node() - Free a node allocated from the memory pool.
+ * @node: the node to free
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node)
+{
+	if (!node)
+		return;
+
+	drm_mm_remove_node(&node->sa_node);
+	kfree(node);
+}
+
+/**
+ * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node.
+ * @node: the node allocated in the memory pool
+ *
+ * Returns: CPU virtual address of the node.
+ */
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+
+	return xe_mem_pool_cpu_addr(pool) + node->sa_node.start;
+}
+
+/**
+ * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging.
+ * @pool: the memory pool to be dumped.
+ * @p: The DRM printer to use for output.
+ *
+ * Only the drm managed region is dumped, not the state of the BOs or any other
+ * pool information.
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p)
+{
+	drm_mm_print(&pool->base, p);
+}
+35
drivers/gpu/drm/xe/xe_mem_pool.h
···
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+#ifndef _XE_MEM_POOL_H_
+#define _XE_MEM_POOL_H_
+
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include <drm/drm_mm.h>
+#include "xe_mem_pool_types.h"
+
+struct drm_printer;
+struct xe_mem_pool;
+struct xe_tile;
+
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+				     u32 guard, int flags);
+void xe_mem_pool_sync(struct xe_mem_pool *pool);
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool);
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node);
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool);
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool);
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool);
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node);
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node);
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void);
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+			    struct xe_mem_pool_node *node, u32 size);
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node);
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node);
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p);
+
+#endif
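For orientation, here is a minimal sketch of how a caller might drive this API, condensed from the kernel-doc in xe_mem_pool.c and the xe_migrate.c hunk further down. It is not part of the patch; the tile pointer, the sizes, and the trimmed error handling are placeholders.

/*
 * Sketch only: create a pool with a shadow copy, carve out one node and
 * fill it through the CPU mapping. Assumes the usual xe driver context.
 */
struct xe_mem_pool *pool;
struct xe_mem_pool_node *node;
u32 *cs;
int err;

pool = xe_mem_pool_init(tile, SZ_2M, sizeof(u32),
			XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
if (IS_ERR(pool))
	return PTR_ERR(pool);
xe_mem_pool_sync(pool);			/* make the shadow a replica of the pool */

node = xe_mem_pool_alloc_node();
if (IS_ERR(node))
	return PTR_ERR(node);

scoped_guard(mutex, xe_mem_pool_bo_swap_guard(pool)) {
	xe_mem_pool_swap_shadow_locked(pool);	/* write to the inactive copy */

	err = xe_mem_pool_insert_node(pool, node, SZ_4K);
	if (err) {
		kfree(node);
		return err;
	}

	cs = xe_mem_pool_node_cpu_addr(node);	/* fill in commands via the CPU */
	cs[0] = MI_NOOP;
	xe_mem_pool_sync_shadow_locked(node);	/* propagate just this node */
}

/* ... later, when the commands are no longer needed ... */
xe_mem_pool_free_node(node);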
+21
drivers/gpu/drm/xe/xe_mem_pool_types.h
···
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_MEM_POOL_TYPES_H_
+#define _XE_MEM_POOL_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY	BIT(0)
+
+/**
+ * struct xe_mem_pool_node - Sub-range allocations from mem pool.
+ */
+struct xe_mem_pool_node {
+	/** @sa_node: drm_mm_node for this allocation. */
+	struct drm_mm_node sa_node;
+};
+
+#endif
+31 -25
drivers/gpu/drm/xe/xe_migrate.c
···
 #include "xe_hw_engine.h"
 #include "xe_lrc.h"
 #include "xe_map.h"
+#include "xe_mem_pool.h"
 #include "xe_mocs.h"
 #include "xe_printk.h"
 #include "xe_pt.h"
···
 	u32 batch_size, batch_size_allocated;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_res_cursor src_it, ccs_it;
+	struct xe_mem_pool *bb_pool;
 	struct xe_sriov_vf_ccs_ctx *ctx;
-	struct xe_sa_manager *bb_pool;
 	u64 size = xe_bo_size(src_bo);
-	struct xe_bb *bb = NULL;
+	struct xe_mem_pool_node *bb;
 	u64 src_L0, src_L0_ofs;
+	struct xe_bb xe_bb_tmp;
 	u32 src_L0_pt;
 	int err;
···
 		size -= src_L0;
 	}
 
-	bb = xe_bb_alloc(gt);
+	bb = xe_mem_pool_alloc_node();
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
 	bb_pool = ctx->mem.ccs_bb_pool;
-	scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
-		xe_sa_bo_swap_shadow(bb_pool);
+	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+		xe_mem_pool_swap_shadow_locked(bb_pool);
 
-		err = xe_bb_init(bb, bb_pool, batch_size);
+		err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
 		if (err) {
 			xe_gt_err(gt, "BB allocation failed.\n");
-			xe_bb_free(bb, NULL);
+			kfree(bb);
 			return err;
 		}
 
···
 		size = xe_bo_size(src_bo);
 		batch_size = 0;
 
+		xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
 		/*
 		 * Emit PTE and copy commands here.
 		 * The CCS copy command can only support limited size. If the size to be
···
 			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
 			batch_size += EMIT_COPY_CCS_DW;
 
-			emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
+			emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
 
-			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
+			emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
 
-			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
-			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
+			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+							      flush_flags);
+			flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
 							  src_L0_ofs, dst_is_pltt,
 							  src_L0, ccs_ofs, true);
-			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
+			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+							      flush_flags);
 
 			size -= src_L0;
 		}
 
-		xe_assert(xe, (batch_size_allocated == bb->len));
+		xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
+		xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
 		src_bo->bb_ccs[read_write] = bb;
 
 		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
-		xe_sa_bo_sync_shadow(bb->bo);
+		xe_mem_pool_sync_shadow_locked(bb);
 	}
 
 	return 0;
···
 void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
 				  enum xe_sriov_vf_ccs_rw_ctxs read_write)
 {
-	struct xe_bb *bb = src_bo->bb_ccs[read_write];
+	struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
 	struct xe_device *xe = xe_bo_device(src_bo);
+	struct xe_mem_pool *bb_pool;
 	struct xe_sriov_vf_ccs_ctx *ctx;
-	struct xe_sa_manager *bb_pool;
 	u32 *cs;
 
 	xe_assert(xe, IS_SRIOV_VF(xe));
···
 	ctx = &xe->sriov.vf.ccs.contexts[read_write];
 	bb_pool = ctx->mem.ccs_bb_pool;
 
-	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
-	xe_sa_bo_swap_shadow(bb_pool);
+	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+		xe_mem_pool_swap_shadow_locked(bb_pool);
 
-	cs = xe_sa_bo_cpu_addr(bb->bo);
-	memset(cs, MI_NOOP, bb->len * sizeof(u32));
-	xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+		cs = xe_mem_pool_node_cpu_addr(bb);
+		memset(cs, MI_NOOP, bb->sa_node.size);
+		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
 
-	xe_sa_bo_sync_shadow(bb->bo);
-
-	xe_bb_free(bb, NULL);
-	src_bo->bb_ccs[read_write] = NULL;
+		xe_mem_pool_sync_shadow_locked(bb);
+		xe_mem_pool_free_node(bb);
+		src_bo->bb_ccs[read_write] = NULL;
+	}
 }
 
 /**
+1
drivers/gpu/drm/xe/xe_pci.c
···
 
 static const struct xe_graphics_desc graphics_xe3p_lpg = {
 	XE2_GFX_FEATURES,
+	.has_indirect_ring_state = 1,
 	.multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE),
 	.num_geometry_xecore_fuse_regs = 3,
 	.num_compute_xecore_fuse_regs = 3,
+1 -1
drivers/gpu/drm/xe/xe_reg_whitelist.c
···
 	}
 
 	range_start = reg & REG_GENMASK(25, range_bit);
-	range_end = range_start | REG_GENMASK(range_bit, 0);
+	range_end = range_start | REG_GENMASK(range_bit - 1, 0);
 
 	switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
 	case RING_FORCE_TO_NONPRIV_ACCESS_RW:
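The fix is a classic off-by-one: range_start already has the low range_bit bits cleared, so a whitelist entry covers 2^range_bit bytes and the last covered offset is range_start + 2^range_bit - 1. ORing in REG_GENMASK(range_bit, 0) sets one bit too many and reports a range twice that size. A worked example with made-up values (not from the patch):

/* Illustrative only: range_bit = 4, register offset 0x2344. */
range_start = 0x2344 & REG_GENMASK(25, 4);	/* 0x2340 */
range_end   = range_start | REG_GENMASK(3, 0);	/* 0x234f, a 16-byte range */
/* The old REG_GENMASK(4, 0) gave 0x235f, i.e. a 32-byte range. */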
+29 -25
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
···
 #include "xe_guc.h"
 #include "xe_guc_submit.h"
 #include "xe_lrc.h"
+#include "xe_mem_pool.h"
 #include "xe_migrate.h"
 #include "xe_pm.h"
-#include "xe_sa.h"
 #include "xe_sriov_printk.h"
 #include "xe_sriov_vf.h"
 #include "xe_sriov_vf_ccs.h"
···
 
 static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
 {
+	struct xe_mem_pool *pool;
 	struct xe_device *xe = tile_to_xe(tile);
-	struct xe_sa_manager *sa_manager;
+	u32 *pool_cpu_addr, *last_dw_addr;
 	u64 bb_pool_size;
-	int offset, err;
+	int err;
 
 	bb_pool_size = get_ccs_bb_pool_size(xe);
 	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
 		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
 
-	sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
-					     XE_SA_BO_MANAGER_FLAG_SHADOW);
-
-	if (IS_ERR(sa_manager)) {
-		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
-			     sa_manager);
-		err = PTR_ERR(sa_manager);
+	pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
+				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
+	if (IS_ERR(pool)) {
+		xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
+			     pool);
+		err = PTR_ERR(pool);
 		return err;
 	}
 
-	offset = 0;
-	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
-		      bb_pool_size);
-	xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
-		      bb_pool_size);
+	pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
+	memset(pool_cpu_addr, 0, bb_pool_size);
 
-	offset = bb_pool_size - sizeof(u32);
-	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
-	xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
+	last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
+	*last_dw_addr = MI_BATCH_BUFFER_END;
 
-	ctx->mem.ccs_bb_pool = sa_manager;
+	/*
+	 * Sync the main copy and shadow copy so that the shadow copy is a
+	 * replica of the main copy. Only individual BBs are synced after
+	 * this init phase, so the main pool and shadow copy must already
+	 * match here. This is needed because the GuC may read the BB
+	 * commands from the shadow copy.
+	 */
+	xe_mem_pool_sync(pool);
 
+	ctx->mem.ccs_bb_pool = pool;
 	return 0;
 }
 
 static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
 {
-	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
 	u32 dw[10], i = 0;
···
 #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
 void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
 {
-	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
 	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
···
 	struct xe_device *xe = xe_bo_device(bo);
 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
 	struct xe_sriov_vf_ccs_ctx *ctx;
+	struct xe_mem_pool_node *bb;
 	struct xe_tile *tile;
-	struct xe_bb *bb;
 	int err = 0;
 
 	xe_assert(xe, IS_VF_CCS_READY(xe));
···
 {
 	struct xe_device *xe = xe_bo_device(bo);
 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
-	struct xe_bb *bb;
+	struct xe_mem_pool_node *bb;
 
 	xe_assert(xe, IS_VF_CCS_READY(xe));
 
···
  */
 void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
 {
-	struct xe_sa_manager *bb_pool;
 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+	struct xe_mem_pool *bb_pool;
 
 	if (!IS_VF_CCS_READY(xe))
 		return;
···
 
 		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
 		drm_printf(p, "-------------------------\n");
-		drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
+		xe_mem_pool_dump(bb_pool, p);
 		drm_puts(p, "\n");
 	}
 }
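Note the guard argument passed to xe_mem_pool_init() above: drm_mm only manages [0, bb_pool_size - sizeof(u32)), so the terminating MI_BATCH_BUFFER_END written here can never be handed out to, or overwritten by, a sub-allocated batch buffer. A hypothetical helper, not part of the patch, just to spell out the arithmetic:

/* Illustrative only; assumes the layout set up by alloc_bb_pool(). */
static inline u64 ccs_bb_pool_terminator_offset(u64 bb_pool_size)
{
	/* The drm_mm range ends at bb_pool_size - sizeof(u32); the final
	 * dword at that offset stays reserved for MI_BATCH_BUFFER_END. */
	return bb_pool_size - sizeof(u32);
}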
+1 -4
drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
···
 	XE_SRIOV_VF_CCS_CTX_COUNT
 };
 
-struct xe_migrate;
-struct xe_sa_manager;
-
 /**
  * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
  */
···
 	/** @mem: memory data */
 	struct {
 		/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
-		struct xe_sa_manager *ccs_bb_pool;
+		struct xe_mem_pool *ccs_bb_pool;
 	} mem;
 };
 
+1 -1
drivers/gpu/drm/xe/xe_tuning.c
···
 	{ XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
 		       IS_INTEGRATED),
-	  XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE,
+	  XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
 				   BANK_HASH_4KB_MODE))
 	},
 };
+4 -1
drivers/gpu/drm/xe/xe_vm.c
···
 		 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
 	    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
 			 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+	    XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
+			 is_cpu_addr_mirror) ||
 	    XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
 			 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
 			  is_cpu_addr_mirror) &&
···
 	int ret = 0;
 
 	if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
-			      args->reserved[2])))
+			      args->reserved[2] || args->extensions ||
+			      args->pad)))
 		return -EINVAL;
 
 	vm = xe_vm_lookup(xef, args->vm_id);
+47
drivers/gpu/drm/xe/xe_vm_madvise.c
···
 	return 0;
 }
 
+static bool check_pat_args_are_sane(struct xe_device *xe,
+				    struct xe_vmas_in_madvise_range *madvise_range,
+				    u16 pat_index)
+{
+	u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+	int i;
+
+	/*
+	 * Using coh_none with CPU cached buffers is not allowed on iGPU.
+	 * On iGPU the GPU shares the LLC with the CPU, so with coh_none
+	 * the GPU bypasses CPU caches and reads directly from DRAM,
+	 * potentially seeing stale sensitive data from previously freed
+	 * pages. On dGPU this restriction does not apply, because the
+	 * platform does not provide a non-coherent system memory access
+	 * path that would violate the DMA coherency contract.
+	 */
+	if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
+		return true;
+
+	for (i = 0; i < madvise_range->num_vmas; i++) {
+		struct xe_vma *vma = madvise_range->vmas[i];
+		struct xe_bo *bo = xe_vma_bo(vma);
+
+		if (bo) {
+			/* BO with WB caching + COH_NONE is not allowed */
+			if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+				return false;
+			/* Imported dma-buf without caching info, assume cached */
+			if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
+				return false;
+		} else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
+					xe_vma_is_userptr(vma)))
+			/* System memory (userptr/SVM) is always CPU cached */
+			return false;
+	}
+
+	return true;
+}
+
 static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
 				   int num_vmas, u32 atomic_val)
 {
···
 		    (pat_index != 19 && coh_mode != XE_COH_2WAY))) {
 			err = -EINVAL;
 			goto madv_fini;
+		}
+	}
+
+	if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
+		if (!check_pat_args_are_sane(xe, &madvise_range,
+					     args->pat_index.val)) {
+			err = -EINVAL;
+			goto free_vmas;
 		}
 	}
 
-8
drivers/gpu/drm/xe/xe_wa.c
···
 	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
 	},
-	{ XE_RTP_NAME("14019988906"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
-	},
-	{ XE_RTP_NAME("14019877138"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
-	},
 	{ XE_RTP_NAME("14021490052"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(FF_MODE,