Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drm/xe: Add memory pool with shadow support

Add a memory pool to allocate sub-ranges from a BO-backed pool
using drm_mm.

Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <dev@lankhorst.se>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260408110145.1639937-5-satyanarayana.k.v.p@intel.com
(cherry picked from commit 1ce3229f8f269a245ff3b8c65ffae36b4d6afb93)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
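
A minimal usage sketch of the API added by this patch (illustrative only, not part
of the patch itself): the helper name, the tile pointer, the sizes, and the error
handling are assumptions; it simply shows one way a caller might carve a sub-range
out of a tile's pool and push it to the GPU-visible BO.

/* Hypothetical caller, for illustration only; not part of this patch. */
static int example_pool_usage(struct xe_tile *tile)
{
	struct xe_mem_pool *pool;
	struct xe_mem_pool_node *node;
	u64 gpu_addr;
	void *vaddr;
	int err;

	/* 64 KiB pool with a 4 KiB guard region that is never sub-allocated. */
	pool = xe_mem_pool_init(tile, SZ_64K, SZ_4K, 0);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* Reserve 256 bytes from the pool via drm_mm. */
	node = xe_mem_pool_alloc_node();
	if (IS_ERR(node))
		return PTR_ERR(node);

	err = xe_mem_pool_insert_node(pool, node, SZ_256);
	if (err) {
		kfree(node);
		return err;
	}

	/*
	 * Fill the sub-range through the CPU copy, then push it to the BO
	 * (the flush is a no-op when the BO mapping is not I/O memory).
	 */
	vaddr = xe_mem_pool_node_cpu_addr(node);
	memset(vaddr, 0, SZ_256);
	xe_mem_pool_bo_flush_write(node);

	/* GGTT address of the sub-range, usable in GPU commands. */
	gpu_addr = xe_mem_pool_gpu_addr(pool) + node->sa_node.start;

	xe_mem_pool_free_node(node);

	return 0;
}

The pool itself needs no explicit teardown here; it is torn down through the DRM
managed-resource action registered in xe_mem_pool_init().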

Authored by Satyanarayana K V P, committed by Rodrigo Vivi (36c6bac1 03f2499c)

460 additions

drivers/gpu/drm/xe/Makefile (+1)
 	xe_irq.o \
 	xe_late_bind_fw.o \
 	xe_lrc.o \
+	xe_mem_pool.o \
 	xe_migrate.o \
 	xe_mmio.o \
 	xe_mmio_gem.o \
drivers/gpu/drm/xe/xe_mem_pool.c (+403)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2026 Intel Corporation
 */

#include <linux/kernel.h>

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "xe_bo.h"
#include "xe_device_types.h"
#include "xe_map.h"
#include "xe_mem_pool.h"
#include "xe_mem_pool_types.h"
#include "xe_tile_printk.h"

/**
 * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an
 * XE tile.
 *
 * The XE memory pool is a DRM MM manager that provides sub-allocation of memory
 * from a backing buffer object (BO) on a specific XE tile. It is designed to
 * manage memory for GPU workloads, allowing for efficient allocation and
 * deallocation of memory regions within the BO.
 *
 * The memory pool maintains a primary BO that is pinned in the GGTT and mapped
 * into the CPU address space for direct access. Optionally, it can also maintain
 * a shadow BO that can be used for atomic updates to the primary BO's contents.
 *
 * The API provided by the memory pool allows clients to allocate and free memory
 * regions, retrieve GPU and CPU addresses, and synchronize data between the
 * primary and shadow BOs as needed.
 */
struct xe_mem_pool {
	/** @base: Range allocator over [0, @size) in bytes */
	struct drm_mm base;
	/** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */
	struct xe_bo *bo;
	/** @shadow: Shadow BO for atomic command updates. */
	struct xe_bo *shadow;
	/** @swap_guard: Guard serializing updates and swaps of @bo and @shadow. */
	struct mutex swap_guard;
	/** @cpu_addr: CPU virtual address of the active BO. */
	void *cpu_addr;
	/** @is_iomem: Indicates if the BO mapping is I/O memory. */
	bool is_iomem;
};

static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node)
{
	return container_of(node->sa_node.mm, struct xe_mem_pool, base);
}

static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool)
{
	return pool->bo->tile;
}

static void fini_pool_action(struct drm_device *drm, void *arg)
{
	struct xe_mem_pool *pool = arg;

	if (pool->is_iomem)
		kvfree(pool->cpu_addr);

	drm_mm_takedown(&pool->base);
}

static int pool_shadow_init(struct xe_mem_pool *pool)
{
	struct xe_tile *tile = pool->bo->tile;
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_bo *shadow;
	int ret;

	xe_assert(xe, !pool->shadow);

	ret = drmm_mutex_init(&xe->drm, &pool->swap_guard);
	if (ret)
		return ret;

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&pool->swap_guard);
		fs_reclaim_release(GFP_KERNEL);
	}
	shadow = xe_managed_bo_create_pin_map(xe, tile,
					      xe_bo_size(pool->bo),
					      XE_BO_FLAG_VRAM_IF_DGFX(tile) |
					      XE_BO_FLAG_GGTT |
					      XE_BO_FLAG_GGTT_INVALIDATE |
					      XE_BO_FLAG_PINNED_NORESTORE);
	if (IS_ERR(shadow))
		return PTR_ERR(shadow);

	pool->shadow = shadow;

	return 0;
}

/**
 * xe_mem_pool_init() - Initialize memory pool.
 * @tile: the &xe_tile to allocate from.
 * @size: number of bytes to allocate.
 * @guard: the size of the guard region at the end of the BO that is not
 * sub-allocated, in bytes.
 * @flags: flags to use to create shadow pool.
 *
 * Initializes a memory pool for sub-allocating memory from a backing BO on the
 * specified XE tile. The backing BO is pinned in the GGTT and mapped into
 * the CPU address space for direct access. Optionally, a shadow BO can also be
 * initialized for atomic updates to the primary BO's contents.
 *
 * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
 */
struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
				     u32 guard, int flags)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_mem_pool *pool;
	struct xe_bo *bo;
	u32 managed_size;
	int ret;

	xe_tile_assert(tile, size > guard);
	managed_size = size - guard;

	pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	bo = xe_managed_bo_create_pin_map(xe, tile, size,
					  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
					  XE_BO_FLAG_GGTT |
					  XE_BO_FLAG_GGTT_INVALIDATE |
					  XE_BO_FLAG_PINNED_NORESTORE);
	if (IS_ERR(bo)) {
		xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n",
			    size / SZ_1K, bo);
		return ERR_CAST(bo);
	}
	pool->bo = bo;
	pool->is_iomem = bo->vmap.is_iomem;

	if (pool->is_iomem) {
		pool->cpu_addr = kvzalloc(size, GFP_KERNEL);
		if (!pool->cpu_addr)
			return ERR_PTR(-ENOMEM);
	} else {
		pool->cpu_addr = bo->vmap.vaddr;
	}

	if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) {
		ret = pool_shadow_init(pool);
		if (ret)
			goto out_err;
	}

	drm_mm_init(&pool->base, 0, managed_size);
	ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool);
	if (ret)
		return ERR_PTR(ret);

	return pool;

out_err:
	if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY)
		xe_tile_err(tile,
			    "Failed to initialize shadow BO for mem pool (%d)\n", ret);
	if (bo->vmap.is_iomem)
		kvfree(pool->cpu_addr);
	return ERR_PTR(ret);
}

/**
 * xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool.
 * @pool: the memory pool containing the primary and shadow BOs.
 *
 * Copies the entire contents of the primary pool to the shadow pool. This must
 * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY
 * flag to ensure that the shadow pool has the same initial contents as the primary
 * pool. After this initial synchronization, clients can choose to synchronize the
 * shadow pool with the primary pool on a node basis using
 * xe_mem_pool_sync_shadow_locked() as needed.
 *
 * Return: None.
 */
void xe_mem_pool_sync(struct xe_mem_pool *pool)
{
	struct xe_tile *tile = pool_to_tile(pool);
	struct xe_device *xe = tile_to_xe(tile);

	xe_tile_assert(tile, pool->shadow);

	xe_map_memcpy_to(xe, &pool->shadow->vmap, 0,
			 pool->cpu_addr, xe_bo_size(pool->bo));
}

/**
 * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO.
 * @pool: the memory pool containing the primary and shadow BOs.
 *
 * Swaps the primary buffer object with the shadow buffer object in the mem
 * pool. This allows for atomic updates to the contents of the primary BO
 * by first writing to the shadow BO and then swapping it with the primary BO.
 * The swap_guard mutex must be held to ensure synchronization with any
 * concurrent swap operations.
 *
 * Return: None.
 */
void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool)
{
	struct xe_tile *tile = pool_to_tile(pool);

	xe_tile_assert(tile, pool->shadow);
	lockdep_assert_held(&pool->swap_guard);

	swap(pool->bo, pool->shadow);
	if (!pool->bo->vmap.is_iomem)
		pool->cpu_addr = pool->bo->vmap.vaddr;
}

/**
 * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool.
 * @node: the node allocated in the memory pool.
 *
 * Copies the specified batch buffer from the primary pool to the shadow pool.
 * The swap_guard mutex must be held to ensure synchronization with any
 * concurrent swap operations.
 *
 * Return: None.
 */
void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node)
{
	struct xe_mem_pool *pool = node_to_pool(node);
	struct xe_tile *tile = pool_to_tile(pool);
	struct xe_device *xe = tile_to_xe(tile);
	struct drm_mm_node *sa_node = &node->sa_node;

	xe_tile_assert(tile, pool->shadow);
	lockdep_assert_held(&pool->swap_guard);

	xe_map_memcpy_to(xe, &pool->shadow->vmap,
			 sa_node->start,
			 pool->cpu_addr + sa_node->start,
			 sa_node->size);
}

/**
 * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool.
 * @pool: the memory pool
 *
 * Returns: GGTT address of the memory pool.
 */
u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool)
{
	return xe_bo_ggtt_addr(pool->bo);
}

/**
 * xe_mem_pool_cpu_addr() - Retrieve CPU address of memory pool.
 * @pool: the memory pool
 *
 * Returns: CPU virtual address of memory pool.
 */
void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool)
{
	return pool->cpu_addr;
}

/**
 * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap
 * operations on a memory pool.
 * @pool: the memory pool
 *
 * Returns: Swap guard mutex or NULL if shadow pool is not created.
 */
struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool)
{
	if (!pool->shadow)
		return NULL;

	return &pool->swap_guard;
}

/**
 * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation
 * to the GPU memory.
 * @node: the node allocated in the memory pool to flush.
 */
void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node)
{
	struct xe_mem_pool *pool = node_to_pool(node);
	struct xe_tile *tile = pool_to_tile(pool);
	struct xe_device *xe = tile_to_xe(tile);
	struct drm_mm_node *sa_node = &node->sa_node;

	if (!pool->bo->vmap.is_iomem)
		return;

	xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start,
			 pool->cpu_addr + sa_node->start,
			 sa_node->size);
}

/**
 * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the
 * sub-allocation.
 * @node: the node allocated in the memory pool to read back.
 */
void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node)
{
	struct xe_mem_pool *pool = node_to_pool(node);
	struct xe_tile *tile = pool_to_tile(pool);
	struct xe_device *xe = tile_to_xe(tile);
	struct drm_mm_node *sa_node = &node->sa_node;

	if (!pool->bo->vmap.is_iomem)
		return;

	xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start,
			   &pool->bo->vmap, sa_node->start, sa_node->size);
}

/**
 * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool.
 *
 * Returns: node structure or an ERR_PTR(-ENOMEM).
 */
struct xe_mem_pool_node *xe_mem_pool_alloc_node(void)
{
	struct xe_mem_pool_node *node = kzalloc_obj(*node);

	if (!node)
		return ERR_PTR(-ENOMEM);

	return node;
}

/**
 * xe_mem_pool_insert_node() - Insert a node into the memory pool.
 * @pool: the memory pool to insert into
 * @node: the node to insert
 * @size: the size of the node to be allocated in bytes.
 *
 * Inserts a node into the specified memory pool using drm_mm for
 * allocation.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
			    struct xe_mem_pool_node *node, u32 size)
{
	if (!pool)
		return -EINVAL;

	return drm_mm_insert_node(&pool->base, &node->sa_node, size);
}

/**
 * xe_mem_pool_free_node() - Free a node allocated from the memory pool.
 * @node: the node to free
 *
 * Returns: None.
 */
void xe_mem_pool_free_node(struct xe_mem_pool_node *node)
{
	if (!node)
		return;

	drm_mm_remove_node(&node->sa_node);
	kfree(node);
}

/**
 * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node.
 * @node: the node allocated in the memory pool
 *
 * Returns: CPU virtual address of the node.
 */
void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node)
{
	struct xe_mem_pool *pool = node_to_pool(node);

	return xe_mem_pool_cpu_addr(pool) + node->sa_node.start;
}

/**
 * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging.
 * @pool: the memory pool to be dumped.
 * @p: The DRM printer to use for output.
 *
 * Only the drm managed region is dumped, not the state of the BOs or any other
 * pool information.
 *
 * Returns: None.
 */
void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p)
{
	drm_mm_print(&pool->base, p);
}
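
A rough sketch of the shadow flow described by the kernel-doc above (illustrative
only, not part of the patch): the helper name is hypothetical, "pool" is assumed to
have been created with XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY and seeded once with
xe_mem_pool_sync(), and whether to swap after mirroring a node is left to whatever
caller a later patch adds.

/* Hypothetical publish step, for illustration only. */
static void example_shadow_publish(struct xe_mem_pool *pool,
				   struct xe_mem_pool_node *node)
{
	struct mutex *guard = xe_mem_pool_bo_swap_guard(pool);

	/* A NULL guard means the pool was created without a shadow BO. */
	if (!guard)
		return;

	mutex_lock(guard);
	/* Mirror the updated node into the shadow BO... */
	xe_mem_pool_sync_shadow_locked(node);
	/* ...then, if desired, make the shadow the new active BO. */
	xe_mem_pool_swap_shadow_locked(pool);
	mutex_unlock(guard);
}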
drivers/gpu/drm/xe/xe_mem_pool.h (+35)
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2026 Intel Corporation
 */
#ifndef _XE_MEM_POOL_H_
#define _XE_MEM_POOL_H_

#include <linux/sizes.h>
#include <linux/types.h>

#include <drm/drm_mm.h>
#include "xe_mem_pool_types.h"

struct drm_printer;
struct xe_mem_pool;
struct xe_tile;

struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
				     u32 guard, int flags);
void xe_mem_pool_sync(struct xe_mem_pool *pool);
void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool);
void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node);
u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool);
void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool);
struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool);
void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node);
void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node);
struct xe_mem_pool_node *xe_mem_pool_alloc_node(void);
int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
			    struct xe_mem_pool_node *node, u32 size);
void xe_mem_pool_free_node(struct xe_mem_pool_node *node);
void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node);
void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p);

#endif
drivers/gpu/drm/xe/xe_mem_pool_types.h (+21)
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2026 Intel Corporation
 */

#ifndef _XE_MEM_POOL_TYPES_H_
#define _XE_MEM_POOL_TYPES_H_

#include <drm/drm_mm.h>

#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY	BIT(0)

/**
 * struct xe_mem_pool_node - Sub-range allocations from mem pool.
 */
struct xe_mem_pool_node {
	/** @sa_node: drm_mm_node for this allocation. */
	struct drm_mm_node sa_node;
};

#endif