Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tlb_inval.h"
#include "xe_vm.h"

struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_bo_vmas;
	bool has_svm_userptr_vmas;
};

/**
 * struct xe_madvise_details - Argument to madvise_funcs
 * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
 * @has_purged_bo: Track if any BO was purged (for purgeable state)
 * @retained_ptr: User pointer for retained value (for purgeable state)
 *
 * The madvise IOCTL handler may, in addition to the user-space
 * args, have additional info to pass into the madvise_func that
 * handles the madvise type. Use a struct xe_madvise_details
 * for that and extend the struct as necessary.
 */
struct xe_madvise_details {
	struct drm_pagemap *dpagemap;
	bool has_purged_bo;
	u64 retained_ptr;
};

static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_objs(*madvise_range->vmas, max_vmas);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMA's in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
			madvise_range->has_svm_userptr_vmas = true;

		if (madvise_range->num_vmas == max_vmas) {
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range-num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op,
				      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		struct xe_vma *vma = vmas[i];
		struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc;

		/* TODO: Extend attributes to bo based vmas */
		if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     loc->migration_policy == op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vma)) {
			vma->skip_invalidation = true;
		} else {
			vma->skip_invalidation = false;
			loc->devmem_fd = op->preferred_mem_loc.devmem_fd;
			/* Until multi-device support is added, migration_policy
			 * is of no use and can be ignored.
			 */
			loc->migration_policy = op->preferred_mem_loc.migration_policy;
			drm_pagemap_put(loc->dpagemap);
			loc->dpagemap = NULL;
			if (details->dpagemap)
				loc->dpagemap = drm_pagemap_get(details->dpagemap);
		}
	}
}

static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op,
			   struct xe_madvise_details *details)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate cpu page table, so bo can migrate to smem in next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op,
			      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

/**
 * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf
 * @bo: Buffer object
 *
 * Prevent marking imported or exported dma-bufs as purgeable.
 * For imported BOs, Xe doesn't own the backing store and cannot
 * safely reclaim pages (exporter or other devices may still be
 * using them). For exported BOs, external devices may have active
 * mappings we cannot track.
 *
 * Return: true if BO is imported or exported, false otherwise
 */
static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo)
{
	struct drm_gem_object *obj = &bo->ttm.base;

	/* Imported: exporter owns backing store */
	if (drm_gem_is_imported(obj))
		return true;

	/* Exported: external devices may be accessing */
	if (obj->dma_buf)
		return true;

	return false;
}

/**
 * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation
 *
 * Distinguishes whether a BO's VMAs are all DONTNEED, have at least
 * one WILLNEED, or have no VMAs at all.
 *
 * Enum values align with XE_MADV_PURGEABLE_* states for consistency.
 */
enum xe_bo_vmas_purge_state {
	/** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */
	XE_BO_VMAS_STATE_WILLNEED = 0,
	/** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */
	XE_BO_VMAS_STATE_DONTNEED = 1,
	/** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */
	XE_BO_VMAS_STATE_NO_VMAS = 2,
};

/*
 * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and
 * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across
 * both enums so the single-line cast is always valid.
 */
static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED,
	      "VMA purge state WILLNEED must equal madv purgeable WILLNEED");
static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED,
	      "VMA purge state DONTNEED must equal madv purgeable DONTNEED");

/**
 * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state
 * @bo: Buffer object
 *
 * Check all VMAs across all VMs to determine aggregate purgeable state.
 * Shared BOs require unanimous DONTNEED state from all mappings.
 *
 * Caller must hold BO dma-resv lock.
 *
 * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED,
 * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED,
 * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs
 */
static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo)
{
	struct drm_gpuvm_bo *vm_bo;
	struct drm_gpuva *gpuva;
	struct drm_gem_object *obj = &bo->ttm.base;
	bool has_vmas = false;

	xe_bo_assert_held(bo);

	/* Shared dma-bufs cannot be purgeable */
	if (xe_bo_is_dmabuf_shared(bo))
		return XE_BO_VMAS_STATE_WILLNEED;

	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
			struct xe_vma *vma = gpuva_to_vma(gpuva);

			has_vmas = true;

			/* Any non-DONTNEED VMA prevents purging */
			if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED)
				return XE_BO_VMAS_STATE_WILLNEED;
		}
	}

	/*
	 * No VMAs => preserve existing BO purgeable state.
	 * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped.
	 */
	if (!has_vmas)
		return XE_BO_VMAS_STATE_NO_VMAS;

	return XE_BO_VMAS_STATE_DONTNEED;
}

/**
 * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs
 * @bo: Buffer object
 *
 * Walk all VMAs to determine if BO should be purgeable or not.
 * Shared BOs require unanimous DONTNEED state from all mappings.
 * If the BO has no VMAs the existing state is preserved.
 *
 * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists,
 * VM lock must also be held (write) to prevent concurrent VMA modifications.
 * This is satisfied at both call sites:
 * - xe_vma_destroy(): holds vm->lock write
 * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path)
 *
 * Return: nothing
 */
void xe_bo_recompute_purgeable_state(struct xe_bo *bo)
{
	enum xe_bo_vmas_purge_state vma_state;

	if (!bo)
		return;

	xe_bo_assert_held(bo);

	/*
	 * Once purged, always purged. Cannot transition back to WILLNEED.
	 * This matches i915 semantics where purged BOs are permanently invalid.
	 */
	if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED)
		return;

	vma_state = xe_bo_all_vmas_dontneed(bo);

	if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable &&
	    vma_state != XE_BO_VMAS_STATE_NO_VMAS)
		xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state);
}

/**
 * madvise_purgeable - Handle purgeable buffer object advice
 * @xe: XE device
 * @vm: VM
 * @vmas: Array of VMAs
 * @num_vmas: Number of VMAs
 * @op: Madvise operation
 * @details: Madvise details for return values
 *
 * Handles DONTNEED/WILLNEED/PURGED states. Tracks if any BO was purged
 * in details->has_purged_bo for later copy to userspace.
 */
static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op,
			      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE);

	for (i = 0; i < num_vmas; i++) {
		struct xe_bo *bo = xe_vma_bo(vmas[i]);

		if (!bo) {
			/* Purgeable state applies to BOs only, skip non-BO VMAs */
			vmas[i]->skip_invalidation = true;
			continue;
		}

		/* BO must be locked before modifying madv state */
		xe_bo_assert_held(bo);

		/* Skip shared dma-bufs - no PTEs to zap */
		if (xe_bo_is_dmabuf_shared(bo)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		/*
		 * Once purged, always purged. Cannot transition back to WILLNEED.
		 * This matches i915 semantics where purged BOs are permanently invalid.
		 */
		if (xe_bo_is_purged(bo)) {
			details->has_purged_bo = true;
			vmas[i]->skip_invalidation = true;
			continue;
		}

		switch (op->purge_state_val.val) {
		case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED:
			vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED;
			vmas[i]->skip_invalidation = true;

			xe_bo_recompute_purgeable_state(bo);
			break;
		case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED:
			vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED;
			/*
			 * Don't zap PTEs at DONTNEED time -- pages are still
			 * alive. The zap happens in xe_bo_move_notify() right
			 * before the shrinker frees them.
			 */
			vmas[i]->skip_invalidation = true;

			xe_bo_recompute_purgeable_state(bo);
			break;
		default:
			/* Should never hit - values validated in madvise_args_are_sane() */
			xe_assert(vm->xe, 0);
			return;
		}
	}
}

typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op,
			     struct xe_madvise_details *details);

static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
	[DRM_XE_VMA_ATTR_PURGEABLE_STATE] = madvise_purgeable,
};

static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);
	struct xe_tlb_inval_batch batch;
	int err;

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	err = xe_tlb_inval_range_tilemask_submit(vm->xe, vm->usm.asid, start, end,
						 tile_mask, &batch);
	if (!err)
		xe_tlb_inval_batch_wait(&batch);

	return err;
}

static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
				 args->preferred_mem_loc.region_instance != 0))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				 DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 pat_index, coh_mode;

		if (XE_IOCTL_DBG(xe, args->pat_index.val >= xe->pat.n_entries))
			return false;

		pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_2WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	case DRM_XE_VMA_ATTR_PURGEABLE_STATE:
	{
		u32 val = args->purge_state_val.val;

		if (XE_IOCTL_DBG(xe, !(val == DRM_XE_VMA_PURGEABLE_STATE_WILLNEED ||
				       val == DRM_XE_VMA_PURGEABLE_STATE_DONTNEED)))
			return false;

		if (XE_IOCTL_DBG(xe, args->purge_state_val.pad))
			return false;

		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise *args,
				   struct xe_madvise_details *details)
{
	struct xe_device *xe = vm->xe;

	memset(details, 0, sizeof(*details));

	/* Store retained pointer for purgeable state */
	if (args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE) {
		details->retained_ptr = args->purge_state_val.retained_ptr;
		return 0;
	}

	if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
		int fd = args->preferred_mem_loc.devmem_fd;
		struct drm_pagemap *dpagemap;

		if (fd <= 0)
			return 0;

		dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
						  args->preferred_mem_loc.region_instance);
		if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap)))
			return PTR_ERR(dpagemap);

		/* Don't allow a foreign placement without a fast interconnect! */
		if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != vm->svm.peer.owner)) {
			drm_pagemap_put(dpagemap);
			return -ENOLINK;
		}
		details->dpagemap = dpagemap;
	}

	return 0;
}

static void xe_madvise_details_fini(struct xe_madvise_details *details)
{
	drm_pagemap_put(details->dpagemap);
}

static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details *details)
{
	u32 retained;

	if (!details->retained_ptr)
		return 0;

	retained = !details->has_purged_bo;

	if (put_user(retained, (u32 __user *)u64_to_user_ptr(details->retained_ptr)))
		return -EFAULT;

	return 0;
}

static bool check_pat_args_are_sane(struct xe_device *xe,
				    struct xe_vmas_in_madvise_range *madvise_range,
				    u16 pat_index)
{
	u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	int i;

	/*
	 * Using coh_none with CPU cached buffers is not allowed on iGPU.
	 * On iGPU the GPU shares the LLC with the CPU, so with coh_none
	 * the GPU bypasses CPU caches and reads directly from DRAM,
	 * potentially seeing stale sensitive data from previously freed
	 * pages. On dGPU this restriction does not apply, because the
	 * platform does not provide a non-coherent system memory access
	 * path that would violate the DMA coherency contract.
	 */
	if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
		return true;

	for (i = 0; i < madvise_range->num_vmas; i++) {
		struct xe_vma *vma = madvise_range->vmas[i];
		struct xe_bo *bo = xe_vma_bo(vma);

		if (bo) {
			/* BO with WB caching + COH_NONE is not allowed */
			if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
				return false;
			/* Imported dma-buf without caching info, assume cached */
			if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
				return false;
		} else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
					 xe_vma_is_userptr(vma)))
			/* System memory (userptr/SVM) is always CPU cached */
			return false;
	}

	return true;
}

static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
				   xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}

/**
 * xe_vm_madvise_ioctl - Handle madvise ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for VMAs within
 * the input range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {
		/*
		 * Userspace may pass canonical (sign-extended) addresses.
		 * Strip the sign extension to get the internal non-canonical
		 * form used by the GPUVM, matching xe_vm_bind_ioctl() behavior.
		 */
		.addr = xe_device_uncanonicalize_addr(xe, args->start),
		.range = args->range,
	};
	struct xe_madvise_details details;
	u16 pat_index, coh_mode;
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;
	bool do_retained;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	/* Cache whether we need to write retained, and validate it's initialized to 0 */
	do_retained = args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE &&
		      args->purge_state_val.retained_ptr;
	if (do_retained) {
		u32 retained;
		u32 __user *retained_ptr;

		retained_ptr = u64_to_user_ptr(args->purge_state_val.retained_ptr);
		if (get_user(retained, retained_ptr)) {
			err = -EFAULT;
			goto put_vm;
		}

		if (XE_IOCTL_DBG(xe, retained != 0)) {
			err = -EINVAL;
			goto put_vm;
		}
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_madvise_details_init(vm, args, &details);
	if (err)
		goto unlock_vm;

	err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range);
	if (err)
		goto madv_fini;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto madv_fini;

	if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
		pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		if (XE_IOCTL_DBG(xe, madvise_range.has_svm_userptr_vmas &&
				 xe_device_is_l2_flush_optimized(xe) &&
				 (pat_index != 19 && coh_mode != XE_COH_2WAY))) {
			err = -EINVAL;
			goto madv_fini;
		}
	}

	if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
		if (!check_pat_args_are_sane(xe, &madvise_range,
					     args->pat_index.val)) {
			err = -EINVAL;
			goto free_vmas;
		}
	}

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				goto free_vmas;
			}
		}

		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;

				if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
					if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach &&
							 xe_device_is_l2_flush_optimized(xe) &&
							 (pat_index != 19 &&
							  coh_mode != XE_COH_2WAY))) {
						err = -EINVAL;
						goto err_fini;
					}
				}

				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

	if (madvise_range.has_svm_userptr_vmas) {
		err = xe_svm_notifier_lock_interruptible(vm);
		if (err)
			goto err_fini;
	}

	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));

	/* Ensure the madvise function exists for this type */
	if (!madvise_funcs[attr_type]) {
		err = -EINVAL;
		goto err_fini;
	}

	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
				 &details);

	err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
					     madvise_range.addr + args->range);

	if (madvise_range.has_svm_userptr_vmas)
		xe_svm_notifier_unlock(vm);

err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
free_vmas:
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
madv_fini:
	xe_madvise_details_fini(&details);
unlock_vm:
	up_write(&vm->lock);

	/* Write retained value to user after releasing all locks */
	if (!err && do_retained)
		err = xe_madvise_purgeable_retained_to_user(&details);
put_vm:
	xe_vm_put(vm);
	return err;
}
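
For orientation, below is a minimal userspace sketch of the purgeable-state path implemented above, including the retained_ptr protocol enforced by xe_vm_madvise_ioctl() (the value must be pre-zeroed, and it is written back only after all locks are dropped). The attribute and field names are taken from this file; the ioctl request name DRM_IOCTL_XE_MADVISE and the exact struct drm_xe_madvise layout are assumed to match the xe_drm.h of the same patch series, so treat this as a sketch rather than a verified uAPI reference.

/* Hypothetical userspace sketch -- not part of the kernel file above. */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/xe_drm.h>

static int xe_madvise_dontneed(int fd, uint32_t vm_id, uint64_t start, uint64_t range)
{
	uint32_t retained = 0;	/* must be pre-initialized to 0; see the get_user() check */
	struct drm_xe_madvise args;

	memset(&args, 0, sizeof(args));			/* reserved[] must be zero */
	args.vm_id = vm_id;
	args.start = start;				/* 4 KiB aligned */
	args.range = range;				/* 4 KiB aligned, at least 4 KiB */
	args.type = DRM_XE_VMA_ATTR_PURGEABLE_STATE;
	args.purge_state_val.val = DRM_XE_VMA_PURGEABLE_STATE_DONTNEED;
	args.purge_state_val.retained_ptr = (uint64_t)(uintptr_t)&retained;

	if (drmIoctl(fd, DRM_IOCTL_XE_MADVISE, &args))	/* request name assumed */
		return -errno;

	/* retained == 1 means no BO in the range had already been purged. */
	return (int)retained;
}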
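
A second sketch, continuing the includes above, marks a CPU-address-mirror range as preferring the VM's own device memory. Only the names visible in this file are certain; the sentinel semantics of DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE (prefer the local device) and the drmIoctl() wrapper usage are assumptions.

/* Hypothetical userspace sketch -- not part of the kernel file above. */
static int xe_madvise_prefer_local_vram(int fd, uint32_t vm_id, uint64_t start, uint64_t range)
{
	struct drm_xe_madvise args;

	memset(&args, 0, sizeof(args));
	args.vm_id = vm_id;
	args.start = start;
	args.range = range;
	args.type = DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC;
	/*
	 * Sentinel fd (not a real file descriptor): prefer the VM's own device.
	 * A devmem_fd > 0 would instead name a peer device's pagemap and be
	 * resolved through xe_drm_pagemap_from_fd() in xe_madvise_details_init().
	 */
	args.preferred_mem_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE;
	args.preferred_mem_loc.migration_policy = 0;	/* currently ignored, see madvise_preferred_mem_loc() */
	args.preferred_mem_loc.region_instance = 0;	/* required with a default/sentinel fd */

	return drmIoctl(fd, DRM_IOCTL_XE_MADVISE, &args) ? -errno : 0;
}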