// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tlb_inval.h"
#include "xe_vm.h"

struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_bo_vmas;
	bool has_svm_userptr_vmas;
};

/**
 * struct xe_madvise_details - Argument to madvise_funcs
 * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
 * @has_purged_bo: Track if any BO was purged (for purgeable state)
 * @retained_ptr: User pointer for retained value (for purgeable state)
 *
 * The madvise IOCTL handler may, in addition to the user-space
 * args, have additional info to pass into the madvise_func that
 * handles the madvise type. Use a struct xe_madvise_details
 * for that and extend the struct as necessary.
 */
struct xe_madvise_details {
	struct drm_pagemap *dpagemap;
	bool has_purged_bo;
	u64 retained_ptr;
};

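/*
 * Collect all VMAs intersecting [addr, addr + range) into a dynamically
 * grown array in @madvise_range and note whether any of them are BO backed
 * or SVM/userptr mappings. Returns 0 on success or -ENOMEM.
 */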
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas),
					    GFP_KERNEL);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
			madvise_range->has_svm_userptr_vmas = true;

		if (madvise_range->num_vmas == max_vmas) {
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

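/*
 * Apply a preferred memory location advice to each VMA in the range. Only
 * CPU address mirror (SVM) VMAs are updated; VMAs that already match the
 * requested placement are marked to skip invalidation.
 */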
static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op,
				      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		struct xe_vma *vma = vmas[i];
		struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc;

		/* TODO: Extend attributes to bo based vmas */
		if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     loc->migration_policy == op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vma)) {
			vma->skip_invalidation = true;
		} else {
			vma->skip_invalidation = false;
			loc->devmem_fd = op->preferred_mem_loc.devmem_fd;
			/*
			 * Until multi-device support is added, migration_policy
			 * is unused and can be ignored.
			 */
			loc->migration_policy = op->preferred_mem_loc.migration_policy;
			drm_pagemap_put(loc->dpagemap);
			loc->dpagemap = NULL;
			if (details->dpagemap)
				loc->dpagemap = drm_pagemap_get(details->dpagemap);
		}
	}
}

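/*
 * Apply an atomic access advice to each VMA and, where present, to the
 * backing BO. Userptr VMAs are only updated when DRM_XE_ATOMIC_DEVICE is
 * requested and the device supports atomics on system memory. VRAM BOs
 * switching to CPU or GLOBAL atomics get their CPU mappings unmapped so
 * the BO can migrate to system memory on the next CPU access.
 */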
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op,
			   struct xe_madvise_details *details)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate CPU mappings so the BO can migrate to SMEM on next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

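/*
 * Apply a new PAT index to each VMA in the range. VMAs that already use
 * the requested PAT index are marked to skip invalidation.
 */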
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op,
			      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

/**
 * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf
 * @bo: Buffer object
 *
 * Prevent marking imported or exported dma-bufs as purgeable.
 * For imported BOs, Xe doesn't own the backing store and cannot
 * safely reclaim pages (exporter or other devices may still be
 * using them). For exported BOs, external devices may have active
 * mappings we cannot track.
 *
 * Return: true if BO is imported or exported, false otherwise
 */
static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo)
{
	struct drm_gem_object *obj = &bo->ttm.base;

	/* Imported: exporter owns backing store */
	if (drm_gem_is_imported(obj))
		return true;

	/* Exported: external devices may be accessing */
	if (obj->dma_buf)
		return true;

	return false;
}

/**
 * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation
 *
 * Distinguishes whether a BO's VMAs are all DONTNEED, have at least
 * one WILLNEED, or have no VMAs at all.
 *
 * Enum values align with XE_MADV_PURGEABLE_* states for consistency.
 */
enum xe_bo_vmas_purge_state {
	/** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */
	XE_BO_VMAS_STATE_WILLNEED = 0,
	/** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */
	XE_BO_VMAS_STATE_DONTNEED = 1,
	/** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */
	XE_BO_VMAS_STATE_NO_VMAS = 2,
};

/*
 * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and
 * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across
 * both enums so the single-line cast is always valid.
 */
static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED,
	      "VMA purge state WILLNEED must equal madv purgeable WILLNEED");
static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED,
	      "VMA purge state DONTNEED must equal madv purgeable DONTNEED");

/**
 * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state
 * @bo: Buffer object
 *
 * Check all VMAs across all VMs to determine aggregate purgeable state.
 * Shared BOs require unanimous DONTNEED state from all mappings.
 *
 * Caller must hold BO dma-resv lock.
 *
 * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED,
 * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED,
 * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs
 */
static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo)
{
	struct drm_gpuvm_bo *vm_bo;
	struct drm_gpuva *gpuva;
	struct drm_gem_object *obj = &bo->ttm.base;
	bool has_vmas = false;

	xe_bo_assert_held(bo);

	/* Shared dma-bufs cannot be purgeable */
	if (xe_bo_is_dmabuf_shared(bo))
		return XE_BO_VMAS_STATE_WILLNEED;

	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
			struct xe_vma *vma = gpuva_to_vma(gpuva);

			has_vmas = true;

			/* Any non-DONTNEED VMA prevents purging */
			if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED)
				return XE_BO_VMAS_STATE_WILLNEED;
		}
	}

	/*
	 * No VMAs => preserve existing BO purgeable state.
	 * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped.
	 */
	if (!has_vmas)
		return XE_BO_VMAS_STATE_NO_VMAS;

	return XE_BO_VMAS_STATE_DONTNEED;
}

/**
 * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs
 * @bo: Buffer object
 *
 * Walk all VMAs to determine if BO should be purgeable or not.
 * Shared BOs require unanimous DONTNEED state from all mappings.
 * If the BO has no VMAs the existing state is preserved.
 *
 * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists,
 * VM lock must also be held (write) to prevent concurrent VMA modifications.
 * This is satisfied at both call sites:
 * - xe_vma_destroy(): holds vm->lock write
 * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path)
 *
 * Return: nothing
 */
void xe_bo_recompute_purgeable_state(struct xe_bo *bo)
{
	enum xe_bo_vmas_purge_state vma_state;

	if (!bo)
		return;

	xe_bo_assert_held(bo);

	/*
	 * Once purged, always purged. Cannot transition back to WILLNEED.
	 * This matches i915 semantics where purged BOs are permanently invalid.
	 */
	if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED)
		return;

	vma_state = xe_bo_all_vmas_dontneed(bo);

	if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable &&
	    vma_state != XE_BO_VMAS_STATE_NO_VMAS)
		xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state);
}

/**
 * madvise_purgeable() - Handle purgeable buffer object advice
 * @xe: XE device
 * @vm: VM
 * @vmas: Array of VMAs
 * @num_vmas: Number of VMAs
 * @op: Madvise operation
 * @details: Madvise details for return values
 *
 * Handles DONTNEED/WILLNEED/PURGED states. Tracks if any BO was purged
 * in details->has_purged_bo for later copy to userspace.
 */
static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op,
			      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE);

	for (i = 0; i < num_vmas; i++) {
		struct xe_bo *bo = xe_vma_bo(vmas[i]);

		if (!bo) {
			/* Purgeable state applies to BOs only, skip non-BO VMAs */
			vmas[i]->skip_invalidation = true;
			continue;
		}

		/* BO must be locked before modifying madv state */
		xe_bo_assert_held(bo);

		/* Skip shared dma-bufs - no PTEs to zap */
		if (xe_bo_is_dmabuf_shared(bo)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		/*
		 * Once purged, always purged. Cannot transition back to WILLNEED.
		 * This matches i915 semantics where purged BOs are permanently invalid.
		 */
		if (xe_bo_is_purged(bo)) {
			details->has_purged_bo = true;
			vmas[i]->skip_invalidation = true;
			continue;
		}

		switch (op->purge_state_val.val) {
		case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED:
			vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED;
			vmas[i]->skip_invalidation = true;

			xe_bo_recompute_purgeable_state(bo);
			break;
		case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED:
			vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED;
			/*
			 * Don't zap PTEs at DONTNEED time -- pages are still
			 * alive. The zap happens in xe_bo_move_notify() right
			 * before the shrinker frees them.
			 */
			vmas[i]->skip_invalidation = true;

			xe_bo_recompute_purgeable_state(bo);
			break;
		default:
			/* Should never hit - values validated in madvise_args_are_sane() */
			xe_assert(vm->xe, 0);
			return;
		}
	}
}

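/*
 * Madvise handler signature and dispatch table, indexed by the attribute
 * type passed in the ioctl arguments.
 */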
typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op,
			     struct xe_madvise_details *details);

static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
	[DRM_XE_VMA_ATTR_PURGEABLE_STATE] = madvise_purgeable,
};

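/*
 * Zap the GPU PTEs of all VMAs in the range that were not marked
 * skip_invalidation, after waiting for pending binds. Returns a mask of
 * tiles whose page tables were touched and need TLB invalidation.
 */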
static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

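/*
 * Zap PTEs in the range and, if any page tables changed, submit a TLB
 * invalidation for the affected tiles and wait for it to complete.
 */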
static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);
	struct xe_tlb_inval_batch batch;
	int err;

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	err = xe_tlb_inval_range_tilemask_submit(vm->xe, vm->usm.asid, start, end,
						 tile_mask, &batch);
	if (!err)
		xe_tlb_inval_batch_wait(&batch);

	return err;
}

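/*
 * Validate the user supplied madvise arguments: range alignment and size,
 * per-type attribute values, and that all pad/reserved fields are zero.
 */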
static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
				 args->preferred_mem_loc.region_instance != 0))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				 DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 pat_index, coh_mode;

		if (XE_IOCTL_DBG(xe, args->pat_index.val >= xe->pat.n_entries))
			return false;

		pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_2WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	case DRM_XE_VMA_ATTR_PURGEABLE_STATE:
	{
		u32 val = args->purge_state_val.val;

		if (XE_IOCTL_DBG(xe, !(val == DRM_XE_VMA_PURGEABLE_STATE_WILLNEED ||
				       val == DRM_XE_VMA_PURGEABLE_STATE_DONTNEED)))
			return false;

		if (XE_IOCTL_DBG(xe, args->purge_state_val.pad))
			return false;

		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

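/*
 * Fill in a struct xe_madvise_details from the ioctl arguments: stash the
 * retained-value user pointer for purgeable advice, or look up and take a
 * reference on the drm_pagemap for a preferred-location advice that names
 * a devmem fd.
 */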
static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise *args,
				   struct xe_madvise_details *details)
{
	struct xe_device *xe = vm->xe;

	memset(details, 0, sizeof(*details));

	/* Store retained pointer for purgeable state */
	if (args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE) {
		details->retained_ptr = args->purge_state_val.retained_ptr;
		return 0;
	}

	if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
		int fd = args->preferred_mem_loc.devmem_fd;
		struct drm_pagemap *dpagemap;

		if (fd <= 0)
			return 0;

		dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
						  args->preferred_mem_loc.region_instance);
		if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap)))
			return PTR_ERR(dpagemap);

		/* Don't allow a foreign placement without a fast interconnect! */
		if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != vm->svm.peer.owner)) {
			drm_pagemap_put(dpagemap);
			return -ENOLINK;
		}
		details->dpagemap = dpagemap;
	}

	return 0;
}

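/* Drop any drm_pagemap reference taken in xe_madvise_details_init() */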
static void xe_madvise_details_fini(struct xe_madvise_details *details)
{
	drm_pagemap_put(details->dpagemap);
}

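/*
 * Copy the "retained" result back to userspace for purgeable advice:
 * 1 if no BO in the range had already been purged, 0 otherwise.
 */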
static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details *details)
{
	u32 retained;

	if (!details->retained_ptr)
		return 0;

	retained = !details->has_purged_bo;

	if (put_user(retained, (u32 __user *)u64_to_user_ptr(details->retained_ptr)))
		return -EFAULT;

	return 0;
}

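/*
 * Reject PAT advice that would combine a non-coherent PAT index with CPU
 * cached memory on integrated GPUs; see the comment below for the rationale.
 */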
static bool check_pat_args_are_sane(struct xe_device *xe,
				    struct xe_vmas_in_madvise_range *madvise_range,
				    u16 pat_index)
{
	u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	int i;

	/*
	 * Using coh_none with CPU cached buffers is not allowed on iGPU.
	 * On iGPU the GPU shares the LLC with the CPU, so with coh_none
	 * the GPU bypasses CPU caches and reads directly from DRAM,
	 * potentially seeing stale sensitive data from previously freed
	 * pages. On dGPU this restriction does not apply, because the
	 * platform does not provide a non-coherent system memory access
	 * path that would violate the DMA coherency contract.
	 */
	if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
		return true;

	for (i = 0; i < madvise_range->num_vmas; i++) {
		struct xe_vma *vma = madvise_range->vmas[i];
		struct xe_bo *bo = xe_vma_bo(vma);

		if (bo) {
			/* BO with WB caching + COH_NONE is not allowed */
			if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
				return false;
			/* Imported dma-buf without caching info, assume cached */
			if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
				return false;
		} else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
					    xe_vma_is_userptr(vma))) {
			/* System memory (userptr/SVM) is always CPU cached */
			return false;
		}
	}

	return true;
}

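/*
 * Check that the requested atomic access mode is compatible with the
 * placement flags of every BO backed VMA in the range.
 */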
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
				   xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}
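
/*
 * Example (userspace sketch, not part of the kernel build): a caller is
 * expected to fill struct drm_xe_madvise and issue the madvise ioctl
 * roughly as below. Field names follow the uAPI used in this file; the
 * ioctl wrapper name (DRM_IOCTL_XE_MADVISE here) and the nested
 * initializer layout are assumptions, and error handling is omitted.
 *
 *	struct drm_xe_madvise madv = {
 *		.vm_id = vm_id,
 *		.start = addr,			// 4 KiB aligned
 *		.range = size,			// 4 KiB aligned, non-zero
 *		.type = DRM_XE_MEM_RANGE_ATTR_ATOMIC,
 *		.atomic = { .val = DRM_XE_ATOMIC_DEVICE },
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_MADVISE, &madv))
 *		err(1, "madvise");
 */
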
/**
 * xe_vm_madvise_ioctl() - Handle madvise ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to apply memory advice to the VMAs within the
 * input range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {
		/*
		 * Userspace may pass canonical (sign-extended) addresses.
		 * Strip the sign extension to get the internal non-canonical
		 * form used by the GPUVM, matching xe_vm_bind_ioctl() behavior.
		 */
		.addr = xe_device_uncanonicalize_addr(xe, args->start),
		.range = args->range,
	};
	struct xe_madvise_details details;
	u16 pat_index, coh_mode;
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;
	bool do_retained;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	/* Cache whether we need to write retained, and validate it's initialized to 0 */
	do_retained = args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE &&
		      args->purge_state_val.retained_ptr;
	if (do_retained) {
		u32 retained;
		u32 __user *retained_ptr;

		retained_ptr = u64_to_user_ptr(args->purge_state_val.retained_ptr);
		if (get_user(retained, retained_ptr)) {
			err = -EFAULT;
			goto put_vm;
		}

		if (XE_IOCTL_DBG(xe, retained != 0)) {
			err = -EINVAL;
			goto put_vm;
		}
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_madvise_details_init(vm, args, &details);
	if (err)
		goto unlock_vm;

	err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range);
	if (err)
		goto madv_fini;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto madv_fini;

	if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
		pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		if (XE_IOCTL_DBG(xe, madvise_range.has_svm_userptr_vmas &&
				 xe_device_is_l2_flush_optimized(xe) &&
				 (pat_index != 19 && coh_mode != XE_COH_2WAY))) {
			err = -EINVAL;
			goto free_vmas;
		}
	}

	if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
		if (!check_pat_args_are_sane(xe, &madvise_range,
					     args->pat_index.val)) {
			err = -EINVAL;
			goto free_vmas;
		}
	}

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				goto free_vmas;
			}
		}

		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;

				if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
					if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach &&
							 xe_device_is_l2_flush_optimized(xe) &&
							 (pat_index != 19 &&
							  coh_mode != XE_COH_2WAY))) {
						err = -EINVAL;
						goto err_fini;
					}
				}

				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));

	/* Ensure the madvise function exists for this type */
	if (!madvise_funcs[attr_type]) {
		err = -EINVAL;
		goto err_fini;
	}

	if (madvise_range.has_svm_userptr_vmas) {
		err = xe_svm_notifier_lock_interruptible(vm);
		if (err)
			goto err_fini;
	}

	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
				 &details);

	err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
					     madvise_range.addr + args->range);

	if (madvise_range.has_svm_userptr_vmas)
		xe_svm_notifier_unlock(vm);

err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
free_vmas:
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
madv_fini:
	xe_madvise_details_fini(&details);
unlock_vm:
	up_write(&vm->lock);

	/* Write retained value to user after releasing all locks */
	if (!err && do_retained)
		err = xe_madvise_purgeable_retained_to_user(&details);
put_vm:
	xe_vm_put(vm);
	return err;
}