// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_svm.h"
#include "xe_userptr.h"

#include <linux/mm.h>

#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"

static void xe_userptr_assert_in_notifier(struct xe_vm *vm)
{
        lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
                       (lockdep_is_held(&vm->lock) &&
                        lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
                        dma_resv_held(xe_vm_resv(vm))));
}

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since the last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->svm.gpusvm.notifier_lock held. There is no guarantee that
 * the vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if the userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
{
        return mmu_interval_check_retry(&uvma->userptr.notifier,
                                        uvma->userptr.pages.notifier_seq) ?
                -EAGAIN : 0;
}
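
/*
 * Typical caller pattern (illustrative sketch only, not lifted from an
 * existing caller): do the cheap lockless check first, then confirm the
 * result under the notifier lock before trusting it.
 *
 *	if (xe_vma_userptr_check_repin(uvma))
 *		return -EAGAIN;	(or trigger a repin)
 *
 *	down_read(&vm->svm.gpusvm.notifier_lock);
 *	err = __xe_vm_userptr_needs_repin(vm);
 *	up_read(&vm->svm.gpusvm.notifier_lock);
 */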

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the svm.gpusvm.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
        lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock);

        return (list_empty(&vm->userptr.repin_list) &&
                list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

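/**
 * xe_vma_userptr_pin_pages() - Acquire the pages backing a userptr vma
 * @uvma: The userptr vma
 *
 * Grab the current set of CPU pages backing the userptr range via
 * drm_gpusvm_get_pages() so they can be mapped into the GPU page tables.
 * Must be called with vm->lock held. For a vma already marked
 * XE_VMA_DESTROYED this is a no-op.
 *
 * Return: 0 on success, negative error code on failure.
 */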
int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
{
        struct xe_vma *vma = &uvma->vma;
        struct xe_vm *vm = xe_vma_vm(vma);
        struct xe_device *xe = vm->xe;
        struct drm_gpusvm_ctx ctx = {
                .read_only = xe_vma_read_only(vma),
                .device_private_page_owner = xe_svm_private_page_owner(vm, false),
                .allow_mixed = true,
        };

        lockdep_assert_held(&vm->lock);
        xe_assert(xe, xe_vma_is_userptr(vma));

        if (vma->gpuva.flags & XE_VMA_DESTROYED)
                return 0;

        return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
                                    uvma->userptr.notifier.mm,
                                    &uvma->userptr.notifier,
                                    xe_vma_userptr(vma),
                                    xe_vma_userptr(vma) + xe_vma_size(vma),
                                    &ctx);
}

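/*
 * Invalidate the GPU mappings for @uvma from within the MMU notifier.
 * In fault mode, with an initial bind in place and the multi-pass state
 * free, the TLB invalidation is submitted but not waited upon and a
 * finish cookie is returned so the wait can be deferred to a second
 * pass; otherwise the invalidation is performed synchronously and the
 * pages are unmapped before returning NULL.
 */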
static struct mmu_interval_notifier_finish *
xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma, bool is_deferred)
{
        struct xe_userptr *userptr = &uvma->userptr;
        struct xe_vma *vma = &uvma->vma;
        struct drm_gpusvm_ctx ctx = {
                .in_notifier = true,
                .read_only = xe_vma_read_only(vma),
        };
        long err;

        xe_userptr_assert_in_notifier(vm);
        if (is_deferred)
                xe_assert(vm->xe, userptr->finish_inuse && !userptr->tlb_inval_submitted);

        err = dma_resv_wait_timeout(xe_vm_resv(vm),
                                    DMA_RESV_USAGE_BOOKKEEP,
                                    false, MAX_SCHEDULE_TIMEOUT);
        XE_WARN_ON(err <= 0);

        if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
                if (!userptr->finish_inuse) {
                        /*
                         * Defer the TLB wait to an extra pass so the caller
                         * can pipeline TLB flushes across GPUs before waiting
                         * on any of them.
                         */
                        xe_assert(vm->xe, !userptr->tlb_inval_submitted);
                        userptr->finish_inuse = true;
                        userptr->tlb_inval_submitted = true;
                        err = xe_vm_invalidate_vma_submit(vma, &userptr->inval_batch);
                        XE_WARN_ON(err);
                        return &userptr->finish;
                }
                err = xe_vm_invalidate_vma(vma);
                XE_WARN_ON(err);
        }

        if (is_deferred)
                userptr->finish_inuse = false;
        drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
                               xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
        return NULL;
}

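/*
 * Second-pass helper: wait for a previously submitted TLB invalidation
 * batch, clear the multi-pass state and unmap the userptr pages.
 */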
static void
xe_vma_userptr_complete_tlb_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
        struct xe_userptr *userptr = &uvma->userptr;
        struct xe_vma *vma = &uvma->vma;
        struct drm_gpusvm_ctx ctx = {
                .in_notifier = true,
                .read_only = xe_vma_read_only(vma),
        };

        xe_userptr_assert_in_notifier(vm);
        xe_assert(vm->xe, userptr->finish_inuse);
        xe_assert(vm->xe, userptr->tlb_inval_submitted);

        xe_tlb_inval_batch_wait(&userptr->inval_batch);
        userptr->tlb_inval_submitted = false;
        userptr->finish_inuse = false;
        drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
                               xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
}

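/*
 * First pass of the two-pass invalidation. Marks the vma for repin (in
 * dma-fence mode), kicks software signaling on all BOOKKEEP fences and,
 * when those fences are still outstanding and the multi-pass state is
 * free, defers the actual invalidation by returning a finish cookie that
 * the notifier core hands back to xe_vma_userptr_invalidate_finish().
 */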
static struct mmu_interval_notifier_finish *
xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
        struct xe_userptr *userptr = &uvma->userptr;
        struct xe_vma *vma = &uvma->vma;
        struct dma_resv_iter cursor;
        struct dma_fence *fence;
        bool signaled = true;

        xe_userptr_assert_in_notifier(vm);

        /*
         * Tell exec and rebind worker they need to repin and rebind this
         * userptr.
         */
        if (!xe_vm_in_fault_mode(vm) &&
            !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
                spin_lock(&vm->userptr.invalidated_lock);
                list_move_tail(&userptr->invalidate_link,
                               &vm->userptr.invalidated);
                spin_unlock(&vm->userptr.invalidated_lock);
        }
        /*
         * Preempt fences turn into schedule disables, pipeline these.
         * Note that even in fault mode, we need to wait for binds and
         * unbinds to complete, and those are attached as BOOKKEEP fences
         * to the vm.
         */
        dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
                            DMA_RESV_USAGE_BOOKKEEP);
        dma_resv_for_each_fence_unlocked(&cursor, fence) {
                dma_fence_enable_sw_signaling(fence);
                if (signaled && !dma_fence_is_signaled(fence))
                        signaled = false;
        }
        dma_resv_iter_end(&cursor);

        /*
         * Only one caller at a time can use the multi-pass state.
         * If it's already in use, or all fences are already signaled,
         * proceed directly to invalidation without deferring.
         */
        if (signaled || userptr->finish_inuse)
                return xe_vma_userptr_do_inval(vm, uvma, false);

        /* Defer: the notifier core will call invalidate_finish once done. */
        userptr->finish_inuse = true;

        return &userptr->finish;
}

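/*
 * mmu_interval_notifier invalidate_start callback. Updates the notifier
 * sequence under the gpusvm notifier lock and runs the first
 * invalidation pass; if that pass defers work, the returned finish
 * cookie lets the core call xe_vma_userptr_invalidate_finish() later.
 */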
static bool xe_vma_userptr_invalidate_start(struct mmu_interval_notifier *mni,
                                            const struct mmu_notifier_range *range,
                                            unsigned long cur_seq,
                                            struct mmu_interval_notifier_finish **p_finish)
{
        struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
        struct xe_vma *vma = &uvma->vma;
        struct xe_vm *vm = xe_vma_vm(vma);

        xe_assert(vm->xe, xe_vma_is_userptr(vma));
        trace_xe_vma_userptr_invalidate(vma);

        if (!mmu_notifier_range_blockable(range))
                return false;

        vm_dbg(&xe_vma_vm(vma)->xe->drm,
               "NOTIFIER PASS1: addr=0x%016llx, range=0x%016llx",
               xe_vma_start(vma), xe_vma_size(vma));

        down_write(&vm->svm.gpusvm.notifier_lock);
        mmu_interval_set_seq(mni, cur_seq);

        *p_finish = xe_vma_userptr_invalidate_pass1(vm, uvma);

        up_write(&vm->svm.gpusvm.notifier_lock);
        if (!*p_finish)
                trace_xe_vma_userptr_invalidate_complete(vma);

        return true;
}

static void xe_vma_userptr_invalidate_finish(struct mmu_interval_notifier_finish *finish)
{
        struct xe_userptr_vma *uvma = container_of(finish, typeof(*uvma), userptr.finish);
        struct xe_vma *vma = &uvma->vma;
        struct xe_vm *vm = xe_vma_vm(vma);

        vm_dbg(&xe_vma_vm(vma)->xe->drm,
               "NOTIFIER PASS2: addr=0x%016llx, range=0x%016llx",
               xe_vma_start(vma), xe_vma_size(vma));

        down_write(&vm->svm.gpusvm.notifier_lock);
        /*
         * If a TLB invalidation was previously submitted (deferred from the
         * synchronous pass1 fallback), wait for it and unmap pages.
         * Otherwise, fences have now completed: invalidate the TLB and unmap.
         */
        if (uvma->userptr.tlb_inval_submitted)
                xe_vma_userptr_complete_tlb_inval(vm, uvma);
        else
                xe_vma_userptr_do_inval(vm, uvma, true);
        up_write(&vm->svm.gpusvm.notifier_lock);
        trace_xe_vma_userptr_invalidate_complete(vma);
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
        .invalidate_start = xe_vma_userptr_invalidate_start,
        .invalidate_finish = xe_vma_userptr_invalidate_finish,
};

#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
/**
 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
 * @uvma: The userptr vma to invalidate
 *
 * Perform a forced userptr invalidation for testing purposes.
 */
void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
{
        struct mmu_interval_notifier_finish *finish;
        struct xe_vm *vm = xe_vma_vm(&uvma->vma);

        /* Protect against concurrent userptr pinning */
        lockdep_assert_held(&vm->lock);
        /* Protect against concurrent notifiers */
        lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);
        /*
         * Protect against concurrent instances of this function and
         * the critical exec sections
         */
        xe_vm_assert_held(vm);

        if (!mmu_interval_read_retry(&uvma->userptr.notifier,
                                     uvma->userptr.pages.notifier_seq))
                uvma->userptr.pages.notifier_seq -= 2;

        /* Mimic what the notifier core does when pass1 returns a cookie. */
        finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
        if (finish) {
                if (uvma->userptr.tlb_inval_submitted)
                        xe_vma_userptr_complete_tlb_inval(vm, uvma);
                else
                        xe_vma_userptr_do_inval(vm, uvma, true);
        }
}
#endif

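/**
 * xe_vm_userptr_pin() - Repin all invalidated userptrs in a VM
 * @vm: The VM.
 *
 * Collects all userptr vmas that have been invalidated by the notifier,
 * re-acquires their pages and moves them to the rebind list. A vma that
 * hits -EFAULT is invalidated and dropped from the rebind list instead.
 * On any other error, the collected vmas are returned to the invalidated
 * list so a later call can retry.
 *
 * Called with vm->lock held for write. Must not be called in fault mode.
 *
 * Return: 0 on success, negative error code on failure.
 */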
int xe_vm_userptr_pin(struct xe_vm *vm)
{
        struct xe_userptr_vma *uvma, *next;
        int err = 0;

        xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
        lockdep_assert_held_write(&vm->lock);

        /* Collect invalidated userptrs */
        spin_lock(&vm->userptr.invalidated_lock);
        xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
        list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
                                 userptr.invalidate_link) {
                list_del_init(&uvma->userptr.invalidate_link);
                list_add_tail(&uvma->userptr.repin_link,
                              &vm->userptr.repin_list);
        }
        spin_unlock(&vm->userptr.invalidated_lock);

        /* Pin and move to bind list */
        list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
                                 userptr.repin_link) {
                err = xe_vma_userptr_pin_pages(uvma);
                if (err == -EFAULT) {
                        list_del_init(&uvma->userptr.repin_link);
                        /*
                         * We might already have pinned once, but then had to
                         * retry before the rebind happened, due to some other
                         * condition in the caller. In the meantime the userptr
                         * got invalidated by the notifier, so we need to
                         * revalidate here, and this time we hit the -EFAULT.
                         * In that case make sure we remove ourselves from the
                         * rebind list to avoid going down in flames.
                         */
                        if (!list_empty(&uvma->vma.combined_links.rebind))
                                list_del_init(&uvma->vma.combined_links.rebind);

                        /* Wait for pending binds */
                        xe_vm_lock(vm, false);
                        dma_resv_wait_timeout(xe_vm_resv(vm),
                                              DMA_RESV_USAGE_BOOKKEEP,
                                              false, MAX_SCHEDULE_TIMEOUT);

                        down_read(&vm->svm.gpusvm.notifier_lock);
                        err = xe_vm_invalidate_vma(&uvma->vma);
                        up_read(&vm->svm.gpusvm.notifier_lock);
                        xe_vm_unlock(vm);
                        if (err)
                                break;
                } else {
                        if (err)
                                break;

                        list_del_init(&uvma->userptr.repin_link);
                        list_move_tail(&uvma->vma.combined_links.rebind,
                                       &vm->rebind_list);
                }
        }

        if (err) {
                down_write(&vm->svm.gpusvm.notifier_lock);
                spin_lock(&vm->userptr.invalidated_lock);
                list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
                                         userptr.repin_link) {
                        list_del_init(&uvma->userptr.repin_link);
                        list_move_tail(&uvma->userptr.invalidate_link,
                                       &vm->userptr.invalidated);
                }
                spin_unlock(&vm->userptr.invalidated_lock);
                up_write(&vm->svm.gpusvm.notifier_lock);
        }
        return err;
}

/**
 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function does an advisory check for whether the VM has userptrs that
 * need repinning.
 *
 * Return: 0 if there are no indications of userptrs needing repinning,
 * -EAGAIN if there are.
 */
int xe_vm_userptr_check_repin(struct xe_vm *vm)
{
        return (list_empty_careful(&vm->userptr.repin_list) &&
                list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

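/**
 * xe_userptr_setup() - Initialize the userptr state of a vma
 * @uvma: The userptr vma
 * @start: Start CPU address of the userptr range
 * @range: Size of the userptr range in bytes
 *
 * Initializes the repin/invalidation links and registers the MMU
 * interval notifier covering [start, start + range).
 *
 * Return: 0 on success, negative error code on failure.
 */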
int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start,
                     unsigned long range)
{
        struct xe_userptr *userptr = &uvma->userptr;
        int err;

        INIT_LIST_HEAD(&userptr->invalidate_link);
        INIT_LIST_HEAD(&userptr->repin_link);

        err = mmu_interval_notifier_insert(&userptr->notifier, current->mm,
                                           start, range,
                                           &vma_userptr_notifier_ops);
        if (err)
                return err;

        userptr->pages.notifier_seq = LONG_MAX;

        return 0;
}

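/**
 * xe_userptr_remove() - Tear down the userptr mirroring of a vma
 * @uvma: The userptr vma
 *
 * Frees the gpusvm page array and removes the MMU interval notifier.
 * May sleep waiting for a notifier currently running on this range.
 */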
void xe_userptr_remove(struct xe_userptr_vma *uvma)
{
        struct xe_vm *vm = xe_vma_vm(&uvma->vma);
        struct xe_userptr *userptr = &uvma->userptr;

        drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
                              xe_vma_size(&uvma->vma) >> PAGE_SHIFT);

        /*
         * Since userptr pages are not pinned, we can't remove
         * the notifier until we're sure the GPU is not accessing
         * them anymore
         */
        mmu_interval_notifier_remove(&userptr->notifier);
}

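/**
 * xe_userptr_destroy() - Final teardown of a userptr vma's list state
 * @uvma: The userptr vma
 *
 * Removes the vma from the VM's invalidated list. The vma must no
 * longer be on the repin list when this is called.
 */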
void xe_userptr_destroy(struct xe_userptr_vma *uvma)
{
        struct xe_vm *vm = xe_vma_vm(&uvma->vma);

        spin_lock(&vm->userptr.invalidated_lock);
        xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link));
        list_del(&uvma->userptr.invalidate_link);
        spin_unlock(&vm->userptr.invalidated_lock);
}