Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#include "xe_execlist.h"
7
8#include <drm/drm_managed.h>
9
10#include "instructions/xe_mi_commands.h"
11#include "regs/xe_engine_regs.h"
12#include "regs/xe_gt_regs.h"
13#include "regs/xe_lrc_layout.h"
14#include "xe_assert.h"
15#include "xe_bo.h"
16#include "xe_device.h"
17#include "xe_exec_queue.h"
18#include "xe_gt_types.h"
19#include "xe_irq.h"
20#include "xe_lrc.h"
21#include "xe_macros.h"
22#include "xe_mmio.h"
23#include "xe_mocs.h"
24#include "xe_ring_ops_types.h"
25#include "xe_sched_job.h"
26
#define XE_EXECLIST_HANG_LIMIT	1

/*
 * Position and size of the software context ID field inside the 64-bit
 * LRC descriptor. The XEHP layout is selected by __start_lrc() when
 * GRAPHICS_VERx100() >= 1250; the other layout is used otherwise.
 */
#define SW_CTX_ID_SHIFT		37
#define SW_CTX_ID_WIDTH		11
#define XEHP_SW_CTX_ID_SHIFT	39
#define XEHP_SW_CTX_ID_WIDTH	16

/* Bit mask covering SW_CTX_ID_WIDTH bits starting at SW_CTX_ID_SHIFT. */
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_SHIFT + SW_CTX_ID_WIDTH - 1, \
		    SW_CTX_ID_SHIFT)

/* Bit mask covering XEHP_SW_CTX_ID_WIDTH bits starting at XEHP_SW_CTX_ID_SHIFT. */
#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_SHIFT + XEHP_SW_CTX_ID_WIDTH - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
41
42
43static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
44 u32 ctx_id)
45{
46 struct xe_gt *gt = hwe->gt;
47 struct xe_mmio *mmio = >->mmio;
48 struct xe_device *xe = gt_to_xe(gt);
49 u64 lrc_desc;
50 u32 ring_mode = REG_MASKED_FIELD_ENABLE(GFX_DISABLE_LEGACY_MODE);
51
52 lrc_desc = xe_lrc_descriptor(lrc);
53
54 if (GRAPHICS_VERx100(xe) >= 1250) {
55 xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
56 lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
57 } else {
58 xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
59 lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
60 }
61
62 if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
63 xe_mmio_write32(mmio, RCU_MODE,
64 REG_MASKED_FIELD_ENABLE(RCU_MODE_CCS_ENABLE));
65
66 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
67 lrc->ring.old_tail = lrc->ring.tail;
68
69 /*
70 * Make sure the context image is complete before we submit it to HW.
71 *
72 * Ostensibly, writes (including the WCB) should be flushed prior to
73 * an uncached write such as our mmio register access, the empirical
74 * evidence (esp. on Braswell) suggests that the WC write into memory
75 * may not be visible to the HW prior to the completion of the UC
76 * register write and that we may begin execution from the context
77 * before its image is complete leading to invalid PD chasing.
78 */
79 wmb();
80
81 xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
82 xe_bo_ggtt_addr(hwe->hwsp));
83 xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
84
85 if (xe_device_has_msix(gt_to_xe(hwe->gt)))
86 ring_mode |= REG_MASKED_FIELD_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
87 xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);
88
89 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
90 lower_32_bits(lrc_desc));
91 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
92 upper_32_bits(lrc_desc));
93 xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
94 EL_CTRL_LOAD);
95}
96
97static void __xe_execlist_port_start(struct xe_execlist_port *port,
98 struct xe_execlist_exec_queue *exl)
99{
100 struct xe_device *xe = gt_to_xe(port->hwe->gt);
101 int max_ctx = FIELD_MAX(SW_CTX_ID);
102
103 if (GRAPHICS_VERx100(xe) >= 1250)
104 max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
105
106 xe_execlist_port_assert_held(port);
107
108 if (port->running_exl != exl || !exl->has_run) {
109 port->last_ctx_id++;
110
111 /* 0 is reserved for the kernel context */
112 if (port->last_ctx_id > max_ctx)
113 port->last_ctx_id = 1;
114 }
115
116 __start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
117 port->running_exl = exl;
118 exl->has_run = true;
119}
120
121static void __xe_execlist_port_idle(struct xe_execlist_port *port)
122{
123 u32 noop[2] = { MI_NOOP, MI_NOOP };
124
125 xe_execlist_port_assert_held(port);
126
127 if (!port->running_exl)
128 return;
129
130 xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
131 __start_lrc(port->hwe, port->lrc, 0);
132 port->running_exl = NULL;
133}
134
135static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
136{
137 struct xe_lrc *lrc = exl->q->lrc[0];
138
139 return lrc->ring.tail == lrc->ring.old_tail;
140}
141
142static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
143{
144 struct xe_execlist_exec_queue *exl = NULL;
145 int i;
146
147 xe_execlist_port_assert_held(port);
148
149 for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
150 while (!list_empty(&port->active[i])) {
151 exl = list_first_entry(&port->active[i],
152 struct xe_execlist_exec_queue,
153 active_link);
154 list_del(&exl->active_link);
155
156 if (xe_execlist_is_idle(exl)) {
157 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
158 continue;
159 }
160
161 list_add_tail(&exl->active_link, &port->active[i]);
162 __xe_execlist_port_start(port, exl);
163 return;
164 }
165 }
166
167 __xe_execlist_port_idle(port);
168}
169
170static u64 read_execlist_status(struct xe_hw_engine *hwe)
171{
172 struct xe_gt *gt = hwe->gt;
173 u32 hi, lo;
174
175 lo = xe_mmio_read32(>->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
176 hi = xe_mmio_read32(>->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
177
178 return lo | (u64)hi << 32;
179}
180
181static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
182{
183 u64 status;
184
185 xe_execlist_port_assert_held(port);
186
187 status = read_execlist_status(port->hwe);
188 if (status & BIT(7))
189 return;
190
191 __xe_execlist_port_start_next_active(port);
192}
193
194static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
195 u16 intr_vec)
196{
197 struct xe_execlist_port *port = hwe->exl_port;
198
199 spin_lock(&port->lock);
200 xe_execlist_port_irq_handler_locked(port);
201 spin_unlock(&port->lock);
202}
203
204static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
205 enum xe_exec_queue_priority priority)
206{
207 xe_execlist_port_assert_held(port);
208
209 if (port->running_exl && port->running_exl->active_priority >= priority)
210 return;
211
212 __xe_execlist_port_start_next_active(port);
213}
214
215static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
216{
217 struct xe_execlist_port *port = exl->port;
218 enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
219
220 XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
221 XE_WARN_ON(priority < 0);
222 XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
223
224 spin_lock_irq(&port->lock);
225
226 if (exl->active_priority != priority &&
227 exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
228 /* Priority changed, move it to the right list */
229 list_del(&exl->active_link);
230 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
231 }
232
233 if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
234 exl->active_priority = priority;
235 list_add_tail(&exl->active_link, &port->active[priority]);
236 }
237
238 xe_execlist_port_wake_locked(exl->port, priority);
239
240 spin_unlock_irq(&port->lock);
241}
242
243static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
244{
245 struct xe_execlist_port *port =
246 container_of(timer, struct xe_execlist_port, irq_fail);
247
248 spin_lock_irq(&port->lock);
249 xe_execlist_port_irq_handler_locked(port);
250 spin_unlock_irq(&port->lock);
251
252 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
253 add_timer(&port->irq_fail);
254}
255
256struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
257 struct xe_hw_engine *hwe)
258{
259 struct drm_device *drm = &xe->drm;
260 struct xe_execlist_port *port;
261 int i, err;
262
263 port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
264 if (!port) {
265 err = -ENOMEM;
266 goto err;
267 }
268
269 port->hwe = hwe;
270
271 port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
272 if (IS_ERR(port->lrc)) {
273 err = PTR_ERR(port->lrc);
274 goto err;
275 }
276
277 spin_lock_init(&port->lock);
278 for (i = 0; i < ARRAY_SIZE(port->active); i++)
279 INIT_LIST_HEAD(&port->active[i]);
280
281 port->last_ctx_id = 1;
282 port->running_exl = NULL;
283
284 hwe->irq_handler = xe_execlist_port_irq_handler;
285
286 /* TODO: Fix the interrupt code so it doesn't race like mad */
287 timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
288 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
289 add_timer(&port->irq_fail);
290
291 return port;
292
293err:
294 return ERR_PTR(err);
295}
296
297void xe_execlist_port_destroy(struct xe_execlist_port *port)
298{
299 timer_delete(&port->irq_fail);
300
301 /* Prevent an interrupt while we're destroying */
302 spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock);
303 port->hwe->irq_handler = NULL;
304 spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock);
305
306 xe_lrc_put(port->lrc);
307}
308
309static struct dma_fence *
310execlist_run_job(struct drm_sched_job *drm_job)
311{
312 struct xe_sched_job *job = to_xe_sched_job(drm_job);
313 struct xe_exec_queue *q = job->q;
314 struct xe_execlist_exec_queue *exl = job->q->execlist;
315
316 q->ring_ops->emit_job(job);
317 xe_execlist_make_active(exl);
318
319 return job->fence;
320}
321
322static void execlist_job_free(struct drm_sched_job *drm_job)
323{
324 struct xe_sched_job *job = to_xe_sched_job(drm_job);
325
326 xe_exec_queue_update_run_ticks(job->q);
327 xe_sched_job_put(job);
328}
329
330static const struct drm_sched_backend_ops drm_sched_ops = {
331 .run_job = execlist_run_job,
332 .free_job = execlist_job_free,
333};
334
335static int execlist_exec_queue_init(struct xe_exec_queue *q)
336{
337 struct drm_gpu_scheduler *sched;
338 const struct drm_sched_init_args args = {
339 .ops = &drm_sched_ops,
340 .num_rqs = 1,
341 .credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES,
342 .hang_limit = XE_SCHED_HANG_LIMIT,
343 .timeout = XE_SCHED_JOB_TIMEOUT,
344 .name = q->hwe->name,
345 .dev = gt_to_xe(q->gt)->drm.dev,
346 };
347 struct xe_execlist_exec_queue *exl;
348 struct xe_device *xe = gt_to_xe(q->gt);
349 int err;
350
351 xe_assert(xe, !xe_device_uc_enabled(xe));
352
353 drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
354
355 exl = kzalloc_obj(*exl);
356 if (!exl)
357 return -ENOMEM;
358
359 exl->q = q;
360
361 err = drm_sched_init(&exl->sched, &args);
362 if (err)
363 goto err_free;
364
365 sched = &exl->sched;
366 err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
367 if (err)
368 goto err_sched;
369
370 exl->port = q->hwe->exl_port;
371 exl->has_run = false;
372 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
373 q->execlist = exl;
374 q->entity = &exl->entity;
375
376 xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
377
378 return 0;
379
380err_sched:
381 drm_sched_fini(&exl->sched);
382err_free:
383 kfree(exl);
384 return err;
385}
386
387static void execlist_exec_queue_fini(struct xe_exec_queue *q)
388{
389 struct xe_execlist_exec_queue *exl = q->execlist;
390
391 drm_sched_entity_fini(&exl->entity);
392 drm_sched_fini(&exl->sched);
393
394 kfree(exl);
395}
396
397static void execlist_exec_queue_destroy_async(struct work_struct *w)
398{
399 struct xe_execlist_exec_queue *ee =
400 container_of(w, struct xe_execlist_exec_queue, destroy_async);
401 struct xe_exec_queue *q = ee->q;
402 struct xe_execlist_exec_queue *exl = q->execlist;
403 struct xe_device *xe = gt_to_xe(q->gt);
404 unsigned long flags;
405
406 xe_assert(xe, !xe_device_uc_enabled(xe));
407
408 spin_lock_irqsave(&exl->port->lock, flags);
409 if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
410 list_del(&exl->active_link);
411 spin_unlock_irqrestore(&exl->port->lock, flags);
412
413 xe_exec_queue_fini(q);
414}
415
/* .kill hook: not implemented yet for execlists, does nothing. */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}
420
421static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
422{
423 INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
424 queue_work(system_dfl_wq, &q->execlist->destroy_async);
425}
426
427static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
428 enum xe_exec_queue_priority priority)
429{
430 /* NIY */
431 return 0;
432}
433
434static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
435{
436 /* NIY */
437 return 0;
438}
439
440static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
441 u32 preempt_timeout_us)
442{
443 /* NIY */
444 return 0;
445}
446
/* .suspend hook: not implemented yet for execlists. Return: always 0. */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}
452
/* .suspend_wait hook: not implemented yet for execlists. Return: always 0. */
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}
459
/* .resume hook: not implemented yet for execlists, does nothing. */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}
464
465static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
466{
467 /* NIY */
468 return false;
469}
470
471static bool execlist_exec_queue_active(struct xe_exec_queue *q)
472{
473 /* NIY */
474 return false;
475}
476
477static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
478 .init = execlist_exec_queue_init,
479 .kill = execlist_exec_queue_kill,
480 .fini = execlist_exec_queue_fini,
481 .destroy = execlist_exec_queue_destroy,
482 .set_priority = execlist_exec_queue_set_priority,
483 .set_timeslice = execlist_exec_queue_set_timeslice,
484 .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
485 .suspend = execlist_exec_queue_suspend,
486 .suspend_wait = execlist_exec_queue_suspend_wait,
487 .resume = execlist_exec_queue_resume,
488 .reset_status = execlist_exec_queue_reset_status,
489 .active = execlist_exec_queue_active,
490};
491
492int xe_execlist_init(struct xe_gt *gt)
493{
494 /* GuC submission enabled, nothing to do */
495 if (xe_device_uc_enabled(gt_to_xe(gt)))
496 return 0;
497
498 gt->exec_queue_ops = &execlist_exec_queue_ops;
499
500 return 0;
501}