1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2022 Intel Corporation
4 */
5
6#include "xe_guc_submit.h"
7
8#include <linux/bitfield.h>
9#include <linux/bitmap.h>
10#include <linux/circ_buf.h>
11#include <linux/dma-fence-array.h>
12
13#include <drm/drm_managed.h>
14
15#include "abi/guc_actions_abi.h"
16#include "abi/guc_actions_slpc_abi.h"
17#include "abi/guc_klvs_abi.h"
18#include "xe_assert.h"
19#include "xe_bo.h"
20#include "xe_devcoredump.h"
21#include "xe_device.h"
22#include "xe_exec_queue.h"
23#include "xe_force_wake.h"
24#include "xe_gpu_scheduler.h"
25#include "xe_gt.h"
26#include "xe_gt_clock.h"
27#include "xe_gt_printk.h"
28#include "xe_guc.h"
29#include "xe_guc_capture.h"
30#include "xe_guc_ct.h"
31#include "xe_guc_exec_queue_types.h"
32#include "xe_guc_id_mgr.h"
33#include "xe_guc_klv_helpers.h"
34#include "xe_guc_submit_types.h"
35#include "xe_hw_engine.h"
36#include "xe_lrc.h"
37#include "xe_macros.h"
38#include "xe_map.h"
39#include "xe_mocs.h"
40#include "xe_pm.h"
41#include "xe_ring_ops_types.h"
42#include "xe_sched_job.h"
43#include "xe_sleep.h"
44#include "xe_trace.h"
45#include "xe_uc_fw.h"
46#include "xe_vm.h"
47
48#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6
49
50static int guc_submit_reset_prepare(struct xe_guc *guc);
51
52static struct xe_guc *
53exec_queue_to_guc(struct xe_exec_queue *q)
54{
55 return &q->gt->uc.guc;
56}
57
58/*
59 * Helpers for engine state, using an atomic as some of the bits can transition
60 * at the same time (e.g. a suspend can be happening at the same time as a
61 * schedule engine done event being processed).
62 */
63#define EXEC_QUEUE_STATE_REGISTERED (1 << 0)
64#define EXEC_QUEUE_STATE_ENABLED (1 << 1)
65#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2)
66#define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3)
67#define EXEC_QUEUE_STATE_DESTROYED (1 << 4)
68#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5)
69#define EXEC_QUEUE_STATE_RESET (1 << 6)
70#define EXEC_QUEUE_STATE_KILLED (1 << 7)
71#define EXEC_QUEUE_STATE_WEDGED (1 << 8)
72#define EXEC_QUEUE_STATE_BANNED (1 << 9)
73#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 10)
74#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND (1 << 11)
75
76static bool exec_queue_registered(struct xe_exec_queue *q)
77{
78 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
79}
80
81static void set_exec_queue_registered(struct xe_exec_queue *q)
82{
83 atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
84}
85
86static void clear_exec_queue_registered(struct xe_exec_queue *q)
87{
88 atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
89}
90
91static bool exec_queue_enabled(struct xe_exec_queue *q)
92{
93 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
94}
95
96static void set_exec_queue_enabled(struct xe_exec_queue *q)
97{
98 atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
99}
100
101static void clear_exec_queue_enabled(struct xe_exec_queue *q)
102{
103 atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
104}
105
106static bool exec_queue_pending_enable(struct xe_exec_queue *q)
107{
108 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
109}
110
111static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
112{
113 atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
114}
115
116static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
117{
118 atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
119}
120
121static bool exec_queue_pending_disable(struct xe_exec_queue *q)
122{
123 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
124}
125
126static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
127{
128 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
129}
130
131static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
132{
133 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
134}
135
136static bool exec_queue_destroyed(struct xe_exec_queue *q)
137{
138 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
139}
140
141static void set_exec_queue_destroyed(struct xe_exec_queue *q)
142{
143 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
144}
145
146static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
147{
148 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
149}
150
151static bool exec_queue_banned(struct xe_exec_queue *q)
152{
153 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
154}
155
156static void set_exec_queue_banned(struct xe_exec_queue *q)
157{
158 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
159}
160
161static bool exec_queue_suspended(struct xe_exec_queue *q)
162{
163 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
164}
165
166static void set_exec_queue_suspended(struct xe_exec_queue *q)
167{
168 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
169}
170
171static void clear_exec_queue_suspended(struct xe_exec_queue *q)
172{
173 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
174}
175
176static bool exec_queue_reset(struct xe_exec_queue *q)
177{
178 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
179}
180
181static void set_exec_queue_reset(struct xe_exec_queue *q)
182{
183 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
184}
185
186static bool exec_queue_killed(struct xe_exec_queue *q)
187{
188 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
189}
190
191static void set_exec_queue_killed(struct xe_exec_queue *q)
192{
193 atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
194}
195
196static bool exec_queue_wedged(struct xe_exec_queue *q)
197{
198 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
199}
200
201static void set_exec_queue_wedged(struct xe_exec_queue *q)
202{
203 atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
204}
205
206static bool exec_queue_pending_resume(struct xe_exec_queue *q)
207{
208 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
209}
210
211static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
212{
213 atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
214}
215
216static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
217{
218 atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
219}
220
221static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
222{
223 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
224}
225
226static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
227{
228 atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
229}
230
231static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
232{
233 atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
234}
235
236static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
237{
238 return (atomic_read(&q->guc->state) &
239 (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
240 EXEC_QUEUE_STATE_BANNED));
241}
242
243static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
244{
245 struct xe_guc *guc = arg;
246 struct xe_device *xe = guc_to_xe(guc);
247 struct xe_gt *gt = guc_to_gt(guc);
248 int ret;
249
250 ret = wait_event_timeout(guc->submission_state.fini_wq,
251 xa_empty(&guc->submission_state.exec_queue_lookup),
252 HZ * 5);
253
254 drain_workqueue(xe->destroy_wq);
255
256 xe_gt_assert(gt, ret);
257
258 xa_destroy(&guc->submission_state.exec_queue_lookup);
259}
260
261static void guc_submit_fini(void *arg)
262{
263 struct xe_guc *guc = arg;
264 struct xe_exec_queue *q;
265 unsigned long index;
266
267 /* Drop any wedged queue refs */
268 mutex_lock(&guc->submission_state.lock);
269 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
270 if (exec_queue_wedged(q)) {
271 mutex_unlock(&guc->submission_state.lock);
272 xe_exec_queue_put(q);
273 mutex_lock(&guc->submission_state.lock);
274 }
275 }
276 mutex_unlock(&guc->submission_state.lock);
277
278 /* Forcefully kill any remaining exec queues */
279 xe_guc_ct_stop(&guc->ct);
280 guc_submit_reset_prepare(guc);
281 xe_guc_softreset(guc);
282 xe_guc_submit_stop(guc);
283 xe_uc_fw_sanitize(&guc->fw);
284 xe_guc_submit_pause_abort(guc);
285}
286
287static const struct xe_exec_queue_ops guc_exec_queue_ops;
288
289static void primelockdep(struct xe_guc *guc)
290{
291 if (!IS_ENABLED(CONFIG_LOCKDEP))
292 return;
293
294 fs_reclaim_acquire(GFP_KERNEL);
295
296 mutex_lock(&guc->submission_state.lock);
297 mutex_unlock(&guc->submission_state.lock);
298
299 fs_reclaim_release(GFP_KERNEL);
300}
301
302/**
303 * xe_guc_submit_init() - Initialize GuC submission.
304 * @guc: the &xe_guc to initialize
305 * @num_ids: number of GuC context IDs to use
306 *
307 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
308 * GuC context IDs supported by the GuC firmware should be used for submission.
309 *
310 * Only VF drivers will have to provide an explicit number of GuC context IDs
311 * that they can use for submission.
312 *
313 * Return: 0 on success or a negative error code on failure.
314 */
315int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
316{
317 struct xe_device *xe = guc_to_xe(guc);
318 struct xe_gt *gt = guc_to_gt(guc);
319 int err;
320
321 err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
322 if (err)
323 return err;
324
325 err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
326 if (err)
327 return err;
328
329 gt->exec_queue_ops = &guc_exec_queue_ops;
330
331 xa_init(&guc->submission_state.exec_queue_lookup);
332
333 init_waitqueue_head(&guc->submission_state.fini_wq);
334
335 primelockdep(guc);
336
337 guc->submission_state.initialized = true;
338
339 err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
340 if (err)
341 return err;
342
343 return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
344}
345
346/*
347 * Given that we want to guarantee enough RCS throughput to avoid missing
348 * frames, we set the yield policy to 20% of each 80ms interval.
349 */
350#define RC_YIELD_DURATION 80 /* in ms */
351#define RC_YIELD_RATIO 20 /* in percent */
352static u32 *emit_render_compute_yield_klv(u32 *emit)
353{
354 *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
355 *emit++ = RC_YIELD_DURATION;
356 *emit++ = RC_YIELD_RATIO;
357
358 return emit;
359}
360
361#define SCHEDULING_POLICY_MAX_DWORDS 16
362static int guc_init_global_schedule_policy(struct xe_guc *guc)
363{
364 u32 data[SCHEDULING_POLICY_MAX_DWORDS];
365 u32 *emit = data;
366 u32 count = 0;
367 int ret;
368
369 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
370 return 0;
371
372 *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
373
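 /*
 * data[] is a single H2G message: the action dword followed by zero or more
 * KLVs (a key/length dword plus its value dwords), e.g. the RCS yield KLV
 * emitted above.
 */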
374 if (CCS_INSTANCES(guc_to_gt(guc)))
375 emit = emit_render_compute_yield_klv(emit);
376
377 count = emit - data;
378 if (count > 1) {
379 xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
380
381 ret = xe_guc_ct_send_block(&guc->ct, data, count);
382 if (ret < 0) {
383 xe_gt_err(guc_to_gt(guc),
384 "failed to enable GuC scheduling policies: %pe\n",
385 ERR_PTR(ret));
386 return ret;
387 }
388 }
389
390 return 0;
391}
392
393int xe_guc_submit_enable(struct xe_guc *guc)
394{
395 int ret;
396
397 ret = guc_init_global_schedule_policy(guc);
398 if (ret)
399 return ret;
400
401 guc->submission_state.enabled = true;
402
403 return 0;
404}
405
406void xe_guc_submit_disable(struct xe_guc *guc)
407{
408 guc->submission_state.enabled = false;
409}
410
411static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
412{
413 int i;
414
415 lockdep_assert_held(&guc->submission_state.lock);
416
417 for (i = 0; i < xa_count; ++i)
418 xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
419
420 xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
421 q->guc->id, q->width);
422
423 if (xa_empty(&guc->submission_state.exec_queue_lookup))
424 wake_up(&guc->submission_state.fini_wq);
425}
426
427static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
428{
429 int ret;
430 int i;
431
432 /*
433 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
434 * worst case the user gets -ENOMEM on engine create and has to try again.
435 *
436 * FIXME: Have caller pre-alloc or post-alloc w/ GFP_KERNEL to prevent
437 * failure.
438 */
439 lockdep_assert_held(&guc->submission_state.lock);
440
441 ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
442 q->width);
443 if (ret < 0)
444 return ret;
445
446 q->guc->id = ret;
447
448 for (i = 0; i < q->width; ++i) {
449 ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
450 q->guc->id + i, q, GFP_NOWAIT));
451 if (ret)
452 goto err_release;
453 }
454
455 return 0;
456
457err_release:
458 __release_guc_id(guc, q, i);
459
460 return ret;
461}
462
463static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
464{
465 mutex_lock(&guc->submission_state.lock);
466 __release_guc_id(guc, q, q->width);
467 mutex_unlock(&guc->submission_state.lock);
468}
469
470struct exec_queue_policy {
471 u32 count;
472 struct guc_update_exec_queue_policy h2g;
473};
474
475static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
476{
477 size_t bytes = sizeof(policy->h2g.header) +
478 (sizeof(policy->h2g.klv[0]) * policy->count);
479
480 return bytes / sizeof(u32);
481}
482
483static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
484 u16 guc_id)
485{
486 policy->h2g.header.action =
487 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
488 policy->h2g.header.guc_id = guc_id;
489 policy->count = 0;
490}
491
492#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
493static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
494 u32 data) \
495{ \
496 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
497\
498 policy->h2g.klv[policy->count].kl = \
499 FIELD_PREP(GUC_KLV_0_KEY, \
500 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
501 FIELD_PREP(GUC_KLV_0_LEN, 1); \
502 policy->h2g.klv[policy->count].value = data; \
503 policy->count++; \
504}
505
506MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
507MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
508MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
509MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
510#undef MAKE_EXEC_QUEUE_POLICY_ADD
511
512static const int xe_exec_queue_prio_to_guc[] = {
513 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
514 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
515 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
516 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
517};
518
519static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
520{
521 struct exec_queue_policy policy;
522 enum xe_exec_queue_priority prio = q->sched_props.priority;
523 u32 timeslice_us = q->sched_props.timeslice_us;
524 u32 slpc_exec_queue_freq_req = 0;
525 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
526
527 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
528 !xe_exec_queue_is_multi_queue_secondary(q));
529
530 if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
531 slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
532
533 __guc_exec_queue_policy_start_klv(&policy, q->guc->id);
534 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
535 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
536 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
537 __guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
538 slpc_exec_queue_freq_req);
539
540 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
541 __guc_exec_queue_policy_action_size(&policy), 0, 0);
542}
543
544static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
545{
546 struct exec_queue_policy policy;
547
548 xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));
549
550 __guc_exec_queue_policy_start_klv(&policy, q->guc->id);
551 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
552
553 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
554 __guc_exec_queue_policy_action_size(&policy), 0, 0);
555}
556
557static bool vf_recovery(struct xe_guc *guc)
558{
559 return xe_gt_recovery_pending(guc_to_gt(guc));
560}
561
562static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
563{
564 struct xe_guc *guc = exec_queue_to_guc(q);
565 struct xe_device *xe = guc_to_xe(guc);
566
567 /* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
568 wake_up_all(&xe->ufence_wq);
569
570 xe_sched_tdr_queue_imm(&q->guc->sched);
571}
572
573static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
574{
575 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
576 struct xe_exec_queue_group *group = q->multi_queue.group;
577 struct xe_exec_queue *eq, *next;
578 LIST_HEAD(tmp);
579
580 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
581 xe_exec_queue_is_multi_queue(q));
582
583 mutex_lock(&group->list_lock);
584
585 /*
586 * Stop all future queues from executing while the group is stopped.
587 */
588 group->stopped = true;
589
590 list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
591 /*
592 * Refcount prevents an attempted removal from &group->list,
593 * temporary list allows safe iteration after dropping
594 * &group->list_lock.
595 */
596 if (xe_exec_queue_get_unless_zero(eq))
597 list_move_tail(&eq->multi_queue.link, &tmp);
598
599 mutex_unlock(&group->list_lock);
600
601 /* We cannot stop under list lock without getting inversions */
602 xe_sched_submission_stop(&primary->guc->sched);
603 list_for_each_entry(eq, &tmp, multi_queue.link)
604 xe_sched_submission_stop(&eq->guc->sched);
605
606 mutex_lock(&group->list_lock);
607 list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
608 /*
609 * Corner case where we got banned while stopping and are not on
610 * &group->list
611 */
612 if (READ_ONCE(group->banned))
613 xe_guc_exec_queue_trigger_cleanup(eq);
614
615 list_move_tail(&eq->multi_queue.link, &group->list);
616 xe_exec_queue_put(eq);
617 }
618 mutex_unlock(&group->list_lock);
619}
620
621static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
622{
623 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
624 struct xe_exec_queue_group *group = q->multi_queue.group;
625 struct xe_exec_queue *eq;
626
627 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
628 xe_exec_queue_is_multi_queue(q));
629
630 xe_sched_submission_start(&primary->guc->sched);
631
632 mutex_lock(&group->list_lock);
633 group->stopped = false;
634 list_for_each_entry(eq, &group->list, multi_queue.link)
635 xe_sched_submission_start(&eq->guc->sched);
636 mutex_unlock(&group->list_lock);
637}
638
639static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
640{
641 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
642 struct xe_exec_queue_group *group = q->multi_queue.group;
643 struct xe_exec_queue *eq;
644
645 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
646 xe_exec_queue_is_multi_queue(q));
647
648 /* Group banned, skip timeout check in TDR */
649 WRITE_ONCE(group->banned, true);
650 xe_guc_exec_queue_trigger_cleanup(primary);
651
652 mutex_lock(&group->list_lock);
653 list_for_each_entry(eq, &group->list, multi_queue.link)
654 xe_guc_exec_queue_trigger_cleanup(eq);
655 mutex_unlock(&group->list_lock);
656}
657
658static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
659{
660 if (xe_exec_queue_is_multi_queue(q)) {
661 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
662 struct xe_exec_queue_group *group = q->multi_queue.group;
663 struct xe_exec_queue *eq;
664
665 /* Group banned, skip timeout check in TDR */
666 WRITE_ONCE(group->banned, true);
667
668 set_exec_queue_reset(primary);
669 if (!exec_queue_banned(primary))
670 xe_guc_exec_queue_trigger_cleanup(primary);
671
672 mutex_lock(&group->list_lock);
673 list_for_each_entry(eq, &group->list, multi_queue.link) {
674 set_exec_queue_reset(eq);
675 if (!exec_queue_banned(eq))
676 xe_guc_exec_queue_trigger_cleanup(eq);
677 }
678 mutex_unlock(&group->list_lock);
679 } else {
680 set_exec_queue_reset(q);
681 if (!exec_queue_banned(q))
682 xe_guc_exec_queue_trigger_cleanup(q);
683 }
684}
685
686static void set_exec_queue_group_banned(struct xe_exec_queue *q)
687{
688 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
689 struct xe_exec_queue_group *group = q->multi_queue.group;
690 struct xe_exec_queue *eq;
691
692 /* Ban all queues of the multi-queue group */
693 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
694 xe_exec_queue_is_multi_queue(q));
695 set_exec_queue_banned(primary);
696
697 mutex_lock(&group->list_lock);
698 list_for_each_entry(eq, &group->list, multi_queue.link)
699 set_exec_queue_banned(eq);
700 mutex_unlock(&group->list_lock);
701}
702
703/* Helper for context registration H2G */
704struct guc_ctxt_registration_info {
705 u32 flags;
706 u32 context_idx;
707 u32 engine_class;
708 u32 engine_submit_mask;
709 u32 wq_desc_lo;
710 u32 wq_desc_hi;
711 u32 wq_base_lo;
712 u32 wq_base_hi;
713 u32 wq_size;
714 u32 cgp_lo;
715 u32 cgp_hi;
716 u32 hwlrca_lo;
717 u32 hwlrca_hi;
718};
719
720#define parallel_read(xe_, map_, field_) \
721 xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
722 field_)
723#define parallel_write(xe_, map_, field_, val_) \
724 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
725 field_, val_)
726
727/**
728 * DOC: Multi Queue Group GuC interface
729 *
730 * The multi queue group coordination between KMD and GuC is through a software
731 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
732 * allocated in the global GTT.
733 *
734 * CGP format:
735 *
736 * +-----------+---------------------------+---------------------------------------------+
737 * | DWORD     | Name                      | Description                                 |
738 * +-----------+---------------------------+---------------------------------------------+
739 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
740 * +-----------+---------------------------+---------------------------------------------+
741 * | 1..15     | RESERVED                  | MBZ                                         |
742 * +-----------+---------------------------+---------------------------------------------+
743 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
744 * +-----------+---------------------------+---------------------------------------------+
745 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
746 * +-----------+---------------------------+---------------------------------------------+
747 * | 18..31    | RESERVED                  | MBZ                                         |
748 * +-----------+---------------------------+---------------------------------------------+
749 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
750 * +-----------+---------------------------+---------------------------------------------+
751 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
752 * +-----------+---------------------------+---------------------------------------------+
753 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
754 * +-----------+---------------------------+---------------------------------------------+
755 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
756 * +-----------+---------------------------+---------------------------------------------+
757 * | ...       | ...                       | ...                                         |
758 * +-----------+---------------------------+---------------------------------------------+
759 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
760 * +-----------+---------------------------+---------------------------------------------+
761 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
762 * +-----------+---------------------------+---------------------------------------------+
763 * | 160..1023 | RESERVED                  | MBZ                                         |
764 * +-----------+---------------------------+---------------------------------------------+
765 *
766 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
767 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
768 * the CGP address. When the secondary queues are added to the group, the CGP is
769 * updated with entry for that queue and GuC is notified through the H2G interface
770 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
771 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
772 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
773 * error in the CGP. Only one of these CGP update messages can be outstanding
774 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
775 * fields indicate which queue entry is being updated in the CGP.
776 *
777 * The primary queue (Q0) represents the multi queue group context in GuC and
778 * submission on any queue of the group must be through Q0 GuC interface only.
779 *
780 * As it is not required to register secondary queues with GuC, the secondary queue
781 * context ids in the CGP are populated with Q0 context id.
782 */
783
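/*
 * Example (illustrative): adding the queue at position 5 of a group writes
 * Q5CD_DW0 at dword 32 + 2 * 5 = 42 and Q5ContextIndex at dword 43, then sets
 * bit 5 in KMD_QUEUE_UPDATE_MASK_DW0 (dword 16) before notifying the GuC.
 */
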
784#define CGP_VERSION_MAJOR_SHIFT 8
785
786static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
787 struct xe_exec_queue *q)
788{
789 struct xe_exec_queue_group *group = q->multi_queue.group;
790 u32 guc_id = group->primary->guc->id;
791
792 /* Currently implementing CGP version 1.0 */
793 xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
794 1 << CGP_VERSION_MAJOR_SHIFT);
795
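 /*
 * Per the CGP layout above: Q<pos>CD_DW0 lives at dword 32 + 2 * pos and the
 * queue's context index at dword 33 + 2 * pos.
 */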
796 xe_map_wr(xe, &group->cgp_bo->vmap,
797 (32 + q->multi_queue.pos * 2) * sizeof(u32),
798 u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));
799
800 xe_map_wr(xe, &group->cgp_bo->vmap,
801 (33 + q->multi_queue.pos * 2) * sizeof(u32),
802 u32, guc_id);
803
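 /*
 * Flag which queue entry changed via KMD_QUEUE_UPDATE_MASK_DW0/DW1
 * (dwords 16/17) and clear the unused mask dword.
 */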
804 if (q->multi_queue.pos / 32) {
805 xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
806 u32, BIT(q->multi_queue.pos % 32));
807 xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
808 } else {
809 xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
810 u32, BIT(q->multi_queue.pos));
811 xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
812 }
813}
814
815static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
816 struct xe_exec_queue *q,
817 const u32 *action, u32 len)
818{
819 struct xe_exec_queue_group *group = q->multi_queue.group;
820 struct xe_device *xe = guc_to_xe(guc);
821 enum xe_multi_queue_priority priority;
822 long ret;
823
824 /*
825 * As all queues of a multi queue group use a single drm scheduler
826 * submit workqueue, CGP synchronizations with the GuC are serialized.
827 * Hence, no locking is required here.
828 * Wait for any pending CGP_SYNC_DONE response before updating the
829 * CGP page and sending the CGP_SYNC message.
830 *
831 * FIXME: Support VF migration
832 */
833 ret = wait_event_timeout(guc->ct.wq,
834 !READ_ONCE(group->sync_pending) ||
835 xe_guc_read_stopped(guc), HZ);
836 if (!ret || xe_guc_read_stopped(guc)) {
837 /* CGP_SYNC failed. Reset gt, cleanup the group */
838 xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
839 set_exec_queue_group_banned(q);
840 xe_gt_reset_async(q->gt);
841 xe_guc_exec_queue_group_trigger_cleanup(q);
842 return;
843 }
844
845 scoped_guard(spinlock, &q->multi_queue.lock)
846 priority = q->multi_queue.priority;
847
848 xe_lrc_set_multi_queue_priority(q->lrc[0], priority);
849 xe_guc_exec_queue_group_cgp_update(xe, q);
850
851 WRITE_ONCE(group->sync_pending, true);
852 xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
853}
854
855static void __register_exec_queue_group(struct xe_guc *guc,
856 struct xe_exec_queue *q,
857 struct guc_ctxt_registration_info *info)
858{
859#define MAX_MULTI_QUEUE_REG_SIZE (8)
860 u32 action[MAX_MULTI_QUEUE_REG_SIZE];
861 int len = 0;
862
863 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
864 action[len++] = info->flags;
865 action[len++] = info->context_idx;
866 action[len++] = info->engine_class;
867 action[len++] = info->engine_submit_mask;
868 action[len++] = 0; /* Reserved */
869 action[len++] = info->cgp_lo;
870 action[len++] = info->cgp_hi;
871
872 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
873#undef MAX_MULTI_QUEUE_REG_SIZE
874
875 /*
876 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE does expect a
877 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
878 * from the GuC.
879 */
880 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
881}
882
883static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
884 struct xe_exec_queue *q)
885{
886#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
887 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
888 int len = 0;
889
890 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));
891
892 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
893 action[len++] = q->multi_queue.group->primary->guc->id;
894
895 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
896#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
897
898 /*
899 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC does expect a
900 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
901 * from the GuC.
902 */
903 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
904}
905
906static void __register_mlrc_exec_queue(struct xe_guc *guc,
907 struct xe_exec_queue *q,
908 struct guc_ctxt_registration_info *info)
909{
910#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
911 u32 action[MAX_MLRC_REG_SIZE];
912 int len = 0;
913 int i;
914
915 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));
916
917 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
918 action[len++] = info->flags;
919 action[len++] = info->context_idx;
920 action[len++] = info->engine_class;
921 action[len++] = info->engine_submit_mask;
922 action[len++] = info->wq_desc_lo;
923 action[len++] = info->wq_desc_hi;
924 action[len++] = info->wq_base_lo;
925 action[len++] = info->wq_base_hi;
926 action[len++] = info->wq_size;
927 action[len++] = q->width;
928 action[len++] = info->hwlrca_lo;
929 action[len++] = info->hwlrca_hi;
930
931 for (i = 1; i < q->width; ++i) {
932 struct xe_lrc *lrc = q->lrc[i];
933
934 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
935 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
936 }
937
938 /* explicitly checks some fields that we might fixup later */
939 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
940 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
941 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
942 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
943 xe_gt_assert(guc_to_gt(guc), q->width ==
944 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
945 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
946 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
947 xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
948#undef MAX_MLRC_REG_SIZE
949
950 xe_guc_ct_send(&guc->ct, action, len, 0, 0);
951}
952
953static void __register_exec_queue(struct xe_guc *guc,
954 struct guc_ctxt_registration_info *info)
955{
956 u32 action[] = {
957 XE_GUC_ACTION_REGISTER_CONTEXT,
958 info->flags,
959 info->context_idx,
960 info->engine_class,
961 info->engine_submit_mask,
962 info->wq_desc_lo,
963 info->wq_desc_hi,
964 info->wq_base_lo,
965 info->wq_base_hi,
966 info->wq_size,
967 info->hwlrca_lo,
968 info->hwlrca_hi,
969 };
970
971 /* explicitly checks some fields that we might fixup later */
972 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
973 action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
974 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
975 action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
976 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
977 action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);
978
979 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
980}
981
982static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
983{
984 struct xe_guc *guc = exec_queue_to_guc(q);
985 struct xe_device *xe = guc_to_xe(guc);
986 struct xe_lrc *lrc = q->lrc[0];
987 struct guc_ctxt_registration_info info;
988
989 xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
990 xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);
991
992 memset(&info, 0, sizeof(info));
993 info.context_idx = q->guc->id;
994 info.engine_class = xe_engine_class_to_guc_class(q->class);
995 info.engine_submit_mask = q->logical_mask;
996 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
997 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
998 info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
999 FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
1000
1001 if (xe_exec_queue_is_multi_queue(q)) {
1002 struct xe_exec_queue_group *group = q->multi_queue.group;
1003
1004 info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
1005 info.cgp_hi = 0;
1006 }
1007
1008 if (xe_exec_queue_is_parallel(q)) {
1009 u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
1010 struct iosys_map map = xe_lrc_parallel_map(lrc);
1011
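 /*
 * The parallel scratch page in the LRC holds both the WQ descriptor and
 * the WQ ring buffer; pass their GGTT addresses to the GuC.
 */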
1012 info.wq_desc_lo = lower_32_bits(ggtt_addr +
1013 offsetof(struct guc_submit_parallel_scratch, wq_desc));
1014 info.wq_desc_hi = upper_32_bits(ggtt_addr +
1015 offsetof(struct guc_submit_parallel_scratch, wq_desc));
1016 info.wq_base_lo = lower_32_bits(ggtt_addr +
1017 offsetof(struct guc_submit_parallel_scratch, wq[0]));
1018 info.wq_base_hi = upper_32_bits(ggtt_addr +
1019 offsetof(struct guc_submit_parallel_scratch, wq[0]));
1020 info.wq_size = WQ_SIZE;
1021
1022 q->guc->wqi_head = 0;
1023 q->guc->wqi_tail = 0;
1024 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
1025 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
1026 }
1027
1028 set_exec_queue_registered(q);
1029 trace_xe_exec_queue_register(q);
1030 if (xe_exec_queue_is_multi_queue_primary(q))
1031 __register_exec_queue_group(guc, q, &info);
1032 else if (xe_exec_queue_is_parallel(q))
1033 __register_mlrc_exec_queue(guc, q, &info);
1034 else if (!xe_exec_queue_is_multi_queue_secondary(q))
1035 __register_exec_queue(guc, &info);
1036
1037 if (!xe_exec_queue_is_multi_queue_secondary(q))
1038 init_policies(guc, q);
1039
1040 if (xe_exec_queue_is_multi_queue_secondary(q))
1041 xe_guc_exec_queue_group_add(guc, q);
1042}
1043
1044static u32 wq_space_until_wrap(struct xe_exec_queue *q)
1045{
1046 return (WQ_SIZE - q->guc->wqi_tail);
1047}
1048
1049static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
1050{
1051 struct xe_guc *guc = exec_queue_to_guc(q);
1052 struct xe_device *xe = guc_to_xe(guc);
1053 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
1054 unsigned int sleep_period_ms = 1, sleep_total_ms = 0;
1055
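 /*
 * The parallel WQ is a circular buffer: CIRC_SPACE() returns the free bytes
 * between our tail (producer) and the head the GuC advances as it consumes
 * work items.
 */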
1056#define AVAILABLE_SPACE \
1057 CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
1058 if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
1059try_again:
1060 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
1061 if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
1062 if (sleep_total_ms > 2000) {
1063 xe_gt_reset_async(q->gt);
1064 return -ENODEV;
1065 }
1066
1067 sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64);
1068 goto try_again;
1069 }
1070 }
1071#undef AVAILABLE_SPACE
1072
1073 return 0;
1074}
1075
1076static int wq_noop_append(struct xe_exec_queue *q)
1077{
1078 struct xe_guc *guc = exec_queue_to_guc(q);
1079 struct xe_device *xe = guc_to_xe(guc);
1080 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
1081 u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
1082
1083 if (wq_wait_for_space(q, wq_space_until_wrap(q)))
1084 return -ENODEV;
1085
1086 xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));
1087
1088 parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
1089 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
1090 FIELD_PREP(WQ_LEN_MASK, len_dw));
1091 q->guc->wqi_tail = 0;
1092
1093 return 0;
1094}
1095
1096static void wq_item_append(struct xe_exec_queue *q)
1097{
1098 struct xe_guc *guc = exec_queue_to_guc(q);
1099 struct xe_device *xe = guc_to_xe(guc);
1100 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
1101#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
1102 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
1103 u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
1104 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
1105 int i = 0, j;
1106
1107 if (wqi_size > wq_space_until_wrap(q)) {
1108 if (wq_noop_append(q))
1109 return;
1110 }
1111 if (wq_wait_for_space(q, wqi_size))
1112 return;
1113
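 /*
 * Multi-LRC work item layout: a header dword (type/len), the LRC0
 * descriptor, guc_id plus the LRC0 ring tail, a zero dword, then one
 * ring-tail dword per remaining LRC.
 */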
1114 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
1115 FIELD_PREP(WQ_LEN_MASK, len_dw);
1116 wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
1117 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
1118 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
1119 wqi[i++] = 0;
1120 for (j = 1; j < q->width; ++j) {
1121 struct xe_lrc *lrc = q->lrc[j];
1122
1123 wqi[i++] = lrc->ring.tail / sizeof(u64);
1124 }
1125
1126 xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));
1127
1128 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
1129 wq[q->guc->wqi_tail / sizeof(u32)]));
1130 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
1131 q->guc->wqi_tail += wqi_size;
1132 xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);
1133
1134 xe_device_wmb(xe);
1135
1136 map = xe_lrc_parallel_map(q->lrc[0]);
1137 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
1138}
1139
1140#define RESUME_PENDING ~0x0ull
1141static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
1142{
1143 struct xe_guc *guc = exec_queue_to_guc(q);
1144 struct xe_lrc *lrc = q->lrc[0];
1145 u32 action[3];
1146 u32 g2h_len = 0;
1147 u32 num_g2h = 0;
1148 int len = 0;
1149 bool extra_submit = false;
1150
1151 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1152
1153 if (!job->restore_replay || job->last_replay) {
1154 if (xe_exec_queue_is_parallel(q))
1155 wq_item_append(q);
1156 else if (!exec_queue_idle_skip_suspend(q))
1157 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
1158 job->last_replay = false;
1159 }
1160
1161 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
1162 return;
1163
1164 /*
1165 * All queues in a multi-queue group will use the primary queue
1166 * of the group to interface with GuC.
1167 */
1168 q = xe_exec_queue_multi_queue_primary(q);
1169
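 /*
 * The first submission enables context scheduling (and expects a G2H ack);
 * subsequent submissions only need a SCHED_CONTEXT H2G.
 */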
1170 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
1171 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
1172 action[len++] = q->guc->id;
1173 action[len++] = GUC_CONTEXT_ENABLE;
1174 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
1175 num_g2h = 1;
1176 if (xe_exec_queue_is_parallel(q))
1177 extra_submit = true;
1178
1179 q->guc->resume_time = RESUME_PENDING;
1180 set_exec_queue_pending_enable(q);
1181 set_exec_queue_enabled(q);
1182 trace_xe_exec_queue_scheduling_enable(q);
1183 } else {
1184 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
1185 action[len++] = q->guc->id;
1186 trace_xe_exec_queue_submit(q);
1187 }
1188
1189 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
1190
1191 if (extra_submit) {
1192 len = 0;
1193 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
1194 action[len++] = q->guc->id;
1195 trace_xe_exec_queue_submit(q);
1196
1197 xe_guc_ct_send(&guc->ct, action, len, 0, 0);
1198 }
1199}
1200
1201static struct dma_fence *
1202guc_exec_queue_run_job(struct drm_sched_job *drm_job)
1203{
1204 struct xe_sched_job *job = to_xe_sched_job(drm_job);
1205 struct xe_exec_queue *q = job->q;
1206 struct xe_guc *guc = exec_queue_to_guc(q);
1207 bool killed_or_banned_or_wedged =
1208 exec_queue_killed_or_banned_or_wedged(q);
1209
1210 xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
1211 exec_queue_banned(q) || exec_queue_suspended(q));
1212
1213 trace_xe_sched_job_run(job);
1214
1215 if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
1216 if (xe_exec_queue_is_multi_queue_secondary(q)) {
1217 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
1218
1219 if (exec_queue_killed_or_banned_or_wedged(primary)) {
1220 killed_or_banned_or_wedged = true;
1221 goto run_job_out;
1222 }
1223
1224 if (!exec_queue_registered(primary))
1225 register_exec_queue(primary, GUC_CONTEXT_NORMAL);
1226 }
1227
1228 if (!exec_queue_registered(q))
1229 register_exec_queue(q, GUC_CONTEXT_NORMAL);
1230 if (!job->restore_replay)
1231 q->ring_ops->emit_job(job);
1232 submit_exec_queue(q, job);
1233 job->restore_replay = false;
1234 }
1235
1236run_job_out:
1237
1238 return job->fence;
1239}
1240
1241static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
1242{
1243 struct xe_sched_job *job = to_xe_sched_job(drm_job);
1244
1245 trace_xe_sched_job_free(job);
1246 xe_sched_job_put(job);
1247}
1248
1249int xe_guc_read_stopped(struct xe_guc *guc)
1250{
1251 return atomic_read(&guc->submission_state.stopped);
1252}
1253
1254static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
1255 struct xe_exec_queue *q,
1256 u32 runnable_state);
1257static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);
1258
1259#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
1260 u32 action[] = { \
1261 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
1262 q->guc->id, \
1263 GUC_CONTEXT_##enable_disable, \
1264 }
1265
1266static void disable_scheduling_deregister(struct xe_guc *guc,
1267 struct xe_exec_queue *q)
1268{
1269 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
1270 int ret;
1271
1272 if (!xe_exec_queue_is_multi_queue_secondary(q))
1273 set_min_preemption_timeout(guc, q);
1274
1275 smp_rmb();
1276 ret = wait_event_timeout(guc->ct.wq,
1277 (!exec_queue_pending_enable(q) &&
1278 !exec_queue_pending_disable(q)) ||
1279 xe_guc_read_stopped(guc) ||
1280 vf_recovery(guc),
1281 HZ * 5);
1282 if (!ret && !vf_recovery(guc)) {
1283 struct xe_gpu_scheduler *sched = &q->guc->sched;
1284
1285 xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
1286 xe_sched_submission_start(sched);
1287 xe_gt_reset_async(q->gt);
1288 xe_sched_tdr_queue_imm(sched);
1289 return;
1290 }
1291
1292 clear_exec_queue_enabled(q);
1293 set_exec_queue_pending_disable(q);
1294 set_exec_queue_destroyed(q);
1295 trace_xe_exec_queue_scheduling_disable(q);
1296
1297 /*
1298 * Reserve space for both G2Hs here as the 2nd G2H is sent from a G2H
1299 * handler and we are not allowed to reserve G2H space in handlers.
1300 */
1301 if (xe_exec_queue_is_multi_queue_secondary(q))
1302 handle_multi_queue_secondary_sched_done(guc, q, 0);
1303 else
1304 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1305 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
1306 G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
1307}
1308
1309/**
1310 * xe_guc_submit_wedge() - Wedge GuC submission
1311 * @guc: the GuC object
1312 *
1313 * Save the state of exec queues registered with the GuC by taking a ref to each queue.
1314 * Register a DRMM handler to drop refs upon driver unload.
1315 */
1316void xe_guc_submit_wedge(struct xe_guc *guc)
1317{
1318 struct xe_device *xe = guc_to_xe(guc);
1319 struct xe_exec_queue *q;
1320 unsigned long index;
1321
1322 xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
1323
1324 /*
1325 * If device is being wedged even before submission_state is
1326 * initialized, there's nothing to do here.
1327 */
1328 if (!guc->submission_state.initialized)
1329 return;
1330
1331 if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
1332 mutex_lock(&guc->submission_state.lock);
1333 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
1334 if (xe_exec_queue_get_unless_zero(q))
1335 set_exec_queue_wedged(q);
1336 mutex_unlock(&guc->submission_state.lock);
1337 } else {
1338 /* Forcefully kill any remaining exec queues, signal fences */
1339 guc_submit_reset_prepare(guc);
1340 xe_guc_submit_stop(guc);
1341 xe_guc_softreset(guc);
1342 xe_uc_fw_sanitize(&guc->fw);
1343 xe_guc_submit_pause_abort(guc);
1344 }
1345}
1346
1347static bool guc_submit_hint_wedged(struct xe_guc *guc)
1348{
1349 struct xe_device *xe = guc_to_xe(guc);
1350
1351 if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
1352 return false;
1353
1354 if (xe_device_wedged(xe))
1355 return true;
1356
1357 xe_device_declare_wedged(xe);
1358
1359 return true;
1360}
1361
1362#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100)
1363
1364static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
1365{
1366 struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
1367 u32 ctx_timestamp, ctx_job_timestamp;
1368 u32 timeout_ms = q->sched_props.job_timeout_ms;
1369 u32 diff;
1370 u64 running_time_ms;
1371
1372 if (!xe_sched_job_started(job)) {
1373 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
1374 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1375 q->guc->id);
1376
1377 return xe_sched_invalidate_job(job, 2);
1378 }
1379
1380 ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
1381 if (ctx_timestamp == job->sample_timestamp) {
1382 if (IS_SRIOV_VF(gt_to_xe(gt)))
1383 xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
1384 xe_sched_job_seqno(job),
1385 xe_sched_job_lrc_seqno(job), q->guc->id);
1386 else
1387 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
1388 xe_sched_job_seqno(job),
1389 xe_sched_job_lrc_seqno(job), q->guc->id);
1390
1391 return xe_sched_invalidate_job(job, 0);
1392 }
1393
1394 job->sample_timestamp = ctx_timestamp;
1395 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
1396
1397 /*
1398 * Counter wraps at ~223s at the usual 19.2MHz, so be paranoid and catch
1399 * possible overflows with a high timeout.
1400 */
1401 xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);
1402
1403 diff = ctx_timestamp - ctx_job_timestamp;
1404
1405 /*
1406 * Ensure the timeout is within 5% to account for GuC scheduling latency
1407 */
1408 running_time_ms =
1409 ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));
1410
1411 xe_gt_dbg(gt,
1412 "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
1413 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1414 q->guc->id, running_time_ms, timeout_ms, diff);
1415
1416 return running_time_ms >= timeout_ms;
1417}
1418
1419static void enable_scheduling(struct xe_exec_queue *q)
1420{
1421 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
1422 struct xe_guc *guc = exec_queue_to_guc(q);
1423 int ret;
1424
1425 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1426 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1427 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1428 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1429
1430 set_exec_queue_pending_enable(q);
1431 set_exec_queue_enabled(q);
1432 trace_xe_exec_queue_scheduling_enable(q);
1433
1434 if (xe_exec_queue_is_multi_queue_secondary(q))
1435 handle_multi_queue_secondary_sched_done(guc, q, 1);
1436 else
1437 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1438 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1439
1440 ret = wait_event_timeout(guc->ct.wq,
1441 !exec_queue_pending_enable(q) ||
1442 xe_guc_read_stopped(guc) ||
1443 vf_recovery(guc), HZ * 5);
1444 if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
1445 xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
1446 set_exec_queue_banned(q);
1447 xe_gt_reset_async(q->gt);
1448 xe_sched_tdr_queue_imm(&q->guc->sched);
1449 }
1450}
1451
1452static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
1453{
1454 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
1455 struct xe_guc *guc = exec_queue_to_guc(q);
1456
1457 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1458 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1459 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1460
1461 if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
1462 set_min_preemption_timeout(guc, q);
1463 clear_exec_queue_enabled(q);
1464 set_exec_queue_pending_disable(q);
1465 trace_xe_exec_queue_scheduling_disable(q);
1466
1467 if (xe_exec_queue_is_multi_queue_secondary(q))
1468 handle_multi_queue_secondary_sched_done(guc, q, 0);
1469 else
1470 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1471 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1472}
1473
1474static enum drm_gpu_sched_stat
1475guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
1476{
1477 struct xe_sched_job *job = to_xe_sched_job(drm_job);
1478 struct drm_sched_job *tmp_job;
1479 struct xe_exec_queue *q = job->q, *primary;
1480 struct xe_gpu_scheduler *sched = &q->guc->sched;
1481 struct xe_guc *guc = exec_queue_to_guc(q);
1482 const char *process_name = "no process";
1483 struct xe_device *xe = guc_to_xe(guc);
1484 int err = -ETIME;
1485 pid_t pid = -1;
1486 bool wedged = false, skip_timeout_check;
1487
1488 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1489
1490 primary = xe_exec_queue_multi_queue_primary(q);
1491
1492 /*
1493 * TDR has fired before free job worker. Common if exec queue
1494 * immediately closed after last fence signaled. Add back to pending
1495 * list so job can be freed and kick scheduler ensuring free job is not
1496 * lost.
1497 */
1498 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
1499 vf_recovery(guc))
1500 return DRM_GPU_SCHED_STAT_NO_HANG;
1501
1502 /* Kill the run_job entry point */
1503 if (xe_exec_queue_is_multi_queue(q))
1504 xe_guc_exec_queue_group_stop(q);
1505 else
1506 xe_sched_submission_stop(sched);
1507
1508 /* Must check all state after stopping scheduler */
1509 skip_timeout_check = exec_queue_reset(q) ||
1510 exec_queue_killed_or_banned_or_wedged(q);
1511
1512 /* Skip timeout check if multi-queue group is banned */
1513 if (xe_exec_queue_is_multi_queue(q) &&
1514 READ_ONCE(q->multi_queue.group->banned))
1515 skip_timeout_check = true;
1516
1517 /* LR jobs can only get here if queue has been killed or hit an error */
1518 if (xe_exec_queue_is_lr(q))
1519 xe_gt_assert(guc_to_gt(guc), skip_timeout_check);
1520
1521 /*
1522 * If a devcoredump has not been captured and GuC capture for the job is not
1523 * ready, do a manual capture first and decide later if we need to use it
1524 */
1525 if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
1526 !xe_guc_capture_get_matching_and_lock(q)) {
1527 /* take force wake before engine register manual capture */
1528 CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
1529 if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
1530 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
1531
1532 xe_engine_snapshot_capture_for_queue(q);
1533 }
1534
1535 /*
1536 * Check if the job actually timed out; if not, restart job execution and re-arm the TDR
1537 */
1538 if (!skip_timeout_check && !check_timeout(q, job))
1539 goto rearm;
1540
1541 if (!exec_queue_killed(q))
1542 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
1543
1544 set_exec_queue_banned(q);
1545
1546 /* Kick job / queue off hardware */
1547 if (!wedged && (exec_queue_enabled(primary) ||
1548 exec_queue_pending_disable(primary))) {
1549 int ret;
1550
1551 if (exec_queue_reset(primary))
1552 err = -EIO;
1553
1554 if (xe_uc_fw_is_running(&guc->fw)) {
1555 /*
1556 * Wait for any pending G2H to flush out before
1557 * modifying state
1558 */
1559 ret = wait_event_timeout(guc->ct.wq,
1560 (!exec_queue_pending_enable(primary) &&
1561 !exec_queue_pending_disable(primary)) ||
1562 xe_guc_read_stopped(guc) ||
1563 vf_recovery(guc), HZ * 5);
1564 if (vf_recovery(guc))
1565 goto handle_vf_resume;
1566 if (!ret || xe_guc_read_stopped(guc))
1567 goto trigger_reset;
1568
1569 disable_scheduling(primary, skip_timeout_check);
1570 }
1571
1572 /*
1573 * Must wait for scheduling to be disabled before signalling
1574 * any fences; if the GT is broken, the GT reset code should signal us.
1575 *
1576 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
1577 * error) messages which can cause the schedule disable to get
1578 * lost. If this occurs, trigger a GT reset to recover.
1579 */
1580 smp_rmb();
1581 ret = wait_event_timeout(guc->ct.wq,
1582 !xe_uc_fw_is_running(&guc->fw) ||
1583 !exec_queue_pending_disable(primary) ||
1584 xe_guc_read_stopped(guc) ||
1585 vf_recovery(guc), HZ * 5);
1586 if (vf_recovery(guc))
1587 goto handle_vf_resume;
1588 if (!ret || xe_guc_read_stopped(guc)) {
1589trigger_reset:
1590 if (!ret)
1591 xe_gt_warn(guc_to_gt(guc),
1592 "Schedule disable failed to respond, guc_id=%d",
1593 primary->guc->id);
1594 xe_devcoredump(primary, job,
1595 "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
1596 primary->guc->id, ret, xe_guc_read_stopped(guc));
1597 xe_gt_reset_async(primary->gt);
1598 xe_sched_tdr_queue_imm(sched);
1599 goto rearm;
1600 }
1601 }
1602
1603 if (q->vm && q->vm->xef) {
1604 process_name = q->vm->xef->process_name;
1605 pid = q->vm->xef->pid;
1606 }
1607
1608 if (!exec_queue_killed(q))
1609 xe_gt_notice(guc_to_gt(guc),
1610 "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
1611 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1612 q->guc->id, q->flags, process_name, pid);
1613
1614 trace_xe_sched_job_timedout(job);
1615
1616 if (!exec_queue_killed(q))
1617 xe_devcoredump(q, job,
1618 "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
1619 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1620 q->guc->id, q->flags);
1621
1622 /*
1623 * Kernel jobs should never fail, nor should VM jobs; if they do,
1624 * something has gone wrong and the GT needs a reset
1625 */
1626 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
1627 "Kernel-submitted job timed out\n");
1628 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
1629 "VM job timed out on non-killed execqueue\n");
1630 if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
1631 (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
1632 if (!xe_sched_invalidate_job(job, 2)) {
1633 xe_gt_reset_async(q->gt);
1634 goto rearm;
1635 }
1636 }
1637
1638 /* Mark all outstanding jobs as bad, thus completing them */
1639 xe_sched_job_set_error(job, err);
1640 drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
1641 xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);
1642
1643 if (xe_exec_queue_is_multi_queue(q)) {
1644 xe_guc_exec_queue_group_start(q);
1645 xe_guc_exec_queue_group_trigger_cleanup(q);
1646 } else {
1647 xe_sched_submission_start(sched);
1648 xe_guc_exec_queue_trigger_cleanup(q);
1649 }
1650
1651 /*
1652 * We want the job added back to the pending list so it gets freed; this
1653 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
1654 */
1655 return DRM_GPU_SCHED_STAT_NO_HANG;
1656
1657rearm:
1658 /*
1659 * XXX: Ideally want to adjust timeout based on current execution time
1660 * but there is not currently an easy way to do in DRM scheduler. With
1661 * some thought, do this in a follow up.
1662 */
1663 if (xe_exec_queue_is_multi_queue(q))
1664 xe_guc_exec_queue_group_start(q);
1665 else
1666 xe_sched_submission_start(sched);
1667handle_vf_resume:
1668 return DRM_GPU_SCHED_STAT_NO_HANG;
1669}
1670
1671static void guc_exec_queue_fini(struct xe_exec_queue *q)
1672{
1673 struct xe_guc_exec_queue *ge = q->guc;
1674 struct xe_guc *guc = exec_queue_to_guc(q);
1675
1676 release_guc_id(guc, q);
1677 xe_sched_entity_fini(&ge->entity);
1678 xe_sched_fini(&ge->sched);
1679
1680 /*
1681 * RCU free due to the sched being exported via DRM scheduler fences
1682 * (timeline name).
1683 */
1684 kfree_rcu(ge, rcu);
1685}
1686
1687static void __guc_exec_queue_destroy_async(struct work_struct *w)
1688{
1689 struct xe_guc_exec_queue *ge =
1690 container_of(w, struct xe_guc_exec_queue, destroy_async);
1691 struct xe_exec_queue *q = ge->q;
1692 struct xe_guc *guc = exec_queue_to_guc(q);
1693
1694 guard(xe_pm_runtime)(guc_to_xe(guc));
1695 trace_xe_exec_queue_destroy(q);
1696
1697 if (xe_exec_queue_is_multi_queue_secondary(q)) {
1698 struct xe_exec_queue_group *group = q->multi_queue.group;
1699
1700 mutex_lock(&group->list_lock);
1701 list_del(&q->multi_queue.link);
1702 mutex_unlock(&group->list_lock);
1703 }
1704
1705 /* Confirm no work left behind accessing device structures */
1706 cancel_delayed_work_sync(&ge->sched.base.work_tdr);
1707
1708 xe_exec_queue_fini(q);
1709}
1710
1711static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
1712{
1713 struct xe_guc *guc = exec_queue_to_guc(q);
1714 struct xe_device *xe = guc_to_xe(guc);
1715
1716 INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
1717
1718 /* We must block on kernel engines so slabs are empty on driver unload */
1719 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1720 __guc_exec_queue_destroy_async(&q->guc->destroy_async);
1721 else
1722 queue_work(xe->destroy_wq, &q->guc->destroy_async);
1723}
1724
1725static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
1726{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this; we don't really care when everything is fini'd, just that it
	 * is.
	 */
1734 guc_exec_queue_destroy_async(q);
1735}
1736
1737static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1738{
1739 struct xe_exec_queue *q = msg->private_data;
1740 struct xe_guc *guc = exec_queue_to_guc(q);
1741
1742 xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1743 trace_xe_exec_queue_cleanup_entity(q);
1744
1745 /*
1746 * Expected state transitions for cleanup:
1747 * - If the exec queue is registered and GuC firmware is running, we must first
1748 * disable scheduling and deregister the queue to ensure proper teardown and
1749 * resource release in the GuC, then destroy the exec queue on driver side.
1750 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
1751 * we cannot expect a response for the deregister request. In this case,
1752 * it is safe to directly destroy the exec queue on driver side, as the GuC
1753 * will not process further requests and all resources must be cleaned up locally.
1754 */
1755 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
1756 disable_scheduling_deregister(guc, q);
1757 else
1758 __guc_exec_queue_destroy(guc, q);
1759}
1760
1761static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1762{
1763 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1764}
1765
1766static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1767{
1768 struct xe_exec_queue *q = msg->private_data;
1769 struct xe_guc *guc = exec_queue_to_guc(q);
1770
1771 if (guc_exec_queue_allowed_to_change_state(q))
1772 init_policies(guc, q);
1773 kfree(msg);
1774}
1775
1776static void __suspend_fence_signal(struct xe_exec_queue *q)
1777{
1778 struct xe_guc *guc = exec_queue_to_guc(q);
1779 struct xe_device *xe = guc_to_xe(guc);
1780
1781 if (!q->guc->suspend_pending)
1782 return;
1783
1784 WRITE_ONCE(q->guc->suspend_pending, false);
1785
1786 /*
1787 * We use a GuC shared wait queue for VFs because the VF resfix start
1788 * interrupt must be able to wake all instances of suspend_wait. This
1789 * prevents the VF migration worker from being starved during
1790 * scheduling.
1791 */
1792 if (IS_SRIOV_VF(xe))
1793 wake_up_all(&guc->ct.wq);
1794 else
1795 wake_up(&q->guc->suspend_wait);
1796}
1797
1798static void suspend_fence_signal(struct xe_exec_queue *q)
1799{
1800 struct xe_guc *guc = exec_queue_to_guc(q);
1801
1802 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
1803 xe_guc_read_stopped(guc));
1804 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);
1805
1806 __suspend_fence_signal(q);
1807}
1808
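/*
 * Process a SUSPEND message: wait for any in-flight resume or disable to
 * complete, honour the VM's minimum run period, then disable scheduling with
 * the queue marked suspended. If the queue is idle or cannot change state,
 * just mark it suspended and signal the suspend fence.
 */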
1809static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1810{
1811 struct xe_exec_queue *q = msg->private_data;
1812 struct xe_guc *guc = exec_queue_to_guc(q);
1813 bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);
1814
1815 if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) &&
1816 !exec_queue_suspended(q) && exec_queue_enabled(q)) {
1817 wait_event(guc->ct.wq, vf_recovery(guc) ||
1818 ((q->guc->resume_time != RESUME_PENDING ||
1819 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));
1820
1821 if (!xe_guc_read_stopped(guc)) {
1822 s64 since_resume_ms =
1823 ktime_ms_delta(ktime_get(),
1824 q->guc->resume_time);
1825 s64 wait_ms = q->vm->preempt.min_run_period_ms -
1826 since_resume_ms;
1827
1828 if (wait_ms > 0 && q->guc->resume_time)
1829 xe_sleep_relaxed_ms(wait_ms);
1830
1831 set_exec_queue_suspended(q);
1832 disable_scheduling(q, false);
1833 }
1834 } else if (q->guc->suspend_pending) {
1835 if (idle_skip_suspend)
1836 set_exec_queue_idle_skip_suspend(q);
1837 set_exec_queue_suspended(q);
1838 suspend_fence_signal(q);
1839 }
1840}
1841
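/*
 * Kick an already-registered, enabled context: publish the latest software
 * ring tail in the LRC and send a SCHED_CONTEXT H2G so the GuC picks it up.
 */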
1842static void sched_context(struct xe_exec_queue *q)
1843{
1844 struct xe_guc *guc = exec_queue_to_guc(q);
1845 struct xe_lrc *lrc = q->lrc[0];
1846 u32 action[] = {
1847 XE_GUC_ACTION_SCHED_CONTEXT,
1848 q->guc->id,
1849 };
1850
1851 xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q));
1852 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1853 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1854 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1855
1856 trace_xe_exec_queue_submit(q);
1857
1858 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
1859 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
1860}
1861
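/*
 * Process a RESUME message: clear the suspended state and either re-enable
 * scheduling (marking the resume as pending) or, if the queue stayed enabled
 * through an idle-skip suspend, just republish the ring tail via
 * SCHED_CONTEXT.
 */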
1862static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1863{
1864 struct xe_exec_queue *q = msg->private_data;
1865
1866 if (guc_exec_queue_allowed_to_change_state(q)) {
1867 clear_exec_queue_suspended(q);
1868 if (!exec_queue_enabled(q)) {
1869 if (exec_queue_idle_skip_suspend(q)) {
1870 struct xe_lrc *lrc = q->lrc[0];
1871
1872 clear_exec_queue_idle_skip_suspend(q);
1873 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
1874 }
1875 q->guc->resume_time = RESUME_PENDING;
1876 set_exec_queue_pending_resume(q);
1877 enable_scheduling(q);
1878 } else if (exec_queue_idle_skip_suspend(q)) {
1879 clear_exec_queue_idle_skip_suspend(q);
1880 sched_context(q);
1881 }
1882 } else {
1883 clear_exec_queue_suspended(q);
1884 clear_exec_queue_idle_skip_suspend(q);
1885 }
1886}
1887
1888static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
1889{
1890 struct xe_exec_queue *q = msg->private_data;
1891
1892 if (guc_exec_queue_allowed_to_change_state(q)) {
1893#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
1894 struct xe_guc *guc = exec_queue_to_guc(q);
1895 struct xe_exec_queue_group *group = q->multi_queue.group;
1896 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
1897 int len = 0;
1898
1899 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
1900 action[len++] = group->primary->guc->id;
1901
1902 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
1903#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
1904
1905 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
1906 }
1907
1908 kfree(msg);
1909}
1910
1911#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
1912#define SET_SCHED_PROPS 2
1913#define SUSPEND 3
1914#define RESUME 4
1915#define SET_MULTI_QUEUE_PRIORITY 5
1916#define OPCODE_MASK 0xf
1917#define MSG_LOCKED BIT(8)
1918#define MSG_HEAD BIT(9)
1919
1920static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1921{
1922 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
1923
1924 trace_xe_sched_msg_recv(msg);
1925
1926 switch (msg->opcode) {
1927 case CLEANUP:
1928 __guc_exec_queue_process_msg_cleanup(msg);
1929 break;
1930 case SET_SCHED_PROPS:
1931 __guc_exec_queue_process_msg_set_sched_props(msg);
1932 break;
1933 case SUSPEND:
1934 __guc_exec_queue_process_msg_suspend(msg);
1935 break;
1936 case RESUME:
1937 __guc_exec_queue_process_msg_resume(msg);
1938 break;
1939 case SET_MULTI_QUEUE_PRIORITY:
1940 __guc_exec_queue_process_msg_set_multi_queue_priority(msg);
1941 break;
1942 default:
1943 XE_WARN_ON("Unknown message type");
1944 }
1945
1946 xe_pm_runtime_put(xe);
1947}
1948
1949static const struct drm_sched_backend_ops drm_sched_ops = {
1950 .run_job = guc_exec_queue_run_job,
1951 .free_job = guc_exec_queue_free_job,
1952 .timedout_job = guc_exec_queue_timedout_job,
1953};
1954
1955static const struct xe_sched_backend_ops xe_sched_ops = {
1956 .process_msg = guc_exec_queue_process_msg,
1957};
1958
1959static int guc_exec_queue_init(struct xe_exec_queue *q)
1960{
1961 struct xe_gpu_scheduler *sched;
1962 struct xe_guc *guc = exec_queue_to_guc(q);
1963 struct workqueue_struct *submit_wq = NULL;
1964 struct xe_guc_exec_queue *ge;
1965 long timeout;
1966 int err, i;
1967
1968 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));
1969
1970 ge = kzalloc_obj(*ge);
1971 if (!ge)
1972 return -ENOMEM;
1973
1974 q->guc = ge;
1975 ge->q = q;
1976 init_rcu_head(&ge->rcu);
1977 init_waitqueue_head(&ge->suspend_wait);
1978
1979 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
1980 INIT_LIST_HEAD(&ge->static_msgs[i].link);
1981
1982 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1983 msecs_to_jiffies(q->sched_props.job_timeout_ms);
1984
1985 /*
1986 * Use primary queue's submit_wq for all secondary queues of a
1987 * multi queue group. This serialization avoids any locking around
1988 * CGP synchronization with GuC.
1989 */
1990 if (xe_exec_queue_is_multi_queue_secondary(q)) {
1991 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
1992
1993 submit_wq = primary->guc->sched.base.submit_wq;
1994 }
1995
1996 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1997 submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
1998 timeout, guc_to_gt(guc)->ordered_wq, NULL,
1999 q->name, gt_to_xe(q->gt)->drm.dev);
2000 if (err)
2001 goto err_free;
2002
2003 sched = &ge->sched;
2004 err = xe_sched_entity_init(&ge->entity, sched);
2005 if (err)
2006 goto err_sched;
2007
2008 mutex_lock(&guc->submission_state.lock);
2009
2010 err = alloc_guc_id(guc, q);
2011 if (err)
2012 goto err_entity;
2013
2014 q->entity = &ge->entity;
2015
2016 if (xe_guc_read_stopped(guc) || vf_recovery(guc))
2017 xe_sched_stop(sched);
2018
2019 mutex_unlock(&guc->submission_state.lock);
2020
2021 xe_exec_queue_assign_name(q, q->guc->id);
2022
2023 /*
2024 * Maintain secondary queues of the multi queue group in a list
2025 * for handling dependencies across the queues in the group.
2026 */
2027 if (xe_exec_queue_is_multi_queue_secondary(q)) {
2028 struct xe_exec_queue_group *group = q->multi_queue.group;
2029
2030 INIT_LIST_HEAD(&q->multi_queue.link);
2031 mutex_lock(&group->list_lock);
2032 if (group->stopped)
2033 WRITE_ONCE(q->guc->sched.base.pause_submit, true);
2034 list_add_tail(&q->multi_queue.link, &group->list);
2035 mutex_unlock(&group->list_lock);
2036 }
2037
2038 if (xe_exec_queue_is_multi_queue(q))
2039 trace_xe_exec_queue_create_multi_queue(q);
2040 else
2041 trace_xe_exec_queue_create(q);
2042
2043 return 0;
2044
2045err_entity:
2046 mutex_unlock(&guc->submission_state.lock);
2047 xe_sched_entity_fini(&ge->entity);
2048err_sched:
2049 xe_sched_fini(&ge->sched);
2050err_free:
2051 kfree(ge);
2052
2053 return err;
2054}
2055
2056static void guc_exec_queue_kill(struct xe_exec_queue *q)
2057{
2058 trace_xe_exec_queue_kill(q);
2059 set_exec_queue_killed(q);
2060 __suspend_fence_signal(q);
2061 xe_guc_exec_queue_trigger_cleanup(q);
2062}
2063
2064static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
2065 u32 opcode)
2066{
2067 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
2068
2069 INIT_LIST_HEAD(&msg->link);
2070 msg->opcode = opcode & OPCODE_MASK;
2071 msg->private_data = q;
2072
2073 trace_xe_sched_msg_add(msg);
2074 if (opcode & MSG_HEAD)
2075 xe_sched_add_msg_head(&q->guc->sched, msg);
2076 else if (opcode & MSG_LOCKED)
2077 xe_sched_add_msg_locked(&q->guc->sched, msg);
2078 else
2079 xe_sched_add_msg(&q->guc->sched, msg);
2080}
2081
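/*
 * Add a message at the head of the scheduler message list, under the message
 * lock; if the message is already queued (non-empty link), do nothing.
 */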
2082static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
2083 struct xe_sched_msg *msg,
2084 u32 opcode)
2085{
2086 if (!list_empty(&msg->link))
2087 return;
2088
2089 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
2090}
2091
2092static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
2093 struct xe_sched_msg *msg,
2094 u32 opcode)
2095{
2096 if (!list_empty(&msg->link))
2097 return false;
2098
2099 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
2100
2101 return true;
2102}
2103
2104#define STATIC_MSG_CLEANUP 0
2105#define STATIC_MSG_SUSPEND 1
2106#define STATIC_MSG_RESUME 2
2107static void guc_exec_queue_destroy(struct xe_exec_queue *q)
2108{
2109 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
2110
2111 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
2112 guc_exec_queue_add_msg(q, msg, CLEANUP);
2113 else
2114 __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
2115}
2116
2117static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
2118 enum xe_exec_queue_priority priority)
2119{
2120 struct xe_sched_msg *msg;
2121
2122 if (q->sched_props.priority == priority ||
2123 exec_queue_killed_or_banned_or_wedged(q))
2124 return 0;
2125
2126 msg = kmalloc_obj(*msg);
2127 if (!msg)
2128 return -ENOMEM;
2129
2130 q->sched_props.priority = priority;
2131 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
2132
2133 return 0;
2134}
2135
2136static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
2137{
2138 struct xe_sched_msg *msg;
2139
2140 if (q->sched_props.timeslice_us == timeslice_us ||
2141 exec_queue_killed_or_banned_or_wedged(q))
2142 return 0;
2143
2144 msg = kmalloc_obj(*msg);
2145 if (!msg)
2146 return -ENOMEM;
2147
2148 q->sched_props.timeslice_us = timeslice_us;
2149 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
2150
2151 return 0;
2152}
2153
2154static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
2155 u32 preempt_timeout_us)
2156{
2157 struct xe_sched_msg *msg;
2158
2159 if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
2160 exec_queue_killed_or_banned_or_wedged(q))
2161 return 0;
2162
2163 msg = kmalloc_obj(*msg);
2164 if (!msg)
2165 return -ENOMEM;
2166
2167 q->sched_props.preempt_timeout_us = preempt_timeout_us;
2168 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
2169
2170 return 0;
2171}
2172
2173static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
2174 enum xe_multi_queue_priority priority)
2175{
2176 struct xe_sched_msg *msg;
2177
2178 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));
2179
2180 if (exec_queue_killed_or_banned_or_wedged(q))
2181 return 0;
2182
2183 msg = kmalloc_obj(*msg);
2184 if (!msg)
2185 return -ENOMEM;
2186
2187 scoped_guard(spinlock, &q->multi_queue.lock) {
2188 if (q->multi_queue.priority == priority) {
2189 kfree(msg);
2190 return 0;
2191 }
2192
2193 q->multi_queue.priority = priority;
2194 }
2195
2196 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);
2197
2198 return 0;
2199}
2200
2201static int guc_exec_queue_suspend(struct xe_exec_queue *q)
2202{
2203 struct xe_gpu_scheduler *sched = &q->guc->sched;
2204 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
2205
2206 if (exec_queue_killed_or_banned_or_wedged(q))
2207 return -EINVAL;
2208
2209 xe_sched_msg_lock(sched);
2210 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
2211 q->guc->suspend_pending = true;
2212 xe_sched_msg_unlock(sched);
2213
2214 return 0;
2215}
2216
2217static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
2218{
2219 struct xe_guc *guc = exec_queue_to_guc(q);
2220 struct xe_device *xe = guc_to_xe(guc);
2221 int ret;
2222
	/*
	 * We likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after kill, also check kill here.
	 */
2228#define WAIT_COND \
2229 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
2230 xe_guc_read_stopped(guc))
2231
2232retry:
2233 if (IS_SRIOV_VF(xe))
2234 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
2235 vf_recovery(guc),
2236 HZ * 5);
2237 else
2238 ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
2239 WAIT_COND, HZ * 5);
2240
2241 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
2242 return -EAGAIN;
2243
2244 if (!ret) {
2245 xe_gt_warn(guc_to_gt(guc),
2246 "Suspend fence, guc_id=%d, failed to respond",
2247 q->guc->id);
2248 /* XXX: Trigger GT reset? */
2249 return -ETIME;
2250 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
2251 /* Corner case on RESFIX DONE where vf_recovery() changes */
2252 goto retry;
2253 }
2254
2255#undef WAIT_COND
2256
2257 return ret < 0 ? ret : 0;
2258}
2259
2260static void guc_exec_queue_resume(struct xe_exec_queue *q)
2261{
2262 struct xe_gpu_scheduler *sched = &q->guc->sched;
2263 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
2264 struct xe_guc *guc = exec_queue_to_guc(q);
2265
2266 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
2267
2268 xe_sched_msg_lock(sched);
2269 guc_exec_queue_try_add_msg(q, msg, RESUME);
2270 xe_sched_msg_unlock(sched);
2271}
2272
2273static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
2274{
2275 if (xe_exec_queue_is_multi_queue_secondary(q) &&
2276 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
2277 return true;
2278
2279 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
2280}
2281
2282static bool guc_exec_queue_active(struct xe_exec_queue *q)
2283{
2284 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
2285
2286 return exec_queue_enabled(primary) &&
2287 !exec_queue_pending_disable(primary);
2288}
2289
2290/*
2291 * All of these functions are an abstraction layer which other parts of Xe can
2292 * use to trap into the GuC backend. All of these functions, aside from init,
2293 * really shouldn't do much other than trap into the DRM scheduler which
2294 * synchronizes these operations.
2295 */
2296static const struct xe_exec_queue_ops guc_exec_queue_ops = {
2297 .init = guc_exec_queue_init,
2298 .kill = guc_exec_queue_kill,
2299 .fini = guc_exec_queue_fini,
2300 .destroy = guc_exec_queue_destroy,
2301 .set_priority = guc_exec_queue_set_priority,
2302 .set_timeslice = guc_exec_queue_set_timeslice,
2303 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
2304 .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
2305 .suspend = guc_exec_queue_suspend,
2306 .suspend_wait = guc_exec_queue_suspend_wait,
2307 .resume = guc_exec_queue_resume,
2308 .reset_status = guc_exec_queue_reset_status,
2309 .active = guc_exec_queue_active,
2310};
2311
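/*
 * Stop a single queue for a GuC reset: halt the DRM scheduler, clean up any
 * lost G2H state (suspend fences, pending destroy), and ban user queues that
 * had a started-but-unfinished job or have been through repeated GT resets.
 */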
2312static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
2313{
2314 struct xe_gpu_scheduler *sched = &q->guc->sched;
2315 bool do_destroy = false;
2316
2317 /* Stop scheduling + flush any DRM scheduler operations */
2318 xe_sched_submission_stop(sched);
2319
2320 /* Clean up lost G2H + reset engine state */
2321 if (exec_queue_registered(q)) {
2322 if (exec_queue_destroyed(q))
2323 do_destroy = true;
2324 }
2325 if (q->guc->suspend_pending) {
2326 set_exec_queue_suspended(q);
2327 suspend_fence_signal(q);
2328 }
2329 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
2330 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
2331 EXEC_QUEUE_STATE_SUSPENDED,
2332 &q->guc->state);
2333 q->guc->resume_time = 0;
2334 trace_xe_exec_queue_stop(q);
2335
2336 /*
2337 * Ban any engine (aside from kernel and engines used for VM ops) with a
2338 * started but not complete job or if a job has gone through a GT reset
2339 * more than twice.
2340 */
2341 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
2342 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
2343 bool ban = false;
2344
2345 if (job) {
2346 if ((xe_sched_job_started(job) &&
2347 !xe_sched_job_completed(job)) ||
2348 xe_sched_invalidate_job(job, 2)) {
2349 trace_xe_sched_job_ban(job);
2350 ban = true;
2351 }
2352 }
2353
2354 if (ban) {
2355 set_exec_queue_banned(q);
2356 xe_guc_exec_queue_trigger_cleanup(q);
2357 }
2358 }
2359
2360 if (do_destroy)
2361 __guc_exec_queue_destroy(guc, q);
2362}
2363
2364static int guc_submit_reset_prepare(struct xe_guc *guc)
2365{
2366 int ret;
2367
	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to protect against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
	 */
2375 ret = atomic_fetch_or(1, &guc->submission_state.stopped);
2376 smp_wmb();
2377 wake_up_all(&guc->ct.wq);
2378
2379 return ret;
2380}
2381
2382int xe_guc_submit_reset_prepare(struct xe_guc *guc)
2383{
2384 if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
2385 return 0;
2386
2387 if (!guc->submission_state.initialized)
2388 return 0;
2389
2390 return guc_submit_reset_prepare(guc);
2391}
2392
2393void xe_guc_submit_reset_wait(struct xe_guc *guc)
2394{
2395 wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
2396 !xe_guc_read_stopped(guc));
2397}
2398
2399void xe_guc_submit_stop(struct xe_guc *guc)
2400{
2401 struct xe_exec_queue *q;
2402 unsigned long index;
2403
2404 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2405
2406 mutex_lock(&guc->submission_state.lock);
2407
2408 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2409 /* Prevent redundant attempts to stop parallel queues */
2410 if (q->guc->id != index)
2411 continue;
2412
2413 guc_exec_queue_stop(guc, q);
2414 }
2415
2416 mutex_unlock(&guc->submission_state.lock);
2417
2418 /*
2419 * No one can enter the backend at this point, aside from new engine
2420 * creation which is protected by guc->submission_state.lock.
2421 */
}
2424
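/*
 * Roll back state transitions that were still in flight when the queue was
 * paused (pending enable/resume/disable, in-flight deregister), recording
 * which operations must be replayed on unpause.
 */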
2425static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
2426 struct xe_exec_queue *q)
2427{
2428 bool pending_enable, pending_disable, pending_resume;
2429
2430 pending_enable = exec_queue_pending_enable(q);
2431 pending_resume = exec_queue_pending_resume(q);
2432
2433 if (pending_enable && pending_resume) {
2434 q->guc->needs_resume = true;
2435 xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
2436 q->guc->id);
2437 }
2438
2439 if (pending_enable && !pending_resume) {
2440 clear_exec_queue_registered(q);
2441 xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
2442 q->guc->id);
2443 }
2444
2445 if (pending_enable) {
2446 clear_exec_queue_enabled(q);
2447 clear_exec_queue_pending_resume(q);
2448 clear_exec_queue_pending_enable(q);
2449 xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
2450 q->guc->id);
2451 }
2452
2453 if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
2454 clear_exec_queue_destroyed(q);
2455 q->guc->needs_cleanup = true;
2456 xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
2457 q->guc->id);
2458 }
2459
2460 pending_disable = exec_queue_pending_disable(q);
2461
2462 if (pending_disable && exec_queue_suspended(q)) {
2463 clear_exec_queue_suspended(q);
2464 q->guc->needs_suspend = true;
2465 xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
2466 q->guc->id);
2467 }
2468
2469 if (pending_disable) {
2470 if (!pending_enable)
2471 set_exec_queue_enabled(q);
2472 clear_exec_queue_pending_disable(q);
2473 xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
2474 q->guc->id);
2475 }
2476
2477 q->guc->resume_time = 0;
2478}
2479
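/* Fill the parallel work queue with zero-length NOP WQ items */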
2480static void lrc_parallel_clear(struct xe_lrc *lrc)
2481{
2482 struct xe_device *xe = gt_to_xe(lrc->gt);
2483 struct iosys_map map = xe_lrc_parallel_map(lrc);
2484 int i;
2485
2486 for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
2487 parallel_write(xe, map, wq[i],
2488 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
2489 FIELD_PREP(WQ_LEN_MASK, 0));
2490}
2491
/*
 * This function is quite complex but it is the only real way to ensure no
 * state is lost during VF resume flows. The function scans the queue state,
 * makes adjustments as needed, and queues jobs / messages which are replayed
 * upon unpause.
 */
2497static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
2498{
2499 struct xe_gpu_scheduler *sched = &q->guc->sched;
2500 struct xe_sched_job *job;
2501 int i;
2502
2503 lockdep_assert_held(&guc->submission_state.lock);
2504
2505 /* Stop scheduling + flush any DRM scheduler operations */
2506 xe_sched_submission_stop(sched);
2507 cancel_delayed_work_sync(&sched->base.work_tdr);
2508
2509 guc_exec_queue_revert_pending_state_change(guc, q);
2510
2511 if (xe_exec_queue_is_parallel(q)) {
2512 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */
2513 struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);
2514
2515 /*
2516 * NOP existing WQ commands that may contain stale GGTT
2517 * addresses. These will be replayed upon unpause. The hardware
2518 * seems to get confused if the WQ head/tail pointers are
2519 * adjusted.
2520 */
2521 if (lrc)
2522 lrc_parallel_clear(lrc);
2523 }
2524
2525 job = xe_sched_first_pending_job(sched);
2526 if (job) {
2527 job->restore_replay = true;
2528
2529 /*
2530 * Adjust software tail so jobs submitted overwrite previous
2531 * position in ring buffer with new GGTT addresses.
2532 */
2533 for (i = 0; i < q->width; ++i)
2534 q->lrc[i]->ring.tail = job->ptrs[i].head;
2535 }
2536}
2537
2538/**
2539 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
2540 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2541 */
2542void xe_guc_submit_pause(struct xe_guc *guc)
2543{
2544 struct xe_exec_queue *q;
2545 unsigned long index;
2546
2547 mutex_lock(&guc->submission_state.lock);
2548 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2549 xe_sched_submission_stop(&q->guc->sched);
2550 mutex_unlock(&guc->submission_state.lock);
2551}
2552
2553/**
2554 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
2555 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2556 */
2557void xe_guc_submit_pause_vf(struct xe_guc *guc)
2558{
2559 struct xe_exec_queue *q;
2560 unsigned long index;
2561
2562 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2563 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
2564
2565 mutex_lock(&guc->submission_state.lock);
2566 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2567 /* Prevent redundant attempts to stop parallel queues */
2568 if (q->guc->id != index)
2569 continue;
2570
2571 guc_exec_queue_pause(guc, q);
2572 }
2573 mutex_unlock(&guc->submission_state.lock);
2574}
2575
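/*
 * Restart a queue after a GuC reset: rewind the software and LRC ring tails
 * so pending jobs are re-emitted from their original positions, resubmit
 * them, then restart submission and the TDR.
 */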
2576static void guc_exec_queue_start(struct xe_exec_queue *q)
2577{
2578 struct xe_gpu_scheduler *sched = &q->guc->sched;
2579
2580 if (!exec_queue_killed_or_banned_or_wedged(q)) {
2581 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
2582 int i;
2583
2584 trace_xe_exec_queue_resubmit(q);
2585 if (job) {
2586 for (i = 0; i < q->width; ++i) {
				/*
				 * The GuC context is unregistered at this
				 * point in time; adjusting the software ring
				 * tail ensures jobs are rewritten in their
				 * original placement, and adjusting the LRC
				 * tail ensures the newly loaded GuC / contexts
				 * only see the LRC tail increasing as jobs are
				 * written out.
				 */
2595 q->lrc[i]->ring.tail = job->ptrs[i].head;
2596 xe_lrc_set_ring_tail(q->lrc[i],
2597 xe_lrc_ring_head(q->lrc[i]));
2598 }
2599 }
2600 xe_sched_resubmit_jobs(sched);
2601 }
2602
2603 xe_sched_submission_start(sched);
2604 xe_sched_submission_resume_tdr(sched);
2605}
2606
2607int xe_guc_submit_start(struct xe_guc *guc)
2608{
2609 struct xe_exec_queue *q;
2610 unsigned long index;
2611
2612 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2613
2614 mutex_lock(&guc->submission_state.lock);
2615 atomic_dec(&guc->submission_state.stopped);
2616 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2617 /* Prevent redundant attempts to start parallel queues */
2618 if (q->guc->id != index)
2619 continue;
2620
2621 guc_exec_queue_start(q);
2622 }
2623 mutex_unlock(&guc->submission_state.lock);
2624
2625 wake_up_all(&guc->ct.wq);
2626
2627 return 0;
2628}
2629
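/*
 * Re-emit the ring commands for pending jobs, starting from the first job
 * marked for restore replay, so they reference up-to-date GGTT addresses
 * after VF migration; the final pending job is tagged as the last replay.
 */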
2630static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
2631 struct xe_exec_queue *q)
2632{
2633 struct xe_gpu_scheduler *sched = &q->guc->sched;
2634 struct xe_sched_job *job = NULL;
2635 struct drm_sched_job *s_job;
2636 bool restore_replay = false;
2637
2638 drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
2639 job = to_xe_sched_job(s_job);
2640 restore_replay |= job->restore_replay;
2641 if (restore_replay) {
2642 xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
2643 q->guc->id, xe_sched_job_seqno(job));
2644
2645 q->ring_ops->emit_job(job);
2646 job->restore_replay = true;
2647 }
2648 }
2649
2650 if (job)
2651 job->last_replay = true;
2652}
2653
2654/**
2655 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
2656 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
2657 */
2658void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
2659{
2660 struct xe_exec_queue *q;
2661 unsigned long index;
2662
2663 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2664 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
2665
2666 mutex_lock(&guc->submission_state.lock);
2667 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2668 /* Prevent redundant attempts to stop parallel queues */
2669 if (q->guc->id != index)
2670 continue;
2671
2672 guc_exec_queue_unpause_prepare(guc, q);
2673 }
2674 mutex_unlock(&guc->submission_state.lock);
2675}
2676
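/*
 * Re-queue the CLEANUP / SUSPEND / RESUME messages recorded by
 * guc_exec_queue_revert_pending_state_change() so the reverted operations are
 * replayed once submission restarts.
 */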
2677static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
2678{
2679 struct xe_gpu_scheduler *sched = &q->guc->sched;
2680 struct xe_sched_msg *msg;
2681
2682 if (q->guc->needs_cleanup) {
2683 msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
2684
2685 guc_exec_queue_add_msg(q, msg, CLEANUP);
2686 q->guc->needs_cleanup = false;
2687 }
2688
2689 if (q->guc->needs_suspend) {
2690 msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
2691
2692 xe_sched_msg_lock(sched);
2693 guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
2694 xe_sched_msg_unlock(sched);
2695
2696 q->guc->needs_suspend = false;
2697 }
2698
	/*
	 * The resume must be in the message queue before the suspend as it is
	 * not possible for a resume to be issued if a suspend is pending, but
	 * the inverse is possible.
	 */
2704 if (q->guc->needs_resume) {
2705 msg = q->guc->static_msgs + STATIC_MSG_RESUME;
2706
2707 xe_sched_msg_lock(sched);
2708 guc_exec_queue_try_add_msg_head(q, msg, RESUME);
2709 xe_sched_msg_unlock(sched);
2710
2711 q->guc->needs_resume = false;
2712 }
2713}
2714
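/*
 * Unpause a single queue after VF recovery: resubmit pending jobs, replay any
 * reverted state-change messages, and restart submission, triggering cleanup
 * if the queue was killed/banned/wedged in the meantime.
 */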
2715static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
2716{
2717 struct xe_gpu_scheduler *sched = &q->guc->sched;
2718 bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);
2719
2720 lockdep_assert_held(&guc->submission_state.lock);
2721
2722 xe_sched_resubmit_jobs(sched);
2723 guc_exec_queue_replay_pending_state_change(q);
2724 xe_sched_submission_start(sched);
2725 if (needs_tdr)
2726 xe_guc_exec_queue_trigger_cleanup(q);
2727 xe_sched_submission_resume_tdr(sched);
2728}
2729
2730/**
2731 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
2732 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2733 */
2734void xe_guc_submit_unpause(struct xe_guc *guc)
2735{
2736 struct xe_exec_queue *q;
2737 unsigned long index;
2738
2739 mutex_lock(&guc->submission_state.lock);
2740 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2741 xe_sched_submission_start(&q->guc->sched);
2742 mutex_unlock(&guc->submission_state.lock);
2743}
2744
2745/**
2746 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
2747 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2748 */
2749void xe_guc_submit_unpause_vf(struct xe_guc *guc)
2750{
2751 struct xe_exec_queue *q;
2752 unsigned long index;
2753
2754 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2755
2756 mutex_lock(&guc->submission_state.lock);
2757 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2758 /*
		 * Prevent redundant attempts to unpause parallel queues, or queues
2760 * created after resfix done.
2761 */
2762 if (q->guc->id != index ||
2763 !drm_sched_is_stopped(&q->guc->sched.base))
2764 continue;
2765
2766 guc_exec_queue_unpause(guc, q);
2767 }
2768 mutex_unlock(&guc->submission_state.lock);
2769}
2770
2771/**
2772 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
2773 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
2774 */
2775void xe_guc_submit_pause_abort(struct xe_guc *guc)
2776{
2777 struct xe_exec_queue *q;
2778 unsigned long index;
2779
2780 mutex_lock(&guc->submission_state.lock);
2781 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2782 struct xe_gpu_scheduler *sched = &q->guc->sched;
2783
2784 /* Prevent redundant attempts to stop parallel queues */
2785 if (q->guc->id != index)
2786 continue;
2787
2788 xe_sched_submission_start(sched);
2789 guc_exec_queue_kill(q);
2790 }
2791 mutex_unlock(&guc->submission_state.lock);
2792}
2793
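/*
 * Translate a guc_id from a G2H message into an exec queue, returning NULL
 * (and logging an error) if the id is out of range or has no queue registered.
 */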
2794static struct xe_exec_queue *
2795g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
2796{
2797 struct xe_gt *gt = guc_to_gt(guc);
2798 struct xe_exec_queue *q;
2799
2800 if (unlikely(guc_id >= GUC_ID_MAX)) {
2801 xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
2802 return NULL;
2803 }
2804
2805 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
2806 if (unlikely(!q)) {
2807 xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
2808 return NULL;
2809 }
2810
2811 xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
2812 xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
2813
2814 return q;
2815}
2816
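/*
 * Deregister the context with the GuC. For multi queue secondaries no H2G is
 * sent; the deregister is completed locally.
 */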
2817static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
2818{
2819 u32 action[] = {
2820 XE_GUC_ACTION_DEREGISTER_CONTEXT,
2821 q->guc->id,
2822 };
2823
2824 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
2825 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
2826 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
2827 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
2828
2829 trace_xe_exec_queue_deregister(q);
2830
2831 if (xe_exec_queue_is_multi_queue_secondary(q))
2832 handle_deregister_done(guc, q);
2833 else
2834 xe_guc_ct_send_g2h_handler(&guc->ct, action,
2835 ARRAY_SIZE(action));
2836}
2837
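/*
 * Process a SCHED_DONE G2H: on enable, record the resume time and clear the
 * pending enable/resume state; on disable, signal any pending suspend fence,
 * and kick off deregistration if the queue has been marked destroyed.
 */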
2838static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
2839 u32 runnable_state)
2840{
2841 trace_xe_exec_queue_scheduling_done(q);
2842
2843 if (runnable_state == 1) {
2844 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
2845
2846 q->guc->resume_time = ktime_get();
2847 clear_exec_queue_pending_resume(q);
2848 clear_exec_queue_pending_enable(q);
2849 smp_wmb();
2850 wake_up_all(&guc->ct.wq);
2851 } else {
2852 xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
2853 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
2854
2855 if (q->guc->suspend_pending) {
2856 suspend_fence_signal(q);
2857 clear_exec_queue_pending_disable(q);
2858 } else {
2859 if (exec_queue_banned(q)) {
2860 smp_wmb();
2861 wake_up_all(&guc->ct.wq);
2862 }
2863 if (exec_queue_destroyed(q)) {
2864 /*
2865 * Make sure to clear the pending_disable only
2866 * after sampling the destroyed state. We want
2867 * to ensure we don't trigger the unregister too
2868 * early with something intending to only
2869 * disable scheduling. The caller doing the
2870 * destroy must wait for an ongoing
2871 * pending_disable before marking as destroyed.
2872 */
2873 clear_exec_queue_pending_disable(q);
2874 deregister_exec_queue(guc, q);
2875 } else {
2876 clear_exec_queue_pending_disable(q);
2877 }
2878 }
2879 }
2880}
2881
2882static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
2883 struct xe_exec_queue *q,
2884 u32 runnable_state)
2885{
	/* Take the CT lock here as handle_sched_done() does send an H2G message */
2887 mutex_lock(&guc->ct.lock);
2888 handle_sched_done(guc, q, runnable_state);
2889 mutex_unlock(&guc->ct.lock);
2890}
2891
2892int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2893{
2894 struct xe_exec_queue *q;
2895 u32 guc_id, runnable_state;
2896
2897 if (unlikely(len < 2))
2898 return -EPROTO;
2899
2900 guc_id = msg[0];
2901 runnable_state = msg[1];
2902
2903 q = g2h_exec_queue_lookup(guc, guc_id);
2904 if (unlikely(!q))
2905 return -EPROTO;
2906
2907 if (unlikely(!exec_queue_pending_enable(q) &&
2908 !exec_queue_pending_disable(q))) {
2909 xe_gt_err(guc_to_gt(guc),
2910 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
2911 atomic_read(&q->guc->state), q->guc->id,
2912 runnable_state);
2913 return -EPROTO;
2914 }
2915
2916 handle_sched_done(guc, q, runnable_state);
2917
2918 return 0;
2919}
2920
2921static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
2922{
2923 trace_xe_exec_queue_deregister_done(q);
2924
2925 clear_exec_queue_registered(q);
2926 __guc_exec_queue_destroy(guc, q);
2927}
2928
2929int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2930{
2931 struct xe_exec_queue *q;
2932 u32 guc_id;
2933
2934 if (unlikely(len < 1))
2935 return -EPROTO;
2936
2937 guc_id = msg[0];
2938
2939 q = g2h_exec_queue_lookup(guc, guc_id);
2940 if (unlikely(!q))
2941 return -EPROTO;
2942
2943 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
2944 exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
2945 xe_gt_err(guc_to_gt(guc),
2946 "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
2947 atomic_read(&q->guc->state), q->guc->id);
2948 return -EPROTO;
2949 }
2950
2951 handle_deregister_done(guc, q);
2952
2953 return 0;
2954}
2955
2956int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
2957{
2958 struct xe_gt *gt = guc_to_gt(guc);
2959 struct xe_exec_queue *q;
2960 u32 guc_id;
2961
2962 if (unlikely(len < 1))
2963 return -EPROTO;
2964
2965 guc_id = msg[0];
2966
2967 q = g2h_exec_queue_lookup(guc, guc_id);
2968 if (unlikely(!q))
2969 return -EPROTO;
2970
2971 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
2972 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
2973 atomic_read(&q->guc->state));
2974
2975 trace_xe_exec_queue_reset(q);
2976
	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
	 * cancel jobs by setting the job's timeout to the minimum value, which
	 * kicks guc_exec_queue_timedout_job.
	 */
2983 xe_guc_exec_queue_reset_trigger_cleanup(q);
2984
2985 return 0;
2986}
2987
/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Pointer to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send the message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to the host; this function will be
 * called first to check the status before processing the data that comes with
 * the message.
 *
 * Returns: 0 on success, negative error code on failure.
 */
3000int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
3001{
3002 u32 status;
3003
3004 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
3005 return -EPROTO;
3006
3007 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
3008 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
3009 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
3010
3011 xe_guc_capture_process(guc);
3012
3013 return 0;
3014}
3015
3016int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
3017 u32 len)
3018{
3019 struct xe_gt *gt = guc_to_gt(guc);
3020 struct xe_exec_queue *q;
3021 u32 guc_id;
3022 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;
3023
3024 if (unlikely(!len || len > 2))
3025 return -EPROTO;
3026
3027 guc_id = msg[0];
3028
3029 if (len == 2)
3030 type = msg[1];
3031
3032 if (guc_id == GUC_ID_UNKNOWN) {
		/*
		 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any
		 * PF/VF context. In such a case only the PF will be notified
		 * about that fault.
		 */
3037 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
3038 return 0;
3039 }
3040
3041 q = g2h_exec_queue_lookup(guc, guc_id);
3042 if (unlikely(!q))
3043 return -EPROTO;
3044
3045 /*
3046 * The type is HW-defined and changes based on platform, so we don't
3047 * decode it in the kernel and only check if it is valid.
3048 * See bspec 54047 and 72187 for details.
3049 */
3050 if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
3051 xe_gt_info(gt,
3052 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
3053 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
3054 else
3055 xe_gt_info(gt,
3056 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
3057 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
3058
3059 trace_xe_exec_queue_memory_cat_error(q);
3060
3061 /* Treat the same as engine reset */
3062 xe_guc_exec_queue_reset_trigger_cleanup(q);
3063
3064 return 0;
3065}
3066
3067int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
3068{
3069 struct xe_gt *gt = guc_to_gt(guc);
3070 u8 guc_class, instance;
3071 u32 reason;
3072
3073 if (unlikely(len != 3))
3074 return -EPROTO;
3075
3076 guc_class = msg[0];
3077 instance = msg[1];
3078 reason = msg[2];
3079
3080 /* Unexpected failure of a hardware feature, log an actual error */
3081 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
3082 guc_class, instance, reason);
3083
3084 xe_gt_reset_async(gt);
3085
3086 return 0;
3087}
3088
3089int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
3090 u32 len)
3091{
3092 struct xe_gt *gt = guc_to_gt(guc);
3093 struct xe_device *xe = guc_to_xe(guc);
3094 struct xe_exec_queue *q;
3095 u32 guc_id = msg[2];
3096
3097 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) {
3098 drm_err(&xe->drm, "Invalid length %u", len);
3099 return -EPROTO;
3100 }
3101
3102 q = g2h_exec_queue_lookup(guc, guc_id);
3103 if (unlikely(!q))
3104 return -EPROTO;
3105
3106 xe_gt_dbg(gt,
3107 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x",
3108 msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]);
3109
3110 trace_xe_exec_queue_cgp_context_error(q);
3111
3112 /* Treat the same as engine reset */
3113 xe_guc_exec_queue_reset_trigger_cleanup(q);
3114
3115 return 0;
3116}
3117
3118/**
3119 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
3120 * @guc: guc
3121 * @msg: message indicating CGP sync done
3122 * @len: length of message
3123 *
3124 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting
3125 * for CGP synchronization to complete.
3126 *
3127 * Return: 0 on success, -EPROTO for malformed messages.
3128 */
3129int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
3130{
3131 struct xe_device *xe = guc_to_xe(guc);
3132 struct xe_exec_queue *q;
3133 u32 guc_id = msg[0];
3134
3135 if (unlikely(len < 1)) {
3136 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len);
3137 return -EPROTO;
3138 }
3139
3140 q = g2h_exec_queue_lookup(guc, guc_id);
3141 if (unlikely(!q))
3142 return -EPROTO;
3143
3144 if (!xe_exec_queue_is_multi_queue_primary(q)) {
3145 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response");
3146 return -EPROTO;
3147 }
3148
3149 /* Wakeup the serialized cgp update wait */
3150 WRITE_ONCE(q->multi_queue.group->sync_pending, false);
3151 xe_guc_ct_wake_waiters(&guc->ct);
3152
3153 return 0;
3154}
3155
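/* Capture the parallel work queue descriptor and any outstanding WQ items */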
3156static void
3157guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
3158 struct xe_guc_submit_exec_queue_snapshot *snapshot)
3159{
3160 struct xe_guc *guc = exec_queue_to_guc(q);
3161 struct xe_device *xe = guc_to_xe(guc);
3162 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
3163 int i;
3164
3165 snapshot->guc.wqi_head = q->guc->wqi_head;
3166 snapshot->guc.wqi_tail = q->guc->wqi_tail;
3167 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
3168 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
3169 snapshot->parallel.wq_desc.status = parallel_read(xe, map,
3170 wq_desc.wq_status);
3171
3172 if (snapshot->parallel.wq_desc.head !=
3173 snapshot->parallel.wq_desc.tail) {
3174 for (i = snapshot->parallel.wq_desc.head;
3175 i != snapshot->parallel.wq_desc.tail;
3176 i = (i + sizeof(u32)) % WQ_SIZE)
3177 snapshot->parallel.wq[i / sizeof(u32)] =
3178 parallel_read(xe, map, wq[i / sizeof(u32)]);
3179 }
3180}
3181
3182static void
3183guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
3184 struct drm_printer *p)
3185{
3186 int i;
3187
3188 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
3189 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
3190 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
3191 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
3192 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
3193
3194 if (snapshot->parallel.wq_desc.head !=
3195 snapshot->parallel.wq_desc.tail) {
3196 for (i = snapshot->parallel.wq_desc.head;
3197 i != snapshot->parallel.wq_desc.tail;
3198 i = (i + sizeof(u32)) % WQ_SIZE)
3199 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
3200 snapshot->parallel.wq[i / sizeof(u32)]);
3201 }
3202}
3203
3204/**
3205 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
3206 * @q: faulty exec queue
3207 *
3208 * This can be printed out in a later stage like during dev_coredump
3209 * analysis.
3210 *
3211 * Returns: a GuC Submit Engine snapshot object that must be freed by the
3212 * caller, using `xe_guc_exec_queue_snapshot_free`.
3213 */
3214struct xe_guc_submit_exec_queue_snapshot *
3215xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
3216{
3217 struct xe_gpu_scheduler *sched = &q->guc->sched;
3218 struct xe_guc_submit_exec_queue_snapshot *snapshot;
3219 int i;
3220
3221 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC);
3222
3223 if (!snapshot)
3224 return NULL;
3225
3226 snapshot->guc.id = q->guc->id;
3227 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
3228 snapshot->class = q->class;
3229 snapshot->logical_mask = q->logical_mask;
3230 snapshot->width = q->width;
3231 snapshot->refcount = kref_read(&q->refcount);
3232 snapshot->sched_timeout = sched->base.timeout;
3233 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
3234 snapshot->sched_props.preempt_timeout_us =
3235 q->sched_props.preempt_timeout_us;
3236
3237 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width,
3238 GFP_ATOMIC);
3239
3240 if (snapshot->lrc) {
3241 for (i = 0; i < q->width; ++i) {
3242 struct xe_lrc *lrc = q->lrc[i];
3243
3244 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
3245 }
3246 }
3247
3248 snapshot->schedule_state = atomic_read(&q->guc->state);
3249 snapshot->exec_queue_flags = q->flags;
3250
3251 snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
3252 if (snapshot->parallel_execution)
3253 guc_exec_queue_wq_snapshot_capture(q, snapshot);
3254
3255 if (xe_exec_queue_is_multi_queue(q)) {
3256 snapshot->multi_queue.valid = true;
3257 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id;
3258 snapshot->multi_queue.pos = q->multi_queue.pos;
3259 }
3260
3261 return snapshot;
3262}
3263
3264/**
3265 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
3266 * @snapshot: Previously captured snapshot of job.
3267 *
 * This captures some data that requires taking some locks, so it cannot be done in the signaling path.
3269 */
3270void
3271xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
3272{
3273 int i;
3274
3275 if (!snapshot || !snapshot->lrc)
3276 return;
3277
3278 for (i = 0; i < snapshot->width; ++i)
3279 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
3280}
3281
3282/**
3283 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
3284 * @snapshot: GuC Submit Engine snapshot object.
3285 * @p: drm_printer where it will be printed out.
3286 *
3287 * This function prints out a given GuC Submit Engine snapshot object.
3288 */
3289void
3290xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
3291 struct drm_printer *p)
3292{
3293 int i;
3294
3295 if (!snapshot)
3296 return;
3297
3298 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
3299 drm_printf(p, "\tName: %s\n", snapshot->name);
3300 drm_printf(p, "\tClass: %d\n", snapshot->class);
3301 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
3302 drm_printf(p, "\tWidth: %d\n", snapshot->width);
3303 drm_printf(p, "\tRef: %d\n", snapshot->refcount);
3304 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
3305 drm_printf(p, "\tTimeslice: %u (us)\n",
3306 snapshot->sched_props.timeslice_us);
3307 drm_printf(p, "\tPreempt timeout: %u (us)\n",
3308 snapshot->sched_props.preempt_timeout_us);
3309
3310 for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
3311 xe_lrc_snapshot_print(snapshot->lrc[i], p);
3312
3313 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
3314 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
3315
3316 if (snapshot->parallel_execution)
3317 guc_exec_queue_wq_snapshot_print(snapshot, p);
3318
3319 if (snapshot->multi_queue.valid) {
3320 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary);
3321 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos);
3322 }
3323}
3324
3325/**
3326 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
3327 * snapshot.
3328 * @snapshot: GuC Submit Engine snapshot object.
3329 *
 * This function frees all the memory that was allocated at capture
 * time.
3332 */
3333void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
3334{
3335 int i;
3336
3337 if (!snapshot)
3338 return;
3339
3340 if (snapshot->lrc) {
3341 for (i = 0; i < snapshot->width; i++)
3342 xe_lrc_snapshot_free(snapshot->lrc[i]);
3343 kfree(snapshot->lrc);
3344 }
3345 kfree(snapshot);
3346}
3347
3348static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
3349{
3350 struct xe_guc_submit_exec_queue_snapshot *snapshot;
3351
3352 snapshot = xe_guc_exec_queue_snapshot_capture(q);
3353 xe_guc_exec_queue_snapshot_print(snapshot, p);
3354 xe_guc_exec_queue_snapshot_free(snapshot);
3355}
3356
3357/**
3358 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
3359 * @q: Execution queue
3360 * @ctx_type: Type of the context
3361 *
 * This function registers the execution queue with the GuC. Special context
 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
 * are only applicable for IGPU and in the VF.
 * Submits the execution queue to the GuC after registering it.
3366 *
3367 * Returns - None.
3368 */
3369void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
3370{
3371 struct xe_guc *guc = exec_queue_to_guc(q);
3372 struct xe_device *xe = guc_to_xe(guc);
3373 struct xe_gt *gt = guc_to_gt(guc);
3374
3375 xe_gt_assert(gt, IS_SRIOV_VF(xe));
3376 xe_gt_assert(gt, !IS_DGFX(xe));
3377 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
3378 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
3379 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
3380
3381 register_exec_queue(q, ctx_type);
3382 enable_scheduling(q);
3383}
3384
3385/**
3386 * xe_guc_submit_print - GuC Submit Print.
3387 * @guc: GuC.
3388 * @p: drm_printer where it will be printed out.
3389 *
 * This function captures and prints snapshots of **all** GuC Engines.
3391 */
3392void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
3393{
3394 struct xe_exec_queue *q;
3395 unsigned long index;
3396
3397 if (!xe_device_uc_enabled(guc_to_xe(guc)))
3398 return;
3399
3400 mutex_lock(&guc->submission_state.lock);
3401 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
3402 guc_exec_queue_print(q, p);
3403 mutex_unlock(&guc->submission_state.lock);
3404}
3405
3406/**
3407 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues
3408 * registered with the GuC
3409 * @guc: GuC.
3410 *
3411 * Return: true if any MLRC queue is registered with the GuC, false otherwise.
3412 */
3413bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc)
3414{
3415 struct xe_exec_queue *q;
3416 unsigned long index;
3417
3418 guard(mutex)(&guc->submission_state.lock);
3419
3420 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
3421 if (q->width > 1)
3422 return true;
3423
3424 return false;
3425}
3426
3427/**
3428 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
3429 * exec queues registered to given GuC.
3430 * @guc: the &xe_guc struct instance
3431 * @scratch: scratch buffer to be used as temporary storage
3432 *
3433 * Returns: zero on success, negative error code on failure.
3434 */
3435int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
3436{
3437 struct xe_exec_queue *q;
3438 unsigned long index;
3439 int err = 0;
3440
3441 mutex_lock(&guc->submission_state.lock);
3442 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
3443 /* Prevent redundant attempts to stop parallel queues */
3444 if (q->guc->id != index)
3445 continue;
3446
3447 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
3448 if (err)
3449 break;
3450 }
3451 mutex_unlock(&guc->submission_state.lock);
3452
3453 return err;
3454}