Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 3454 lines 100 kB view raw
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sleep.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN		6

static int guc_submit_reset_prepare(struct xe_guc *guc);

/* Return the GuC instance embedded in the GT that @q belongs to. */
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as schedule
 * engine done being processed).
 *
 * Each EXEC_QUEUE_STATE_* value is a single bit in q->guc->state. The helpers
 * below come in test / set / clear flavours: the test helpers read the word
 * with atomic_read() and mask the bit, the set helpers use atomic_or() and the
 * clear helpers use atomic_and() with the inverted bit.
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND	(1 << 11)

/* EXEC_QUEUE_STATE_REGISTERED: test / set / clear */
static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

/* EXEC_QUEUE_STATE_ENABLED: test / set / clear */
static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

/* EXEC_QUEUE_STATE_PENDING_ENABLE: test / set / clear */
static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

/* EXEC_QUEUE_STATE_PENDING_DISABLE: test */
static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}
125 126static void set_exec_queue_pending_disable(struct xe_exec_queue *q) 127{ 128 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 129} 130 131static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) 132{ 133 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 134} 135 136static bool exec_queue_destroyed(struct xe_exec_queue *q) 137{ 138 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 139} 140 141static void set_exec_queue_destroyed(struct xe_exec_queue *q) 142{ 143 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 144} 145 146static void clear_exec_queue_destroyed(struct xe_exec_queue *q) 147{ 148 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 149} 150 151static bool exec_queue_banned(struct xe_exec_queue *q) 152{ 153 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 154} 155 156static void set_exec_queue_banned(struct xe_exec_queue *q) 157{ 158 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 159} 160 161static bool exec_queue_suspended(struct xe_exec_queue *q) 162{ 163 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; 164} 165 166static void set_exec_queue_suspended(struct xe_exec_queue *q) 167{ 168 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 169} 170 171static void clear_exec_queue_suspended(struct xe_exec_queue *q) 172{ 173 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 174} 175 176static bool exec_queue_reset(struct xe_exec_queue *q) 177{ 178 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 179} 180 181static void set_exec_queue_reset(struct xe_exec_queue *q) 182{ 183 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 184} 185 186static bool exec_queue_killed(struct xe_exec_queue *q) 187{ 188 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; 189} 190 191static void set_exec_queue_killed(struct xe_exec_queue *q) 192{ 193 atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); 194} 195 196static bool 
exec_queue_wedged(struct xe_exec_queue *q) 197{ 198 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; 199} 200 201static void set_exec_queue_wedged(struct xe_exec_queue *q) 202{ 203 atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); 204} 205 206static bool exec_queue_pending_resume(struct xe_exec_queue *q) 207{ 208 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; 209} 210 211static void set_exec_queue_pending_resume(struct xe_exec_queue *q) 212{ 213 atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 214} 215 216static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) 217{ 218 atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 219} 220 221static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q) 222{ 223 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND; 224} 225 226static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q) 227{ 228 atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state); 229} 230 231static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q) 232{ 233 atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state); 234} 235 236static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) 237{ 238 return (atomic_read(&q->guc->state) & 239 (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | 240 EXEC_QUEUE_STATE_BANNED)); 241} 242 243static void guc_submit_sw_fini(struct drm_device *drm, void *arg) 244{ 245 struct xe_guc *guc = arg; 246 struct xe_device *xe = guc_to_xe(guc); 247 struct xe_gt *gt = guc_to_gt(guc); 248 int ret; 249 250 ret = wait_event_timeout(guc->submission_state.fini_wq, 251 xa_empty(&guc->submission_state.exec_queue_lookup), 252 HZ * 5); 253 254 drain_workqueue(xe->destroy_wq); 255 256 xe_gt_assert(gt, ret); 257 258 xa_destroy(&guc->submission_state.exec_queue_lookup); 259} 260 261static void guc_submit_fini(void *arg) 262{ 263 struct xe_guc *guc = arg; 264 struct xe_exec_queue *q; 265 unsigned 
long index; 266 267 /* Drop any wedged queue refs */ 268 mutex_lock(&guc->submission_state.lock); 269 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 270 if (exec_queue_wedged(q)) { 271 mutex_unlock(&guc->submission_state.lock); 272 xe_exec_queue_put(q); 273 mutex_lock(&guc->submission_state.lock); 274 } 275 } 276 mutex_unlock(&guc->submission_state.lock); 277 278 /* Forcefully kill any remaining exec queues */ 279 xe_guc_ct_stop(&guc->ct); 280 guc_submit_reset_prepare(guc); 281 xe_guc_softreset(guc); 282 xe_guc_submit_stop(guc); 283 xe_uc_fw_sanitize(&guc->fw); 284 xe_guc_submit_pause_abort(guc); 285} 286 287static const struct xe_exec_queue_ops guc_exec_queue_ops; 288 289static void primelockdep(struct xe_guc *guc) 290{ 291 if (!IS_ENABLED(CONFIG_LOCKDEP)) 292 return; 293 294 fs_reclaim_acquire(GFP_KERNEL); 295 296 mutex_lock(&guc->submission_state.lock); 297 mutex_unlock(&guc->submission_state.lock); 298 299 fs_reclaim_release(GFP_KERNEL); 300} 301 302/** 303 * xe_guc_submit_init() - Initialize GuC submission. 304 * @guc: the &xe_guc to initialize 305 * @num_ids: number of GuC context IDs to use 306 * 307 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all 308 * GuC context IDs supported by the GuC firmware should be used for submission. 309 * 310 * Only VF drivers will have to provide explicit number of GuC context IDs 311 * that they can use for submission. 312 * 313 * Return: 0 on success or a negative error code on failure. 
314 */ 315int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) 316{ 317 struct xe_device *xe = guc_to_xe(guc); 318 struct xe_gt *gt = guc_to_gt(guc); 319 int err; 320 321 err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); 322 if (err) 323 return err; 324 325 err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); 326 if (err) 327 return err; 328 329 gt->exec_queue_ops = &guc_exec_queue_ops; 330 331 xa_init(&guc->submission_state.exec_queue_lookup); 332 333 init_waitqueue_head(&guc->submission_state.fini_wq); 334 335 primelockdep(guc); 336 337 guc->submission_state.initialized = true; 338 339 err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); 340 if (err) 341 return err; 342 343 return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); 344} 345 346/* 347 * Given that we want to guarantee enough RCS throughput to avoid missing 348 * frames, we set the yield policy to 20% of each 80ms interval. 349 */ 350#define RC_YIELD_DURATION 80 /* in ms */ 351#define RC_YIELD_RATIO 20 /* in percent */ 352static u32 *emit_render_compute_yield_klv(u32 *emit) 353{ 354 *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); 355 *emit++ = RC_YIELD_DURATION; 356 *emit++ = RC_YIELD_RATIO; 357 358 return emit; 359} 360 361#define SCHEDULING_POLICY_MAX_DWORDS 16 362static int guc_init_global_schedule_policy(struct xe_guc *guc) 363{ 364 u32 data[SCHEDULING_POLICY_MAX_DWORDS]; 365 u32 *emit = data; 366 u32 count = 0; 367 int ret; 368 369 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 370 return 0; 371 372 *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 373 374 if (CCS_INSTANCES(guc_to_gt(guc))) 375 emit = emit_render_compute_yield_klv(emit); 376 377 count = emit - data; 378 if (count > 1) { 379 xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); 380 381 ret = xe_guc_ct_send_block(&guc->ct, data, count); 382 if (ret < 0) { 383 xe_gt_err(guc_to_gt(guc), 384 "failed to enable GuC scheduling policies: 
%pe\n", 385 ERR_PTR(ret)); 386 return ret; 387 } 388 } 389 390 return 0; 391} 392 393int xe_guc_submit_enable(struct xe_guc *guc) 394{ 395 int ret; 396 397 ret = guc_init_global_schedule_policy(guc); 398 if (ret) 399 return ret; 400 401 guc->submission_state.enabled = true; 402 403 return 0; 404} 405 406void xe_guc_submit_disable(struct xe_guc *guc) 407{ 408 guc->submission_state.enabled = false; 409} 410 411static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) 412{ 413 int i; 414 415 lockdep_assert_held(&guc->submission_state.lock); 416 417 for (i = 0; i < xa_count; ++i) 418 xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); 419 420 xe_guc_id_mgr_release_locked(&guc->submission_state.idm, 421 q->guc->id, q->width); 422 423 if (xa_empty(&guc->submission_state.exec_queue_lookup)) 424 wake_up(&guc->submission_state.fini_wq); 425} 426 427static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 428{ 429 int ret; 430 int i; 431 432 /* 433 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, 434 * worse case user gets -ENOMEM on engine create and has to try again. 435 * 436 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent 437 * failure. 
438 */ 439 lockdep_assert_held(&guc->submission_state.lock); 440 441 ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, 442 q->width); 443 if (ret < 0) 444 return ret; 445 446 q->guc->id = ret; 447 448 for (i = 0; i < q->width; ++i) { 449 ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, 450 q->guc->id + i, q, GFP_NOWAIT)); 451 if (ret) 452 goto err_release; 453 } 454 455 return 0; 456 457err_release: 458 __release_guc_id(guc, q, i); 459 460 return ret; 461} 462 463static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 464{ 465 mutex_lock(&guc->submission_state.lock); 466 __release_guc_id(guc, q, q->width); 467 mutex_unlock(&guc->submission_state.lock); 468} 469 470struct exec_queue_policy { 471 u32 count; 472 struct guc_update_exec_queue_policy h2g; 473}; 474 475static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) 476{ 477 size_t bytes = sizeof(policy->h2g.header) + 478 (sizeof(policy->h2g.klv[0]) * policy->count); 479 480 return bytes / sizeof(u32); 481} 482 483static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, 484 u16 guc_id) 485{ 486 policy->h2g.header.action = 487 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 488 policy->h2g.header.guc_id = guc_id; 489 policy->count = 0; 490} 491 492#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ 493static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ 494 u32 data) \ 495{ \ 496 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 497\ 498 policy->h2g.klv[policy->count].kl = \ 499 FIELD_PREP(GUC_KLV_0_KEY, \ 500 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 501 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 502 policy->h2g.klv[policy->count].value = data; \ 503 policy->count++; \ 504} 505 506MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 507MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 508MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

/* Map an xe exec queue priority to the GuC client priority sent in KLVs. */
static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

/*
 * Send the initial context policies of @q to the GuC: priority, execution
 * quantum (timeslice), preemption timeout and the SLPC frequency request.
 * @q must already be marked registered and must not be a multi-queue
 * secondary.
 */
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/*
 * Send a policy update that sets only the preemption timeout of @q, to the
 * value 1. @q must not be a multi-queue secondary.
 */
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* True while a recovery is pending on the GT of @guc. */
static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

/* Wake all user-fence waiters and queue the TDR of @q immediately. */
static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

/*
 * Stop scheduler submission on the primary and on every queue in the
 * multi-queue group of @q, and mark the group as stopped.
 */
static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq, *next;
	LIST_HEAD(tmp);

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	mutex_lock(&group->list_lock);

	/*
	 * Stop all future queues from executing while group is stopped.
	 */
	group->stopped = true;

	list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
		/*
		 * Refcount prevents an attempted removal from &group->list,
		 * temporary list allows safe iteration after dropping
		 * &group->list_lock.
		 */
		if (xe_exec_queue_get_unless_zero(eq))
			list_move_tail(&eq->multi_queue.link, &tmp);

	mutex_unlock(&group->list_lock);

	/* We cannot stop under list lock without getting inversions */
	xe_sched_submission_stop(&primary->guc->sched);
	list_for_each_entry(eq, &tmp, multi_queue.link)
		xe_sched_submission_stop(&eq->guc->sched);

	/* Move the queues back onto the group list and drop the temporary refs */
	mutex_lock(&group->list_lock);
	list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
		/*
		 * Corner where we got banned while stopping and not on
		 * &group->list
		 */
		if (READ_ONCE(group->banned))
			xe_guc_exec_queue_trigger_cleanup(eq);

		list_move_tail(&eq->multi_queue.link, &group->list);
		xe_exec_queue_put(eq);
	}
	mutex_unlock(&group->list_lock);
}

/*
 * Restart scheduler submission on the primary and on every queue on the group
 * list of @q, and clear the group's stopped flag.
 */
static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	xe_sched_submission_start(&primary->guc->sched);

	mutex_lock(&group->list_lock);
	group->stopped = false;
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_sched_submission_start(&eq->guc->sched);
	mutex_unlock(&group->list_lock);
}

/*
 * Mark the multi-queue group of @q as banned and trigger cleanup on the
 * primary and on every queue on the group list.
 */
static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

/*
 * Set the RESET state bit on @q and trigger its cleanup unless it is already
 * banned. For a multi-queue @q the group is marked banned and the same is
 * done for the primary and every queue on the group list.
 */
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

/* Set the BANNED state bit on the primary and every queue on the group list. */
static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

/* Read / write one field of struct guc_submit_parallel_scratch via @map_ */
#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between KMD and GuC is through a software
 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
 * allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
 * the CGP address. When the secondary queues are added to the group, the CGP is
 * updated with entry for that queue and GuC is notified through the H2G interface
 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
 * error in the CGP. Only one of these CGP update messages can be outstanding
 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
 * fields indicate which queue entry is being updated in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through Q0 GuC interface only.
 *
 * As it is not required to register secondary queues with GuC, the secondary queue
 * context ids in the CGP are populated with Q0 context id.
 */

#define CGP_VERSION_MAJOR_SHIFT	8

/*
 * Write the CGP entry of @q: the version dword, the queue's LRC descriptor
 * low dword and the primary's GuC id at the slot selected by
 * q->multi_queue.pos, and a KMD queue update mask with only this queue's bit
 * set (dword 16 for positions 0..31, dword 17 otherwise; the other mask dword
 * is zeroed).
 */
static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

/*
 * Wait for any outstanding CGP sync of the group to complete, update the CGP
 * entry of @q and send @action (@len dwords) to the GuC.
 *
 * If the wait times out after HZ jiffies or the GuC is stopped, the group is
 * banned, an async GT reset is requested, group cleanup is triggered and
 * nothing is sent.
 */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_multi_queue_priority priority;
	long ret;

	/*
	 * As all queues of a multi queue group use single drm scheduler
	 * submit workqueue, CGP synchronization with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	/* Snapshot the priority under the queue's multi_queue spinlock */
	scoped_guard(spinlock, &q->multi_queue.lock)
		priority = q->multi_queue.priority;

	xe_lrc_set_multi_queue_priority(q->lrc[0], priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

/*
 * Build the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE message from @info and
 * send it through the CGP sync path.
 */
static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE	(8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0; /* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/*
 * Announce secondary queue @q to the GuC: send a CGP sync message carrying
 * the primary's GuC id through the CGP sync path.
 */
static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE	(2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/*
 * Build and send the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC message for a
 * parallel queue: the common fields from @info, the queue width, and the LRC
 * descriptor (low, high) of every LRC after the first.
 */
static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE	(13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	/* LRC 0 is already covered by hwlrca_lo/hi above */
	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

/* Build and send the XE_GUC_ACTION_REGISTER_CONTEXT message from @info. */
static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Register @q with the GuC as a context of type @ctx_type.
 *
 * Fills a guc_ctxt_registration_info from the queue, sets up the work queue
 * descriptor for parallel queues, marks the queue registered and then sends
 * the registration message matching the queue kind (multi-queue primary,
 * parallel, or plain). Multi-queue secondaries send no registration message
 * and no policies; they are added to their group through a CGP sync instead.
 */
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		/* Start from an empty work queue and a zeroed scratch header */
		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

/* Bytes between the cached work queue tail and the end of the work queue. */
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

/*
 * Wait until the work queue of @q has room for @wqi_size bytes.
 *
 * The cached head is refreshed from the parallel scratch page on every
 * iteration and the loop sleeps with an increasing period between checks
 * (NOTE(review): 64 is presumably the cap of the sleep period in ms -
 * confirm against xe_sleep_exponential_ms()). Once more than 2000 ms have
 * been slept in total, an async GT reset is requested.
 *
 * Return: 0 when there is room or a VF recovery is pending, -ENODEV after
 * the timeout.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64);
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

/*
 * Fill the work queue from the current tail up to the wrap point with a
 * single NOOP item and reset the cached tail to 0.
 *
 * Return: 0 on success, -ENODEV if waiting for the space failed.
 */
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	/* Item length in dwords, not counting the header dword itself */
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;
1106 1107 if (wqi_size > wq_space_until_wrap(q)) { 1108 if (wq_noop_append(q)) 1109 return; 1110 } 1111 if (wq_wait_for_space(q, wqi_size)) 1112 return; 1113 1114 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 1115 FIELD_PREP(WQ_LEN_MASK, len_dw); 1116 wqi[i++] = xe_lrc_descriptor(q->lrc[0]); 1117 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | 1118 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); 1119 wqi[i++] = 0; 1120 for (j = 1; j < q->width; ++j) { 1121 struct xe_lrc *lrc = q->lrc[j]; 1122 1123 wqi[i++] = lrc->ring.tail / sizeof(u64); 1124 } 1125 1126 xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); 1127 1128 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, 1129 wq[q->guc->wqi_tail / sizeof(u32)])); 1130 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 1131 q->guc->wqi_tail += wqi_size; 1132 xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); 1133 1134 xe_device_wmb(xe); 1135 1136 map = xe_lrc_parallel_map(q->lrc[0]); 1137 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 1138} 1139 1140#define RESUME_PENDING ~0x0ull 1141static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) 1142{ 1143 struct xe_guc *guc = exec_queue_to_guc(q); 1144 struct xe_lrc *lrc = q->lrc[0]; 1145 u32 action[3]; 1146 u32 g2h_len = 0; 1147 u32 num_g2h = 0; 1148 int len = 0; 1149 bool extra_submit = false; 1150 1151 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1152 1153 if (!job->restore_replay || job->last_replay) { 1154 if (xe_exec_queue_is_parallel(q)) 1155 wq_item_append(q); 1156 else if (!exec_queue_idle_skip_suspend(q)) 1157 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1158 job->last_replay = false; 1159 } 1160 1161 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 1162 return; 1163 1164 /* 1165 * All queues in a multi-queue group will use the primary queue 1166 * of the group to interface with GuC. 
1167 */ 1168 q = xe_exec_queue_multi_queue_primary(q); 1169 1170 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 1171 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 1172 action[len++] = q->guc->id; 1173 action[len++] = GUC_CONTEXT_ENABLE; 1174 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 1175 num_g2h = 1; 1176 if (xe_exec_queue_is_parallel(q)) 1177 extra_submit = true; 1178 1179 q->guc->resume_time = RESUME_PENDING; 1180 set_exec_queue_pending_enable(q); 1181 set_exec_queue_enabled(q); 1182 trace_xe_exec_queue_scheduling_enable(q); 1183 } else { 1184 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1185 action[len++] = q->guc->id; 1186 trace_xe_exec_queue_submit(q); 1187 } 1188 1189 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 1190 1191 if (extra_submit) { 1192 len = 0; 1193 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1194 action[len++] = q->guc->id; 1195 trace_xe_exec_queue_submit(q); 1196 1197 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 1198 } 1199} 1200 1201static struct dma_fence * 1202guc_exec_queue_run_job(struct drm_sched_job *drm_job) 1203{ 1204 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1205 struct xe_exec_queue *q = job->q; 1206 struct xe_guc *guc = exec_queue_to_guc(q); 1207 bool killed_or_banned_or_wedged = 1208 exec_queue_killed_or_banned_or_wedged(q); 1209 1210 xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 1211 exec_queue_banned(q) || exec_queue_suspended(q)); 1212 1213 trace_xe_sched_job_run(job); 1214 1215 if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { 1216 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1217 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1218 1219 if (exec_queue_killed_or_banned_or_wedged(primary)) { 1220 killed_or_banned_or_wedged = true; 1221 goto run_job_out; 1222 } 1223 1224 if (!exec_queue_registered(primary)) 1225 register_exec_queue(primary, GUC_CONTEXT_NORMAL); 1226 } 1227 1228 if 
(!exec_queue_registered(q)) 1229 register_exec_queue(q, GUC_CONTEXT_NORMAL); 1230 if (!job->restore_replay) 1231 q->ring_ops->emit_job(job); 1232 submit_exec_queue(q, job); 1233 job->restore_replay = false; 1234 } 1235 1236run_job_out: 1237 1238 return job->fence; 1239} 1240 1241static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) 1242{ 1243 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1244 1245 trace_xe_sched_job_free(job); 1246 xe_sched_job_put(job); 1247} 1248 1249int xe_guc_read_stopped(struct xe_guc *guc) 1250{ 1251 return atomic_read(&guc->submission_state.stopped); 1252} 1253 1254static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 1255 struct xe_exec_queue *q, 1256 u32 runnable_state); 1257static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q); 1258 1259#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ 1260 u32 action[] = { \ 1261 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ 1262 q->guc->id, \ 1263 GUC_CONTEXT_##enable_disable, \ 1264 } 1265 1266static void disable_scheduling_deregister(struct xe_guc *guc, 1267 struct xe_exec_queue *q) 1268{ 1269 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1270 int ret; 1271 1272 if (!xe_exec_queue_is_multi_queue_secondary(q)) 1273 set_min_preemption_timeout(guc, q); 1274 1275 smp_rmb(); 1276 ret = wait_event_timeout(guc->ct.wq, 1277 (!exec_queue_pending_enable(q) && 1278 !exec_queue_pending_disable(q)) || 1279 xe_guc_read_stopped(guc) || 1280 vf_recovery(guc), 1281 HZ * 5); 1282 if (!ret && !vf_recovery(guc)) { 1283 struct xe_gpu_scheduler *sched = &q->guc->sched; 1284 1285 xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); 1286 xe_sched_submission_start(sched); 1287 xe_gt_reset_async(q->gt); 1288 xe_sched_tdr_queue_imm(sched); 1289 return; 1290 } 1291 1292 clear_exec_queue_enabled(q); 1293 set_exec_queue_pending_disable(q); 1294 set_exec_queue_destroyed(q); 1295 trace_xe_exec_queue_scheduling_disable(q); 1296 1297 /* 1298 * Reserve space for both 
G2H here as the 2nd G2H is sent from a G2H 1299 * handler and we are not allowed to reserved G2H space in handlers. 1300 */ 1301 if (xe_exec_queue_is_multi_queue_secondary(q)) 1302 handle_multi_queue_secondary_sched_done(guc, q, 0); 1303 else 1304 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1305 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + 1306 G2H_LEN_DW_DEREGISTER_CONTEXT, 2); 1307} 1308 1309/** 1310 * xe_guc_submit_wedge() - Wedge GuC submission 1311 * @guc: the GuC object 1312 * 1313 * Save exec queue's registered with GuC state by taking a ref to each queue. 1314 * Register a DRMM handler to drop refs upon driver unload. 1315 */ 1316void xe_guc_submit_wedge(struct xe_guc *guc) 1317{ 1318 struct xe_device *xe = guc_to_xe(guc); 1319 struct xe_exec_queue *q; 1320 unsigned long index; 1321 1322 xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); 1323 1324 /* 1325 * If device is being wedged even before submission_state is 1326 * initialized, there's nothing to do here. 1327 */ 1328 if (!guc->submission_state.initialized) 1329 return; 1330 1331 if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { 1332 mutex_lock(&guc->submission_state.lock); 1333 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 1334 if (xe_exec_queue_get_unless_zero(q)) 1335 set_exec_queue_wedged(q); 1336 mutex_unlock(&guc->submission_state.lock); 1337 } else { 1338 /* Forcefully kill any remaining exec queues, signal fences */ 1339 guc_submit_reset_prepare(guc); 1340 xe_guc_submit_stop(guc); 1341 xe_guc_softreset(guc); 1342 xe_uc_fw_sanitize(&guc->fw); 1343 xe_guc_submit_pause_abort(guc); 1344 } 1345} 1346 1347static bool guc_submit_hint_wedged(struct xe_guc *guc) 1348{ 1349 struct xe_device *xe = guc_to_xe(guc); 1350 1351 if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) 1352 return false; 1353 1354 if (xe_device_wedged(xe)) 1355 return true; 1356 1357 xe_device_declare_wedged(xe); 1358 1359 return true; 1360} 1361 1362#define ADJUST_FIVE_PERCENT(__t) 
mul_u64_u32_div(__t, 105, 100) 1363 1364static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) 1365{ 1366 struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); 1367 u32 ctx_timestamp, ctx_job_timestamp; 1368 u32 timeout_ms = q->sched_props.job_timeout_ms; 1369 u32 diff; 1370 u64 running_time_ms; 1371 1372 if (!xe_sched_job_started(job)) { 1373 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started", 1374 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1375 q->guc->id); 1376 1377 return xe_sched_invalidate_job(job, 2); 1378 } 1379 1380 ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0])); 1381 if (ctx_timestamp == job->sample_timestamp) { 1382 if (IS_SRIOV_VF(gt_to_xe(gt))) 1383 xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", 1384 xe_sched_job_seqno(job), 1385 xe_sched_job_lrc_seqno(job), q->guc->id); 1386 else 1387 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", 1388 xe_sched_job_seqno(job), 1389 xe_sched_job_lrc_seqno(job), q->guc->id); 1390 1391 return xe_sched_invalidate_job(job, 0); 1392 } 1393 1394 job->sample_timestamp = ctx_timestamp; 1395 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); 1396 1397 /* 1398 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch 1399 * possible overflows with a high timeout. 
1400 */ 1401 xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); 1402 1403 diff = ctx_timestamp - ctx_job_timestamp; 1404 1405 /* 1406 * Ensure timeout is within 5% to account for an GuC scheduling latency 1407 */ 1408 running_time_ms = 1409 ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); 1410 1411 xe_gt_dbg(gt, 1412 "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", 1413 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1414 q->guc->id, running_time_ms, timeout_ms, diff); 1415 1416 return running_time_ms >= timeout_ms; 1417} 1418 1419static void enable_scheduling(struct xe_exec_queue *q) 1420{ 1421 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1422 struct xe_guc *guc = exec_queue_to_guc(q); 1423 int ret; 1424 1425 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1426 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1427 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1428 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 1429 1430 set_exec_queue_pending_enable(q); 1431 set_exec_queue_enabled(q); 1432 trace_xe_exec_queue_scheduling_enable(q); 1433 1434 if (xe_exec_queue_is_multi_queue_secondary(q)) 1435 handle_multi_queue_secondary_sched_done(guc, q, 1); 1436 else 1437 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1438 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1439 1440 ret = wait_event_timeout(guc->ct.wq, 1441 !exec_queue_pending_enable(q) || 1442 xe_guc_read_stopped(guc) || 1443 vf_recovery(guc), HZ * 5); 1444 if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) { 1445 xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); 1446 set_exec_queue_banned(q); 1447 xe_gt_reset_async(q->gt); 1448 xe_sched_tdr_queue_imm(&q->guc->sched); 1449 } 1450} 1451 1452static void disable_scheduling(struct xe_exec_queue *q, bool immediate) 1453{ 1454 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1455 struct xe_guc *guc = exec_queue_to_guc(q); 1456 1457 
xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1458 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1459 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1460 1461 if (immediate && !xe_exec_queue_is_multi_queue_secondary(q)) 1462 set_min_preemption_timeout(guc, q); 1463 clear_exec_queue_enabled(q); 1464 set_exec_queue_pending_disable(q); 1465 trace_xe_exec_queue_scheduling_disable(q); 1466 1467 if (xe_exec_queue_is_multi_queue_secondary(q)) 1468 handle_multi_queue_secondary_sched_done(guc, q, 0); 1469 else 1470 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1471 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1472} 1473 1474static enum drm_gpu_sched_stat 1475guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) 1476{ 1477 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1478 struct drm_sched_job *tmp_job; 1479 struct xe_exec_queue *q = job->q, *primary; 1480 struct xe_gpu_scheduler *sched = &q->guc->sched; 1481 struct xe_guc *guc = exec_queue_to_guc(q); 1482 const char *process_name = "no process"; 1483 struct xe_device *xe = guc_to_xe(guc); 1484 int err = -ETIME; 1485 pid_t pid = -1; 1486 bool wedged = false, skip_timeout_check; 1487 1488 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1489 1490 primary = xe_exec_queue_multi_queue_primary(q); 1491 1492 /* 1493 * TDR has fired before free job worker. Common if exec queue 1494 * immediately closed after last fence signaled. Add back to pending 1495 * list so job can be freed and kick scheduler ensuring free job is not 1496 * lost. 
1497 */ 1498 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) || 1499 vf_recovery(guc)) 1500 return DRM_GPU_SCHED_STAT_NO_HANG; 1501 1502 /* Kill the run_job entry point */ 1503 if (xe_exec_queue_is_multi_queue(q)) 1504 xe_guc_exec_queue_group_stop(q); 1505 else 1506 xe_sched_submission_stop(sched); 1507 1508 /* Must check all state after stopping scheduler */ 1509 skip_timeout_check = exec_queue_reset(q) || 1510 exec_queue_killed_or_banned_or_wedged(q); 1511 1512 /* Skip timeout check if multi-queue group is banned */ 1513 if (xe_exec_queue_is_multi_queue(q) && 1514 READ_ONCE(q->multi_queue.group->banned)) 1515 skip_timeout_check = true; 1516 1517 /* LR jobs can only get here if queue has been killed or hit an error */ 1518 if (xe_exec_queue_is_lr(q)) 1519 xe_gt_assert(guc_to_gt(guc), skip_timeout_check); 1520 1521 /* 1522 * If devcoredump not captured and GuC capture for the job is not ready 1523 * do manual capture first and decide later if we need to use it 1524 */ 1525 if (!exec_queue_killed(q) && !xe->devcoredump.captured && 1526 !xe_guc_capture_get_matching_and_lock(q)) { 1527 /* take force wake before engine register manual capture */ 1528 CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 1529 if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) 1530 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); 1531 1532 xe_engine_snapshot_capture_for_queue(q); 1533 } 1534 1535 /* 1536 * Check if job is actually timed out, if so restart job execution and TDR 1537 */ 1538 if (!skip_timeout_check && !check_timeout(q, job)) 1539 goto rearm; 1540 1541 if (!exec_queue_killed(q)) 1542 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 1543 1544 set_exec_queue_banned(q); 1545 1546 /* Kick job / queue off hardware */ 1547 if (!wedged && (exec_queue_enabled(primary) || 1548 exec_queue_pending_disable(primary))) { 1549 int ret; 1550 1551 if (exec_queue_reset(primary)) 1552 err = -EIO; 1553 1554 if 
(xe_uc_fw_is_running(&guc->fw)) { 1555 /* 1556 * Wait for any pending G2H to flush out before 1557 * modifying state 1558 */ 1559 ret = wait_event_timeout(guc->ct.wq, 1560 (!exec_queue_pending_enable(primary) && 1561 !exec_queue_pending_disable(primary)) || 1562 xe_guc_read_stopped(guc) || 1563 vf_recovery(guc), HZ * 5); 1564 if (vf_recovery(guc)) 1565 goto handle_vf_resume; 1566 if (!ret || xe_guc_read_stopped(guc)) 1567 goto trigger_reset; 1568 1569 disable_scheduling(primary, skip_timeout_check); 1570 } 1571 1572 /* 1573 * Must wait for scheduling to be disabled before signalling 1574 * any fences, if GT broken the GT reset code should signal us. 1575 * 1576 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault 1577 * error) messages which can cause the schedule disable to get 1578 * lost. If this occurs, trigger a GT reset to recover. 1579 */ 1580 smp_rmb(); 1581 ret = wait_event_timeout(guc->ct.wq, 1582 !xe_uc_fw_is_running(&guc->fw) || 1583 !exec_queue_pending_disable(primary) || 1584 xe_guc_read_stopped(guc) || 1585 vf_recovery(guc), HZ * 5); 1586 if (vf_recovery(guc)) 1587 goto handle_vf_resume; 1588 if (!ret || xe_guc_read_stopped(guc)) { 1589trigger_reset: 1590 if (!ret) 1591 xe_gt_warn(guc_to_gt(guc), 1592 "Schedule disable failed to respond, guc_id=%d", 1593 primary->guc->id); 1594 xe_devcoredump(primary, job, 1595 "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", 1596 primary->guc->id, ret, xe_guc_read_stopped(guc)); 1597 xe_gt_reset_async(primary->gt); 1598 xe_sched_tdr_queue_imm(sched); 1599 goto rearm; 1600 } 1601 } 1602 1603 if (q->vm && q->vm->xef) { 1604 process_name = q->vm->xef->process_name; 1605 pid = q->vm->xef->pid; 1606 } 1607 1608 if (!exec_queue_killed(q)) 1609 xe_gt_notice(guc_to_gt(guc), 1610 "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", 1611 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1612 q->guc->id, q->flags, process_name, pid); 1613 1614 
trace_xe_sched_job_timedout(job); 1615 1616 if (!exec_queue_killed(q)) 1617 xe_devcoredump(q, job, 1618 "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", 1619 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1620 q->guc->id, q->flags); 1621 1622 /* 1623 * Kernel jobs should never fail, nor should VM jobs if they do 1624 * somethings has gone wrong and the GT needs a reset 1625 */ 1626 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, 1627 "Kernel-submitted job timed out\n"); 1628 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), 1629 "VM job timed out on non-killed execqueue\n"); 1630 if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || 1631 (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { 1632 if (!xe_sched_invalidate_job(job, 2)) { 1633 xe_gt_reset_async(q->gt); 1634 goto rearm; 1635 } 1636 } 1637 1638 /* Mark all outstanding jobs as bad, thus completing them */ 1639 xe_sched_job_set_error(job, err); 1640 drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL) 1641 xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED); 1642 1643 if (xe_exec_queue_is_multi_queue(q)) { 1644 xe_guc_exec_queue_group_start(q); 1645 xe_guc_exec_queue_group_trigger_cleanup(q); 1646 } else { 1647 xe_sched_submission_start(sched); 1648 xe_guc_exec_queue_trigger_cleanup(q); 1649 } 1650 1651 /* 1652 * We want the job added back to the pending list so it gets freed; this 1653 * is what DRM_GPU_SCHED_STAT_NO_HANG does. 1654 */ 1655 return DRM_GPU_SCHED_STAT_NO_HANG; 1656 1657rearm: 1658 /* 1659 * XXX: Ideally want to adjust timeout based on current execution time 1660 * but there is not currently an easy way to do in DRM scheduler. With 1661 * some thought, do this in a follow up. 
1662 */ 1663 if (xe_exec_queue_is_multi_queue(q)) 1664 xe_guc_exec_queue_group_start(q); 1665 else 1666 xe_sched_submission_start(sched); 1667handle_vf_resume: 1668 return DRM_GPU_SCHED_STAT_NO_HANG; 1669} 1670 1671static void guc_exec_queue_fini(struct xe_exec_queue *q) 1672{ 1673 struct xe_guc_exec_queue *ge = q->guc; 1674 struct xe_guc *guc = exec_queue_to_guc(q); 1675 1676 release_guc_id(guc, q); 1677 xe_sched_entity_fini(&ge->entity); 1678 xe_sched_fini(&ge->sched); 1679 1680 /* 1681 * RCU free due sched being exported via DRM scheduler fences 1682 * (timeline name). 1683 */ 1684 kfree_rcu(ge, rcu); 1685} 1686 1687static void __guc_exec_queue_destroy_async(struct work_struct *w) 1688{ 1689 struct xe_guc_exec_queue *ge = 1690 container_of(w, struct xe_guc_exec_queue, destroy_async); 1691 struct xe_exec_queue *q = ge->q; 1692 struct xe_guc *guc = exec_queue_to_guc(q); 1693 1694 guard(xe_pm_runtime)(guc_to_xe(guc)); 1695 trace_xe_exec_queue_destroy(q); 1696 1697 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1698 struct xe_exec_queue_group *group = q->multi_queue.group; 1699 1700 mutex_lock(&group->list_lock); 1701 list_del(&q->multi_queue.link); 1702 mutex_unlock(&group->list_lock); 1703 } 1704 1705 /* Confirm no work left behind accessing device structures */ 1706 cancel_delayed_work_sync(&ge->sched.base.work_tdr); 1707 1708 xe_exec_queue_fini(q); 1709} 1710 1711static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) 1712{ 1713 struct xe_guc *guc = exec_queue_to_guc(q); 1714 struct xe_device *xe = guc_to_xe(guc); 1715 1716 INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); 1717 1718 /* We must block on kernel engines so slabs are empty on driver unload */ 1719 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) 1720 __guc_exec_queue_destroy_async(&q->guc->destroy_async); 1721 else 1722 queue_work(xe->destroy_wq, &q->guc->destroy_async); 1723} 1724 1725static void __guc_exec_queue_destroy(struct xe_guc *guc, struct 
xe_exec_queue *q) 1726{ 1727 /* 1728 * Might be done from within the GPU scheduler, need to do async as we 1729 * fini the scheduler when the engine is fini'd, the scheduler can't 1730 * complete fini within itself (circular dependency). Async resolves 1731 * this we and don't really care when everything is fini'd, just that it 1732 * is. 1733 */ 1734 guc_exec_queue_destroy_async(q); 1735} 1736 1737static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) 1738{ 1739 struct xe_exec_queue *q = msg->private_data; 1740 struct xe_guc *guc = exec_queue_to_guc(q); 1741 1742 xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1743 trace_xe_exec_queue_cleanup_entity(q); 1744 1745 /* 1746 * Expected state transitions for cleanup: 1747 * - If the exec queue is registered and GuC firmware is running, we must first 1748 * disable scheduling and deregister the queue to ensure proper teardown and 1749 * resource release in the GuC, then destroy the exec queue on driver side. 1750 * - If the GuC is already stopped (e.g., during driver unload or GPU reset), 1751 * we cannot expect a response for the deregister request. In this case, 1752 * it is safe to directly destroy the exec queue on driver side, as the GuC 1753 * will not process further requests and all resources must be cleaned up locally. 
1754 */ 1755 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) 1756 disable_scheduling_deregister(guc, q); 1757 else 1758 __guc_exec_queue_destroy(guc, q); 1759} 1760 1761static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1762{ 1763 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); 1764} 1765 1766static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1767{ 1768 struct xe_exec_queue *q = msg->private_data; 1769 struct xe_guc *guc = exec_queue_to_guc(q); 1770 1771 if (guc_exec_queue_allowed_to_change_state(q)) 1772 init_policies(guc, q); 1773 kfree(msg); 1774} 1775 1776static void __suspend_fence_signal(struct xe_exec_queue *q) 1777{ 1778 struct xe_guc *guc = exec_queue_to_guc(q); 1779 struct xe_device *xe = guc_to_xe(guc); 1780 1781 if (!q->guc->suspend_pending) 1782 return; 1783 1784 WRITE_ONCE(q->guc->suspend_pending, false); 1785 1786 /* 1787 * We use a GuC shared wait queue for VFs because the VF resfix start 1788 * interrupt must be able to wake all instances of suspend_wait. This 1789 * prevents the VF migration worker from being starved during 1790 * scheduling. 
1791 */ 1792 if (IS_SRIOV_VF(xe)) 1793 wake_up_all(&guc->ct.wq); 1794 else 1795 wake_up(&q->guc->suspend_wait); 1796} 1797 1798static void suspend_fence_signal(struct xe_exec_queue *q) 1799{ 1800 struct xe_guc *guc = exec_queue_to_guc(q); 1801 1802 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1803 xe_guc_read_stopped(guc)); 1804 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1805 1806 __suspend_fence_signal(q); 1807} 1808 1809static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1810{ 1811 struct xe_exec_queue *q = msg->private_data; 1812 struct xe_guc *guc = exec_queue_to_guc(q); 1813 bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q); 1814 1815 if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) && 1816 !exec_queue_suspended(q) && exec_queue_enabled(q)) { 1817 wait_event(guc->ct.wq, vf_recovery(guc) || 1818 ((q->guc->resume_time != RESUME_PENDING || 1819 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); 1820 1821 if (!xe_guc_read_stopped(guc)) { 1822 s64 since_resume_ms = 1823 ktime_ms_delta(ktime_get(), 1824 q->guc->resume_time); 1825 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1826 since_resume_ms; 1827 1828 if (wait_ms > 0 && q->guc->resume_time) 1829 xe_sleep_relaxed_ms(wait_ms); 1830 1831 set_exec_queue_suspended(q); 1832 disable_scheduling(q, false); 1833 } 1834 } else if (q->guc->suspend_pending) { 1835 if (idle_skip_suspend) 1836 set_exec_queue_idle_skip_suspend(q); 1837 set_exec_queue_suspended(q); 1838 suspend_fence_signal(q); 1839 } 1840} 1841 1842static void sched_context(struct xe_exec_queue *q) 1843{ 1844 struct xe_guc *guc = exec_queue_to_guc(q); 1845 struct xe_lrc *lrc = q->lrc[0]; 1846 u32 action[] = { 1847 XE_GUC_ACTION_SCHED_CONTEXT, 1848 q->guc->id, 1849 }; 1850 1851 xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q)); 1852 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1853 xe_gt_assert(guc_to_gt(guc), 
exec_queue_registered(q)); 1854 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1855 1856 trace_xe_exec_queue_submit(q); 1857 1858 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1859 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 1860} 1861 1862static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1863{ 1864 struct xe_exec_queue *q = msg->private_data; 1865 1866 if (guc_exec_queue_allowed_to_change_state(q)) { 1867 clear_exec_queue_suspended(q); 1868 if (!exec_queue_enabled(q)) { 1869 if (exec_queue_idle_skip_suspend(q)) { 1870 struct xe_lrc *lrc = q->lrc[0]; 1871 1872 clear_exec_queue_idle_skip_suspend(q); 1873 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1874 } 1875 q->guc->resume_time = RESUME_PENDING; 1876 set_exec_queue_pending_resume(q); 1877 enable_scheduling(q); 1878 } else if (exec_queue_idle_skip_suspend(q)) { 1879 clear_exec_queue_idle_skip_suspend(q); 1880 sched_context(q); 1881 } 1882 } else { 1883 clear_exec_queue_suspended(q); 1884 clear_exec_queue_idle_skip_suspend(q); 1885 } 1886} 1887 1888static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg) 1889{ 1890 struct xe_exec_queue *q = msg->private_data; 1891 1892 if (guc_exec_queue_allowed_to_change_state(q)) { 1893#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 1894 struct xe_guc *guc = exec_queue_to_guc(q); 1895 struct xe_exec_queue_group *group = q->multi_queue.group; 1896 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 1897 int len = 0; 1898 1899 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 1900 action[len++] = group->primary->guc->id; 1901 1902 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 1903#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 1904 1905 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 1906 } 1907 1908 kfree(msg); 1909} 1910 1911#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1912#define SET_SCHED_PROPS 2 1913#define SUSPEND 3 1914#define RESUME 4 1915#define 
SET_MULTI_QUEUE_PRIORITY 5 1916#define OPCODE_MASK 0xf 1917#define MSG_LOCKED BIT(8) 1918#define MSG_HEAD BIT(9) 1919 1920static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1921{ 1922 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); 1923 1924 trace_xe_sched_msg_recv(msg); 1925 1926 switch (msg->opcode) { 1927 case CLEANUP: 1928 __guc_exec_queue_process_msg_cleanup(msg); 1929 break; 1930 case SET_SCHED_PROPS: 1931 __guc_exec_queue_process_msg_set_sched_props(msg); 1932 break; 1933 case SUSPEND: 1934 __guc_exec_queue_process_msg_suspend(msg); 1935 break; 1936 case RESUME: 1937 __guc_exec_queue_process_msg_resume(msg); 1938 break; 1939 case SET_MULTI_QUEUE_PRIORITY: 1940 __guc_exec_queue_process_msg_set_multi_queue_priority(msg); 1941 break; 1942 default: 1943 XE_WARN_ON("Unknown message type"); 1944 } 1945 1946 xe_pm_runtime_put(xe); 1947} 1948 1949static const struct drm_sched_backend_ops drm_sched_ops = { 1950 .run_job = guc_exec_queue_run_job, 1951 .free_job = guc_exec_queue_free_job, 1952 .timedout_job = guc_exec_queue_timedout_job, 1953}; 1954 1955static const struct xe_sched_backend_ops xe_sched_ops = { 1956 .process_msg = guc_exec_queue_process_msg, 1957}; 1958 1959static int guc_exec_queue_init(struct xe_exec_queue *q) 1960{ 1961 struct xe_gpu_scheduler *sched; 1962 struct xe_guc *guc = exec_queue_to_guc(q); 1963 struct workqueue_struct *submit_wq = NULL; 1964 struct xe_guc_exec_queue *ge; 1965 long timeout; 1966 int err, i; 1967 1968 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); 1969 1970 ge = kzalloc_obj(*ge); 1971 if (!ge) 1972 return -ENOMEM; 1973 1974 q->guc = ge; 1975 ge->q = q; 1976 init_rcu_head(&ge->rcu); 1977 init_waitqueue_head(&ge->suspend_wait); 1978 1979 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) 1980 INIT_LIST_HEAD(&ge->static_msgs[i].link); 1981 1982 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : 1983 msecs_to_jiffies(q->sched_props.job_timeout_ms); 1984 1985 /* 1986 * Use primary queue's submit_wq for all secondary queues of a 1987 * multi queue group. This serialization avoids any locking around 1988 * CGP synchronization with GuC. 1989 */ 1990 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1991 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1992 1993 submit_wq = primary->guc->sched.base.submit_wq; 1994 } 1995 1996 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1997 submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, 1998 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1999 q->name, gt_to_xe(q->gt)->drm.dev); 2000 if (err) 2001 goto err_free; 2002 2003 sched = &ge->sched; 2004 err = xe_sched_entity_init(&ge->entity, sched); 2005 if (err) 2006 goto err_sched; 2007 2008 mutex_lock(&guc->submission_state.lock); 2009 2010 err = alloc_guc_id(guc, q); 2011 if (err) 2012 goto err_entity; 2013 2014 q->entity = &ge->entity; 2015 2016 if (xe_guc_read_stopped(guc) || vf_recovery(guc)) 2017 xe_sched_stop(sched); 2018 2019 mutex_unlock(&guc->submission_state.lock); 2020 2021 xe_exec_queue_assign_name(q, q->guc->id); 2022 2023 /* 2024 * Maintain secondary queues of the multi queue group in a list 2025 * for handling dependencies across the queues in the group. 
2026 */ 2027 if (xe_exec_queue_is_multi_queue_secondary(q)) { 2028 struct xe_exec_queue_group *group = q->multi_queue.group; 2029 2030 INIT_LIST_HEAD(&q->multi_queue.link); 2031 mutex_lock(&group->list_lock); 2032 if (group->stopped) 2033 WRITE_ONCE(q->guc->sched.base.pause_submit, true); 2034 list_add_tail(&q->multi_queue.link, &group->list); 2035 mutex_unlock(&group->list_lock); 2036 } 2037 2038 if (xe_exec_queue_is_multi_queue(q)) 2039 trace_xe_exec_queue_create_multi_queue(q); 2040 else 2041 trace_xe_exec_queue_create(q); 2042 2043 return 0; 2044 2045err_entity: 2046 mutex_unlock(&guc->submission_state.lock); 2047 xe_sched_entity_fini(&ge->entity); 2048err_sched: 2049 xe_sched_fini(&ge->sched); 2050err_free: 2051 kfree(ge); 2052 2053 return err; 2054} 2055 2056static void guc_exec_queue_kill(struct xe_exec_queue *q) 2057{ 2058 trace_xe_exec_queue_kill(q); 2059 set_exec_queue_killed(q); 2060 __suspend_fence_signal(q); 2061 xe_guc_exec_queue_trigger_cleanup(q); 2062} 2063 2064static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 2065 u32 opcode) 2066{ 2067 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 2068 2069 INIT_LIST_HEAD(&msg->link); 2070 msg->opcode = opcode & OPCODE_MASK; 2071 msg->private_data = q; 2072 2073 trace_xe_sched_msg_add(msg); 2074 if (opcode & MSG_HEAD) 2075 xe_sched_add_msg_head(&q->guc->sched, msg); 2076 else if (opcode & MSG_LOCKED) 2077 xe_sched_add_msg_locked(&q->guc->sched, msg); 2078 else 2079 xe_sched_add_msg(&q->guc->sched, msg); 2080} 2081 2082static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, 2083 struct xe_sched_msg *msg, 2084 u32 opcode) 2085{ 2086 if (!list_empty(&msg->link)) 2087 return; 2088 2089 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); 2090} 2091 2092static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 2093 struct xe_sched_msg *msg, 2094 u32 opcode) 2095{ 2096 if (!list_empty(&msg->link)) 2097 return false; 2098 2099 
guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2100 2101 return true; 2102} 2103 2104#define STATIC_MSG_CLEANUP 0 2105#define STATIC_MSG_SUSPEND 1 2106#define STATIC_MSG_RESUME 2 2107static void guc_exec_queue_destroy(struct xe_exec_queue *q) 2108{ 2109 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2110 2111 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 2112 guc_exec_queue_add_msg(q, msg, CLEANUP); 2113 else 2114 __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 2115} 2116 2117static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 2118 enum xe_exec_queue_priority priority) 2119{ 2120 struct xe_sched_msg *msg; 2121 2122 if (q->sched_props.priority == priority || 2123 exec_queue_killed_or_banned_or_wedged(q)) 2124 return 0; 2125 2126 msg = kmalloc_obj(*msg); 2127 if (!msg) 2128 return -ENOMEM; 2129 2130 q->sched_props.priority = priority; 2131 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2132 2133 return 0; 2134} 2135 2136static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 2137{ 2138 struct xe_sched_msg *msg; 2139 2140 if (q->sched_props.timeslice_us == timeslice_us || 2141 exec_queue_killed_or_banned_or_wedged(q)) 2142 return 0; 2143 2144 msg = kmalloc_obj(*msg); 2145 if (!msg) 2146 return -ENOMEM; 2147 2148 q->sched_props.timeslice_us = timeslice_us; 2149 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2150 2151 return 0; 2152} 2153 2154static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 2155 u32 preempt_timeout_us) 2156{ 2157 struct xe_sched_msg *msg; 2158 2159 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 2160 exec_queue_killed_or_banned_or_wedged(q)) 2161 return 0; 2162 2163 msg = kmalloc_obj(*msg); 2164 if (!msg) 2165 return -ENOMEM; 2166 2167 q->sched_props.preempt_timeout_us = preempt_timeout_us; 2168 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2169 2170 return 0; 2171} 2172 2173static int 
guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, 2174 enum xe_multi_queue_priority priority) 2175{ 2176 struct xe_sched_msg *msg; 2177 2178 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); 2179 2180 if (exec_queue_killed_or_banned_or_wedged(q)) 2181 return 0; 2182 2183 msg = kmalloc_obj(*msg); 2184 if (!msg) 2185 return -ENOMEM; 2186 2187 scoped_guard(spinlock, &q->multi_queue.lock) { 2188 if (q->multi_queue.priority == priority) { 2189 kfree(msg); 2190 return 0; 2191 } 2192 2193 q->multi_queue.priority = priority; 2194 } 2195 2196 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); 2197 2198 return 0; 2199} 2200 2201static int guc_exec_queue_suspend(struct xe_exec_queue *q) 2202{ 2203 struct xe_gpu_scheduler *sched = &q->guc->sched; 2204 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2205 2206 if (exec_queue_killed_or_banned_or_wedged(q)) 2207 return -EINVAL; 2208 2209 xe_sched_msg_lock(sched); 2210 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 2211 q->guc->suspend_pending = true; 2212 xe_sched_msg_unlock(sched); 2213 2214 return 0; 2215} 2216 2217static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 2218{ 2219 struct xe_guc *guc = exec_queue_to_guc(q); 2220 struct xe_device *xe = guc_to_xe(guc); 2221 int ret; 2222 2223 /* 2224 * Likely don't need to check exec_queue_killed() as we clear 2225 * suspend_pending upon kill but to be paranoid but races in which 2226 * suspend_pending is set after kill also check kill here. 
2227 */ 2228#define WAIT_COND \ 2229 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2230 xe_guc_read_stopped(guc)) 2231 2232retry: 2233 if (IS_SRIOV_VF(xe)) 2234 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2235 vf_recovery(guc), 2236 HZ * 5); 2237 else 2238 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2239 WAIT_COND, HZ * 5); 2240 2241 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) 2242 return -EAGAIN; 2243 2244 if (!ret) { 2245 xe_gt_warn(guc_to_gt(guc), 2246 "Suspend fence, guc_id=%d, failed to respond", 2247 q->guc->id); 2248 /* XXX: Trigger GT reset? */ 2249 return -ETIME; 2250 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { 2251 /* Corner case on RESFIX DONE where vf_recovery() changes */ 2252 goto retry; 2253 } 2254 2255#undef WAIT_COND 2256 2257 return ret < 0 ? ret : 0; 2258} 2259 2260static void guc_exec_queue_resume(struct xe_exec_queue *q) 2261{ 2262 struct xe_gpu_scheduler *sched = &q->guc->sched; 2263 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2264 struct xe_guc *guc = exec_queue_to_guc(q); 2265 2266 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 2267 2268 xe_sched_msg_lock(sched); 2269 guc_exec_queue_try_add_msg(q, msg, RESUME); 2270 xe_sched_msg_unlock(sched); 2271} 2272 2273static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 2274{ 2275 if (xe_exec_queue_is_multi_queue_secondary(q) && 2276 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) 2277 return true; 2278 2279 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); 2280} 2281 2282static bool guc_exec_queue_active(struct xe_exec_queue *q) 2283{ 2284 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 2285 2286 return exec_queue_enabled(primary) && 2287 !exec_queue_pending_disable(primary); 2288} 2289 2290/* 2291 * All of these functions are an abstraction layer which other parts of Xe can 2292 * use to trap into the GuC backend. 
All of these functions, aside from init, 2293 * really shouldn't do much other than trap into the DRM scheduler which 2294 * synchronizes these operations. 2295 */ 2296static const struct xe_exec_queue_ops guc_exec_queue_ops = { 2297 .init = guc_exec_queue_init, 2298 .kill = guc_exec_queue_kill, 2299 .fini = guc_exec_queue_fini, 2300 .destroy = guc_exec_queue_destroy, 2301 .set_priority = guc_exec_queue_set_priority, 2302 .set_timeslice = guc_exec_queue_set_timeslice, 2303 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, 2304 .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority, 2305 .suspend = guc_exec_queue_suspend, 2306 .suspend_wait = guc_exec_queue_suspend_wait, 2307 .resume = guc_exec_queue_resume, 2308 .reset_status = guc_exec_queue_reset_status, 2309 .active = guc_exec_queue_active, 2310}; 2311 2312static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) 2313{ 2314 struct xe_gpu_scheduler *sched = &q->guc->sched; 2315 bool do_destroy = false; 2316 2317 /* Stop scheduling + flush any DRM scheduler operations */ 2318 xe_sched_submission_stop(sched); 2319 2320 /* Clean up lost G2H + reset engine state */ 2321 if (exec_queue_registered(q)) { 2322 if (exec_queue_destroyed(q)) 2323 do_destroy = true; 2324 } 2325 if (q->guc->suspend_pending) { 2326 set_exec_queue_suspended(q); 2327 suspend_fence_signal(q); 2328 } 2329 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | 2330 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | 2331 EXEC_QUEUE_STATE_SUSPENDED, 2332 &q->guc->state); 2333 q->guc->resume_time = 0; 2334 trace_xe_exec_queue_stop(q); 2335 2336 /* 2337 * Ban any engine (aside from kernel and engines used for VM ops) with a 2338 * started but not complete job or if a job has gone through a GT reset 2339 * more than twice. 
2340 */ 2341 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { 2342 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2343 bool ban = false; 2344 2345 if (job) { 2346 if ((xe_sched_job_started(job) && 2347 !xe_sched_job_completed(job)) || 2348 xe_sched_invalidate_job(job, 2)) { 2349 trace_xe_sched_job_ban(job); 2350 ban = true; 2351 } 2352 } 2353 2354 if (ban) { 2355 set_exec_queue_banned(q); 2356 xe_guc_exec_queue_trigger_cleanup(q); 2357 } 2358 } 2359 2360 if (do_destroy) 2361 __guc_exec_queue_destroy(guc, q); 2362} 2363 2364static int guc_submit_reset_prepare(struct xe_guc *guc) 2365{ 2366 int ret; 2367 2368 /* 2369 * Using an atomic here rather than submission_state.lock as this 2370 * function can be called while holding the CT lock (engine reset 2371 * failure). submission_state.lock needs the CT lock to resubmit jobs. 2372 * Atomic is not ideal, but it works to prevent against concurrent reset 2373 * and releasing any TDRs waiting on guc->submission_state.stopped. 
2374 */ 2375 ret = atomic_fetch_or(1, &guc->submission_state.stopped); 2376 smp_wmb(); 2377 wake_up_all(&guc->ct.wq); 2378 2379 return ret; 2380} 2381 2382int xe_guc_submit_reset_prepare(struct xe_guc *guc) 2383{ 2384 if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) 2385 return 0; 2386 2387 if (!guc->submission_state.initialized) 2388 return 0; 2389 2390 return guc_submit_reset_prepare(guc); 2391} 2392 2393void xe_guc_submit_reset_wait(struct xe_guc *guc) 2394{ 2395 wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || 2396 !xe_guc_read_stopped(guc)); 2397} 2398 2399void xe_guc_submit_stop(struct xe_guc *guc) 2400{ 2401 struct xe_exec_queue *q; 2402 unsigned long index; 2403 2404 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2405 2406 mutex_lock(&guc->submission_state.lock); 2407 2408 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2409 /* Prevent redundant attempts to stop parallel queues */ 2410 if (q->guc->id != index) 2411 continue; 2412 2413 guc_exec_queue_stop(guc, q); 2414 } 2415 2416 mutex_unlock(&guc->submission_state.lock); 2417 2418 /* 2419 * No one can enter the backend at this point, aside from new engine 2420 * creation which is protected by guc->submission_state.lock. 
2421 */ 2422 2423} 2424 2425static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc, 2426 struct xe_exec_queue *q) 2427{ 2428 bool pending_enable, pending_disable, pending_resume; 2429 2430 pending_enable = exec_queue_pending_enable(q); 2431 pending_resume = exec_queue_pending_resume(q); 2432 2433 if (pending_enable && pending_resume) { 2434 q->guc->needs_resume = true; 2435 xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d", 2436 q->guc->id); 2437 } 2438 2439 if (pending_enable && !pending_resume) { 2440 clear_exec_queue_registered(q); 2441 xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d", 2442 q->guc->id); 2443 } 2444 2445 if (pending_enable) { 2446 clear_exec_queue_enabled(q); 2447 clear_exec_queue_pending_resume(q); 2448 clear_exec_queue_pending_enable(q); 2449 xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d", 2450 q->guc->id); 2451 } 2452 2453 if (exec_queue_destroyed(q) && exec_queue_registered(q)) { 2454 clear_exec_queue_destroyed(q); 2455 q->guc->needs_cleanup = true; 2456 xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d", 2457 q->guc->id); 2458 } 2459 2460 pending_disable = exec_queue_pending_disable(q); 2461 2462 if (pending_disable && exec_queue_suspended(q)) { 2463 clear_exec_queue_suspended(q); 2464 q->guc->needs_suspend = true; 2465 xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d", 2466 q->guc->id); 2467 } 2468 2469 if (pending_disable) { 2470 if (!pending_enable) 2471 set_exec_queue_enabled(q); 2472 clear_exec_queue_pending_disable(q); 2473 xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d", 2474 q->guc->id); 2475 } 2476 2477 q->guc->resume_time = 0; 2478} 2479 2480static void lrc_parallel_clear(struct xe_lrc *lrc) 2481{ 2482 struct xe_device *xe = gt_to_xe(lrc->gt); 2483 struct iosys_map map = xe_lrc_parallel_map(lrc); 2484 int i; 2485 2486 for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) 2487 parallel_write(xe, map, wq[i], 2488 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 2489 FIELD_PREP(WQ_LEN_MASK, 0)); 
2490} 2491 2492/* 2493 * This function is quite complex but only real way to ensure no state is lost 2494 * during VF resume flows. The function scans the queue state, make adjustments 2495 * as needed, and queues jobs / messages which replayed upon unpause. 2496 */ 2497static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) 2498{ 2499 struct xe_gpu_scheduler *sched = &q->guc->sched; 2500 struct xe_sched_job *job; 2501 int i; 2502 2503 lockdep_assert_held(&guc->submission_state.lock); 2504 2505 /* Stop scheduling + flush any DRM scheduler operations */ 2506 xe_sched_submission_stop(sched); 2507 cancel_delayed_work_sync(&sched->base.work_tdr); 2508 2509 guc_exec_queue_revert_pending_state_change(guc, q); 2510 2511 if (xe_exec_queue_is_parallel(q)) { 2512 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ 2513 struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); 2514 2515 /* 2516 * NOP existing WQ commands that may contain stale GGTT 2517 * addresses. These will be replayed upon unpause. The hardware 2518 * seems to get confused if the WQ head/tail pointers are 2519 * adjusted. 2520 */ 2521 if (lrc) 2522 lrc_parallel_clear(lrc); 2523 } 2524 2525 job = xe_sched_first_pending_job(sched); 2526 if (job) { 2527 job->restore_replay = true; 2528 2529 /* 2530 * Adjust software tail so jobs submitted overwrite previous 2531 * position in ring buffer with new GGTT addresses. 2532 */ 2533 for (i = 0; i < q->width; ++i) 2534 q->lrc[i]->ring.tail = job->ptrs[i].head; 2535 } 2536} 2537 2538/** 2539 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
2540 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2541 */ 2542void xe_guc_submit_pause(struct xe_guc *guc) 2543{ 2544 struct xe_exec_queue *q; 2545 unsigned long index; 2546 2547 mutex_lock(&guc->submission_state.lock); 2548 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2549 xe_sched_submission_stop(&q->guc->sched); 2550 mutex_unlock(&guc->submission_state.lock); 2551} 2552 2553/** 2554 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. 2555 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2556 */ 2557void xe_guc_submit_pause_vf(struct xe_guc *guc) 2558{ 2559 struct xe_exec_queue *q; 2560 unsigned long index; 2561 2562 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2563 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2564 2565 mutex_lock(&guc->submission_state.lock); 2566 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2567 /* Prevent redundant attempts to stop parallel queues */ 2568 if (q->guc->id != index) 2569 continue; 2570 2571 guc_exec_queue_pause(guc, q); 2572 } 2573 mutex_unlock(&guc->submission_state.lock); 2574} 2575 2576static void guc_exec_queue_start(struct xe_exec_queue *q) 2577{ 2578 struct xe_gpu_scheduler *sched = &q->guc->sched; 2579 2580 if (!exec_queue_killed_or_banned_or_wedged(q)) { 2581 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2582 int i; 2583 2584 trace_xe_exec_queue_resubmit(q); 2585 if (job) { 2586 for (i = 0; i < q->width; ++i) { 2587 /* 2588 * The GuC context is unregistered at this point 2589 * time, adjusting software ring tail ensures 2590 * jobs are rewritten in original placement, 2591 * adjusting LRC tail ensures the newly loaded 2592 * GuC / contexts only view the LRC tail 2593 * increasing as jobs are written out. 
2594 */ 2595 q->lrc[i]->ring.tail = job->ptrs[i].head; 2596 xe_lrc_set_ring_tail(q->lrc[i], 2597 xe_lrc_ring_head(q->lrc[i])); 2598 } 2599 } 2600 xe_sched_resubmit_jobs(sched); 2601 } 2602 2603 xe_sched_submission_start(sched); 2604 xe_sched_submission_resume_tdr(sched); 2605} 2606 2607int xe_guc_submit_start(struct xe_guc *guc) 2608{ 2609 struct xe_exec_queue *q; 2610 unsigned long index; 2611 2612 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2613 2614 mutex_lock(&guc->submission_state.lock); 2615 atomic_dec(&guc->submission_state.stopped); 2616 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2617 /* Prevent redundant attempts to start parallel queues */ 2618 if (q->guc->id != index) 2619 continue; 2620 2621 guc_exec_queue_start(q); 2622 } 2623 mutex_unlock(&guc->submission_state.lock); 2624 2625 wake_up_all(&guc->ct.wq); 2626 2627 return 0; 2628} 2629 2630static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, 2631 struct xe_exec_queue *q) 2632{ 2633 struct xe_gpu_scheduler *sched = &q->guc->sched; 2634 struct xe_sched_job *job = NULL; 2635 struct drm_sched_job *s_job; 2636 bool restore_replay = false; 2637 2638 drm_sched_for_each_pending_job(s_job, &sched->base, NULL) { 2639 job = to_xe_sched_job(s_job); 2640 restore_replay |= job->restore_replay; 2641 if (restore_replay) { 2642 xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", 2643 q->guc->id, xe_sched_job_seqno(job)); 2644 2645 q->ring_ops->emit_job(job); 2646 job->restore_replay = true; 2647 } 2648 } 2649 2650 if (job) 2651 job->last_replay = true; 2652} 2653 2654/** 2655 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF. 
2656 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause 2657 */ 2658void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc) 2659{ 2660 struct xe_exec_queue *q; 2661 unsigned long index; 2662 2663 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2664 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2665 2666 mutex_lock(&guc->submission_state.lock); 2667 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2668 /* Prevent redundant attempts to stop parallel queues */ 2669 if (q->guc->id != index) 2670 continue; 2671 2672 guc_exec_queue_unpause_prepare(guc, q); 2673 } 2674 mutex_unlock(&guc->submission_state.lock); 2675} 2676 2677static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q) 2678{ 2679 struct xe_gpu_scheduler *sched = &q->guc->sched; 2680 struct xe_sched_msg *msg; 2681 2682 if (q->guc->needs_cleanup) { 2683 msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2684 2685 guc_exec_queue_add_msg(q, msg, CLEANUP); 2686 q->guc->needs_cleanup = false; 2687 } 2688 2689 if (q->guc->needs_suspend) { 2690 msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2691 2692 xe_sched_msg_lock(sched); 2693 guc_exec_queue_try_add_msg_head(q, msg, SUSPEND); 2694 xe_sched_msg_unlock(sched); 2695 2696 q->guc->needs_suspend = false; 2697 } 2698 2699 /* 2700 * The resume must be in the message queue before the suspend as it is 2701 * not possible for a resume to be issued if a suspend pending is, but 2702 * the inverse is possible. 
2703 */ 2704 if (q->guc->needs_resume) { 2705 msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2706 2707 xe_sched_msg_lock(sched); 2708 guc_exec_queue_try_add_msg_head(q, msg, RESUME); 2709 xe_sched_msg_unlock(sched); 2710 2711 q->guc->needs_resume = false; 2712 } 2713} 2714 2715static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q) 2716{ 2717 struct xe_gpu_scheduler *sched = &q->guc->sched; 2718 bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q); 2719 2720 lockdep_assert_held(&guc->submission_state.lock); 2721 2722 xe_sched_resubmit_jobs(sched); 2723 guc_exec_queue_replay_pending_state_change(q); 2724 xe_sched_submission_start(sched); 2725 if (needs_tdr) 2726 xe_guc_exec_queue_trigger_cleanup(q); 2727 xe_sched_submission_resume_tdr(sched); 2728} 2729 2730/** 2731 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. 2732 * @guc: the &xe_guc struct instance whose scheduler is to be enabled 2733 */ 2734void xe_guc_submit_unpause(struct xe_guc *guc) 2735{ 2736 struct xe_exec_queue *q; 2737 unsigned long index; 2738 2739 mutex_lock(&guc->submission_state.lock); 2740 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2741 xe_sched_submission_start(&q->guc->sched); 2742 mutex_unlock(&guc->submission_state.lock); 2743} 2744 2745/** 2746 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF. 2747 * @guc: the &xe_guc struct instance whose scheduler is to be enabled 2748 */ 2749void xe_guc_submit_unpause_vf(struct xe_guc *guc) 2750{ 2751 struct xe_exec_queue *q; 2752 unsigned long index; 2753 2754 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2755 2756 mutex_lock(&guc->submission_state.lock); 2757 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2758 /* 2759 * Prevent redundant attempts to stop parallel queues, or queues 2760 * created after resfix done. 
2761 */ 2762 if (q->guc->id != index || 2763 !drm_sched_is_stopped(&q->guc->sched.base)) 2764 continue; 2765 2766 guc_exec_queue_unpause(guc, q); 2767 } 2768 mutex_unlock(&guc->submission_state.lock); 2769} 2770 2771/** 2772 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC. 2773 * @guc: the &xe_guc struct instance whose scheduler is to be aborted 2774 */ 2775void xe_guc_submit_pause_abort(struct xe_guc *guc) 2776{ 2777 struct xe_exec_queue *q; 2778 unsigned long index; 2779 2780 mutex_lock(&guc->submission_state.lock); 2781 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2782 struct xe_gpu_scheduler *sched = &q->guc->sched; 2783 2784 /* Prevent redundant attempts to stop parallel queues */ 2785 if (q->guc->id != index) 2786 continue; 2787 2788 xe_sched_submission_start(sched); 2789 guc_exec_queue_kill(q); 2790 } 2791 mutex_unlock(&guc->submission_state.lock); 2792} 2793 2794static struct xe_exec_queue * 2795g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) 2796{ 2797 struct xe_gt *gt = guc_to_gt(guc); 2798 struct xe_exec_queue *q; 2799 2800 if (unlikely(guc_id >= GUC_ID_MAX)) { 2801 xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); 2802 return NULL; 2803 } 2804 2805 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); 2806 if (unlikely(!q)) { 2807 xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id); 2808 return NULL; 2809 } 2810 2811 xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); 2812 xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); 2813 2814 return q; 2815} 2816 2817static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) 2818{ 2819 u32 action[] = { 2820 XE_GUC_ACTION_DEREGISTER_CONTEXT, 2821 q->guc->id, 2822 }; 2823 2824 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); 2825 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 2826 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 2827 xe_gt_assert(guc_to_gt(guc), 
!exec_queue_pending_enable(q)); 2828 2829 trace_xe_exec_queue_deregister(q); 2830 2831 if (xe_exec_queue_is_multi_queue_secondary(q)) 2832 handle_deregister_done(guc, q); 2833 else 2834 xe_guc_ct_send_g2h_handler(&guc->ct, action, 2835 ARRAY_SIZE(action)); 2836} 2837 2838static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, 2839 u32 runnable_state) 2840{ 2841 trace_xe_exec_queue_scheduling_done(q); 2842 2843 if (runnable_state == 1) { 2844 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); 2845 2846 q->guc->resume_time = ktime_get(); 2847 clear_exec_queue_pending_resume(q); 2848 clear_exec_queue_pending_enable(q); 2849 smp_wmb(); 2850 wake_up_all(&guc->ct.wq); 2851 } else { 2852 xe_gt_assert(guc_to_gt(guc), runnable_state == 0); 2853 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); 2854 2855 if (q->guc->suspend_pending) { 2856 suspend_fence_signal(q); 2857 clear_exec_queue_pending_disable(q); 2858 } else { 2859 if (exec_queue_banned(q)) { 2860 smp_wmb(); 2861 wake_up_all(&guc->ct.wq); 2862 } 2863 if (exec_queue_destroyed(q)) { 2864 /* 2865 * Make sure to clear the pending_disable only 2866 * after sampling the destroyed state. We want 2867 * to ensure we don't trigger the unregister too 2868 * early with something intending to only 2869 * disable scheduling. The caller doing the 2870 * destroy must wait for an ongoing 2871 * pending_disable before marking as destroyed. 
2872 */ 2873 clear_exec_queue_pending_disable(q); 2874 deregister_exec_queue(guc, q); 2875 } else { 2876 clear_exec_queue_pending_disable(q); 2877 } 2878 } 2879 } 2880} 2881 2882static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 2883 struct xe_exec_queue *q, 2884 u32 runnable_state) 2885{ 2886 /* Take CT lock here as handle_sched_done() do send a h2g message */ 2887 mutex_lock(&guc->ct.lock); 2888 handle_sched_done(guc, q, runnable_state); 2889 mutex_unlock(&guc->ct.lock); 2890} 2891 2892int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2893{ 2894 struct xe_exec_queue *q; 2895 u32 guc_id, runnable_state; 2896 2897 if (unlikely(len < 2)) 2898 return -EPROTO; 2899 2900 guc_id = msg[0]; 2901 runnable_state = msg[1]; 2902 2903 q = g2h_exec_queue_lookup(guc, guc_id); 2904 if (unlikely(!q)) 2905 return -EPROTO; 2906 2907 if (unlikely(!exec_queue_pending_enable(q) && 2908 !exec_queue_pending_disable(q))) { 2909 xe_gt_err(guc_to_gt(guc), 2910 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", 2911 atomic_read(&q->guc->state), q->guc->id, 2912 runnable_state); 2913 return -EPROTO; 2914 } 2915 2916 handle_sched_done(guc, q, runnable_state); 2917 2918 return 0; 2919} 2920 2921static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) 2922{ 2923 trace_xe_exec_queue_deregister_done(q); 2924 2925 clear_exec_queue_registered(q); 2926 __guc_exec_queue_destroy(guc, q); 2927} 2928 2929int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2930{ 2931 struct xe_exec_queue *q; 2932 u32 guc_id; 2933 2934 if (unlikely(len < 1)) 2935 return -EPROTO; 2936 2937 guc_id = msg[0]; 2938 2939 q = g2h_exec_queue_lookup(guc, guc_id); 2940 if (unlikely(!q)) 2941 return -EPROTO; 2942 2943 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || 2944 exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 2945 xe_gt_err(guc_to_gt(guc), 2946 "DEREGISTER_DONE: Unexpected engine state 
0x%04x, guc_id=%d", 2947 atomic_read(&q->guc->state), q->guc->id); 2948 return -EPROTO; 2949 } 2950 2951 handle_deregister_done(guc, q); 2952 2953 return 0; 2954} 2955 2956int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 2957{ 2958 struct xe_gt *gt = guc_to_gt(guc); 2959 struct xe_exec_queue *q; 2960 u32 guc_id; 2961 2962 if (unlikely(len < 1)) 2963 return -EPROTO; 2964 2965 guc_id = msg[0]; 2966 2967 q = g2h_exec_queue_lookup(guc, guc_id); 2968 if (unlikely(!q)) 2969 return -EPROTO; 2970 2971 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x", 2972 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id, 2973 atomic_read(&q->guc->state)); 2974 2975 trace_xe_exec_queue_reset(q); 2976 2977 /* 2978 * A banned engine is a NOP at this point (came from 2979 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel 2980 * jobs by setting timeout of the job to the minimum value kicking 2981 * guc_exec_queue_timedout_job. 2982 */ 2983 xe_guc_exec_queue_reset_trigger_cleanup(q); 2984 2985 return 0; 2986} 2987 2988/* 2989 * xe_guc_error_capture_handler - Handler of GuC captured message 2990 * @guc: The GuC object 2991 * @msg: Point to the message 2992 * @len: The message length 2993 * 2994 * When GuC captured data is ready, GuC will send message 2995 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be 2996 * called 1st to check status before process the data comes with the message. 2997 * 2998 * Returns: error code. 
0 if success 2999 */ 3000int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 3001{ 3002 u32 status; 3003 3004 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 3005 return -EPROTO; 3006 3007 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 3008 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 3009 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 3010 3011 xe_guc_capture_process(guc); 3012 3013 return 0; 3014} 3015 3016int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 3017 u32 len) 3018{ 3019 struct xe_gt *gt = guc_to_gt(guc); 3020 struct xe_exec_queue *q; 3021 u32 guc_id; 3022 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 3023 3024 if (unlikely(!len || len > 2)) 3025 return -EPROTO; 3026 3027 guc_id = msg[0]; 3028 3029 if (len == 2) 3030 type = msg[1]; 3031 3032 if (guc_id == GUC_ID_UNKNOWN) { 3033 /* 3034 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 3035 * context. In such case only PF will be notified about that fault. 3036 */ 3037 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 3038 return 0; 3039 } 3040 3041 q = g2h_exec_queue_lookup(guc, guc_id); 3042 if (unlikely(!q)) 3043 return -EPROTO; 3044 3045 /* 3046 * The type is HW-defined and changes based on platform, so we don't 3047 * decode it in the kernel and only check if it is valid. 3048 * See bspec 54047 and 72187 for details. 
3049 */ 3050 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3051 xe_gt_info(gt, 3052 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3053 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3054 else 3055 xe_gt_info(gt, 3056 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3057 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3058 3059 trace_xe_exec_queue_memory_cat_error(q); 3060 3061 /* Treat the same as engine reset */ 3062 xe_guc_exec_queue_reset_trigger_cleanup(q); 3063 3064 return 0; 3065} 3066 3067int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3068{ 3069 struct xe_gt *gt = guc_to_gt(guc); 3070 u8 guc_class, instance; 3071 u32 reason; 3072 3073 if (unlikely(len != 3)) 3074 return -EPROTO; 3075 3076 guc_class = msg[0]; 3077 instance = msg[1]; 3078 reason = msg[2]; 3079 3080 /* Unexpected failure of a hardware feature, log an actual error */ 3081 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3082 guc_class, instance, reason); 3083 3084 xe_gt_reset_async(gt); 3085 3086 return 0; 3087} 3088 3089int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3090 u32 len) 3091{ 3092 struct xe_gt *gt = guc_to_gt(guc); 3093 struct xe_device *xe = guc_to_xe(guc); 3094 struct xe_exec_queue *q; 3095 u32 guc_id = msg[2]; 3096 3097 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3098 drm_err(&xe->drm, "Invalid length %u", len); 3099 return -EPROTO; 3100 } 3101 3102 q = g2h_exec_queue_lookup(guc, guc_id); 3103 if (unlikely(!q)) 3104 return -EPROTO; 3105 3106 xe_gt_dbg(gt, 3107 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3108 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3109 3110 trace_xe_exec_queue_cgp_context_error(q); 3111 3112 /* Treat the same as engine reset */ 3113 xe_guc_exec_queue_reset_trigger_cleanup(q); 3114 3115 return 0; 3116} 3117 3118/** 3119 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3120 * @guc: guc 3121 * @msg: message indicating CGP sync done 3122 * @len: length of message 3123 * 3124 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3125 * for CGP synchronization to complete. 3126 * 3127 * Return: 0 on success, -EPROTO for malformed messages. 3128 */ 3129int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3130{ 3131 struct xe_device *xe = guc_to_xe(guc); 3132 struct xe_exec_queue *q; 3133 u32 guc_id = msg[0]; 3134 3135 if (unlikely(len < 1)) { 3136 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3137 return -EPROTO; 3138 } 3139 3140 q = g2h_exec_queue_lookup(guc, guc_id); 3141 if (unlikely(!q)) 3142 return -EPROTO; 3143 3144 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3145 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3146 return -EPROTO; 3147 } 3148 3149 /* Wakeup the serialized cgp update wait */ 3150 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3151 xe_guc_ct_wake_waiters(&guc->ct); 3152 3153 return 0; 3154} 3155 3156static void 3157guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3158 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3159{ 3160 struct xe_guc *guc = exec_queue_to_guc(q); 3161 struct xe_device *xe = guc_to_xe(guc); 3162 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3163 int i; 3164 3165 snapshot->guc.wqi_head = q->guc->wqi_head; 3166 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3167 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3168 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3169 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 3170 
wq_desc.wq_status); 3171 3172 if (snapshot->parallel.wq_desc.head != 3173 snapshot->parallel.wq_desc.tail) { 3174 for (i = snapshot->parallel.wq_desc.head; 3175 i != snapshot->parallel.wq_desc.tail; 3176 i = (i + sizeof(u32)) % WQ_SIZE) 3177 snapshot->parallel.wq[i / sizeof(u32)] = 3178 parallel_read(xe, map, wq[i / sizeof(u32)]); 3179 } 3180} 3181 3182static void 3183guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3184 struct drm_printer *p) 3185{ 3186 int i; 3187 3188 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3189 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3190 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3191 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3192 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3193 3194 if (snapshot->parallel.wq_desc.head != 3195 snapshot->parallel.wq_desc.tail) { 3196 for (i = snapshot->parallel.wq_desc.head; 3197 i != snapshot->parallel.wq_desc.tail; 3198 i = (i + sizeof(u32)) % WQ_SIZE) 3199 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3200 snapshot->parallel.wq[i / sizeof(u32)]); 3201 } 3202} 3203 3204/** 3205 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3206 * @q: faulty exec queue 3207 * 3208 * This can be printed out in a later stage like during dev_coredump 3209 * analysis. 3210 * 3211 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3212 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3213 */ 3214struct xe_guc_submit_exec_queue_snapshot * 3215xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3216{ 3217 struct xe_gpu_scheduler *sched = &q->guc->sched; 3218 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3219 int i; 3220 3221 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3222 3223 if (!snapshot) 3224 return NULL; 3225 3226 snapshot->guc.id = q->guc->id; 3227 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3228 snapshot->class = q->class; 3229 snapshot->logical_mask = q->logical_mask; 3230 snapshot->width = q->width; 3231 snapshot->refcount = kref_read(&q->refcount); 3232 snapshot->sched_timeout = sched->base.timeout; 3233 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3234 snapshot->sched_props.preempt_timeout_us = 3235 q->sched_props.preempt_timeout_us; 3236 3237 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3238 GFP_ATOMIC); 3239 3240 if (snapshot->lrc) { 3241 for (i = 0; i < q->width; ++i) { 3242 struct xe_lrc *lrc = q->lrc[i]; 3243 3244 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3245 } 3246 } 3247 3248 snapshot->schedule_state = atomic_read(&q->guc->state); 3249 snapshot->exec_queue_flags = q->flags; 3250 3251 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3252 if (snapshot->parallel_execution) 3253 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3254 3255 if (xe_exec_queue_is_multi_queue(q)) { 3256 snapshot->multi_queue.valid = true; 3257 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3258 snapshot->multi_queue.pos = q->multi_queue.pos; 3259 } 3260 3261 return snapshot; 3262} 3263 3264/** 3265 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3266 * @snapshot: Previously captured snapshot of job. 3267 * 3268 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 
3269 */ 3270void 3271xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3272{ 3273 int i; 3274 3275 if (!snapshot || !snapshot->lrc) 3276 return; 3277 3278 for (i = 0; i < snapshot->width; ++i) 3279 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3280} 3281 3282/** 3283 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3284 * @snapshot: GuC Submit Engine snapshot object. 3285 * @p: drm_printer where it will be printed out. 3286 * 3287 * This function prints out a given GuC Submit Engine snapshot object. 3288 */ 3289void 3290xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3291 struct drm_printer *p) 3292{ 3293 int i; 3294 3295 if (!snapshot) 3296 return; 3297 3298 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3299 drm_printf(p, "\tName: %s\n", snapshot->name); 3300 drm_printf(p, "\tClass: %d\n", snapshot->class); 3301 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3302 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3303 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3304 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3305 drm_printf(p, "\tTimeslice: %u (us)\n", 3306 snapshot->sched_props.timeslice_us); 3307 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3308 snapshot->sched_props.preempt_timeout_us); 3309 3310 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3311 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3312 3313 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3314 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3315 3316 if (snapshot->parallel_execution) 3317 guc_exec_queue_wq_snapshot_print(snapshot, p); 3318 3319 if (snapshot->multi_queue.valid) { 3320 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3321 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3322 } 3323} 3324 3325/** 3326 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given 3327 * snapshot. 3328 * @snapshot: GuC Submit Engine snapshot object. 3329 * 3330 * This function free all the memory that needed to be allocated at capture 3331 * time. 3332 */ 3333void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3334{ 3335 int i; 3336 3337 if (!snapshot) 3338 return; 3339 3340 if (snapshot->lrc) { 3341 for (i = 0; i < snapshot->width; i++) 3342 xe_lrc_snapshot_free(snapshot->lrc[i]); 3343 kfree(snapshot->lrc); 3344 } 3345 kfree(snapshot); 3346} 3347 3348static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) 3349{ 3350 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3351 3352 snapshot = xe_guc_exec_queue_snapshot_capture(q); 3353 xe_guc_exec_queue_snapshot_print(snapshot, p); 3354 xe_guc_exec_queue_snapshot_free(snapshot); 3355} 3356 3357/** 3358 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. 3359 * @q: Execution queue 3360 * @ctx_type: Type of the context 3361 * 3362 * This function registers the execution queue with the guc. Special context 3363 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE 3364 * are only applicable for IGPU and in the VF. 3365 * Submits the execution queue to GUC after registering it. 3366 * 3367 * Returns - None. 
3368 */ 3369void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3370{ 3371 struct xe_guc *guc = exec_queue_to_guc(q); 3372 struct xe_device *xe = guc_to_xe(guc); 3373 struct xe_gt *gt = guc_to_gt(guc); 3374 3375 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3376 xe_gt_assert(gt, !IS_DGFX(xe)); 3377 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3378 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3379 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3380 3381 register_exec_queue(q, ctx_type); 3382 enable_scheduling(q); 3383} 3384 3385/** 3386 * xe_guc_submit_print - GuC Submit Print. 3387 * @guc: GuC. 3388 * @p: drm_printer where it will be printed out. 3389 * 3390 * This function capture and prints snapshots of **all** GuC Engines. 3391 */ 3392void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3393{ 3394 struct xe_exec_queue *q; 3395 unsigned long index; 3396 3397 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3398 return; 3399 3400 mutex_lock(&guc->submission_state.lock); 3401 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3402 guc_exec_queue_print(q, p); 3403 mutex_unlock(&guc->submission_state.lock); 3404} 3405 3406/** 3407 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3408 * registered with the GuC 3409 * @guc: GuC. 3410 * 3411 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3412 */ 3413bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3414{ 3415 struct xe_exec_queue *q; 3416 unsigned long index; 3417 3418 guard(mutex)(&guc->submission_state.lock); 3419 3420 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3421 if (q->width > 1) 3422 return true; 3423 3424 return false; 3425} 3426 3427/** 3428 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3429 * exec queues registered to given GuC. 
3430 * @guc: the &xe_guc struct instance 3431 * @scratch: scratch buffer to be used as temporary storage 3432 * 3433 * Returns: zero on success, negative error code on failure. 3434 */ 3435int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3436{ 3437 struct xe_exec_queue *q; 3438 unsigned long index; 3439 int err = 0; 3440 3441 mutex_lock(&guc->submission_state.lock); 3442 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3443 /* Prevent redundant attempts to stop parallel queues */ 3444 if (q->guc->id != index) 3445 continue; 3446 3447 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3448 if (err) 3449 break; 3450 } 3451 mutex_unlock(&guc->submission_state.lock); 3452 3453 return err; 3454}