Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-xe-next-2025-01-07' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

UAPI Changes:
- OA new property: 'unblock after N reports' (Ashutosh)

i915 display Changes:
- UHBR rates for Thunderbolt (Kahola)

Driver Changes:
- IRQ related fixes and improvements (Ilia)
- Revert some changes that break a mesa debug tool (John)
- Fix migration issues (Nirmoy)
- Enable GuC's WA_DUAL_QUEUE for newer platforms (Daniele)
- Move shrink test out of xe_bo (Nirmoy)
- SRIOV PF: Use correct function to check LMEM provisioning (Michal)
- Fix a false-positive "Missing outer runtime PM protection" warning (Rodrigo)
- Make GSCCS disabling message less alarming (Daniele)
- Fix DG1 power gate sequence (Rodrigo)
- Xe files fixes (Lucas)
- Fix a potential TP_printk UAF (Thomas)
- OA Fixes (Umesh)
- Fix tlb invalidation when wedging (Lucas)
- Documentation fix (Lucas)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Z31579j3V3XCPFaK@intel.com

+594 -286
+35 -4
drivers/gpu/drm/i915/display/intel_cx0_phy.c
··· 3070 3070 3071 3071 val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port)); 3072 3072 3073 - clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val); 3073 + if (DISPLAY_VER(display) >= 30) 3074 + clock = REG_FIELD_GET(XE3_DDI_CLOCK_SELECT_MASK, val); 3075 + else 3076 + clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val); 3074 3077 3075 3078 drm_WARN_ON(display->drm, !(val & XELPDP_FORWARD_CLOCK_UNGATE)); 3076 3079 drm_WARN_ON(display->drm, !(val & XELPDP_TBT_CLOCK_REQUEST)); ··· 3088 3085 return 540000; 3089 3086 case XELPDP_DDI_CLOCK_SELECT_TBT_810: 3090 3087 return 810000; 3088 + case XELPDP_DDI_CLOCK_SELECT_TBT_312_5: 3089 + return 1000000; 3090 + case XELPDP_DDI_CLOCK_SELECT_TBT_625: 3091 + return 2000000; 3091 3092 default: 3092 3093 MISSING_CASE(clock); 3093 3094 return 162000; 3094 3095 } 3095 3096 } 3096 3097 3097 - static int intel_mtl_tbt_clock_select(int clock) 3098 + static int intel_mtl_tbt_clock_select(struct intel_display *display, 3099 + int clock) 3098 3100 { 3099 3101 switch (clock) { 3100 3102 case 162000: ··· 3110 3102 return XELPDP_DDI_CLOCK_SELECT_TBT_540; 3111 3103 case 810000: 3112 3104 return XELPDP_DDI_CLOCK_SELECT_TBT_810; 3105 + case 1000000: 3106 + if (DISPLAY_VER(display) < 30) { 3107 + drm_WARN_ON(display->drm, "UHBR10 not supported for the platform\n"); 3108 + return XELPDP_DDI_CLOCK_SELECT_TBT_162; 3109 + } 3110 + return XELPDP_DDI_CLOCK_SELECT_TBT_312_5; 3111 + case 2000000: 3112 + if (DISPLAY_VER(display) < 30) { 3113 + drm_WARN_ON(display->drm, "UHBR20 not supported for the platform\n"); 3114 + return XELPDP_DDI_CLOCK_SELECT_TBT_162; 3115 + } 3116 + return XELPDP_DDI_CLOCK_SELECT_TBT_625; 3113 3117 default: 3114 3118 MISSING_CASE(clock); 3115 3119 return XELPDP_DDI_CLOCK_SELECT_TBT_162; ··· 3134 3114 struct intel_display *display = to_intel_display(encoder); 3135 3115 enum phy phy = intel_encoder_to_phy(encoder); 3136 3116 u32 val = 0; 3117 + u32 mask; 3137 3118 3138 3119 /* 3139 3120 * 1. 
Program PORT_CLOCK_CTL REGISTER to configure 3140 3121 * clock muxes, gating and SSC 3141 3122 */ 3142 - val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(crtc_state->port_clock)); 3123 + 3124 + if (DISPLAY_VER(display) >= 30) { 3125 + mask = XE3_DDI_CLOCK_SELECT_MASK; 3126 + val |= XE3_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock)); 3127 + } else { 3128 + mask = XELPDP_DDI_CLOCK_SELECT_MASK; 3129 + val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock)); 3130 + } 3131 + 3132 + mask |= XELPDP_FORWARD_CLOCK_UNGATE; 3143 3133 val |= XELPDP_FORWARD_CLOCK_UNGATE; 3134 + 3144 3135 intel_de_rmw(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), 3145 - XELPDP_DDI_CLOCK_SELECT_MASK | XELPDP_FORWARD_CLOCK_UNGATE, val); 3136 + mask, val); 3146 3137 3147 3138 /* 2. Read back PORT_CLOCK_CTL REGISTER */ 3148 3139 val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port));
+4
drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
··· 187 187 #define XELPDP_TBT_CLOCK_REQUEST REG_BIT(19) 188 188 #define XELPDP_TBT_CLOCK_ACK REG_BIT(18) 189 189 #define XELPDP_DDI_CLOCK_SELECT_MASK REG_GENMASK(15, 12) 190 + #define XE3_DDI_CLOCK_SELECT_MASK REG_GENMASK(16, 12) 190 191 #define XELPDP_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XELPDP_DDI_CLOCK_SELECT_MASK, val) 192 + #define XE3_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XE3_DDI_CLOCK_SELECT_MASK, val) 191 193 #define XELPDP_DDI_CLOCK_SELECT_NONE 0x0 192 194 #define XELPDP_DDI_CLOCK_SELECT_MAXPCLK 0x8 193 195 #define XELPDP_DDI_CLOCK_SELECT_DIV18CLK 0x9 ··· 197 195 #define XELPDP_DDI_CLOCK_SELECT_TBT_270 0xd 198 196 #define XELPDP_DDI_CLOCK_SELECT_TBT_540 0xe 199 197 #define XELPDP_DDI_CLOCK_SELECT_TBT_810 0xf 198 + #define XELPDP_DDI_CLOCK_SELECT_TBT_312_5 0x18 199 + #define XELPDP_DDI_CLOCK_SELECT_TBT_625 0x19 200 200 #define XELPDP_FORWARD_CLOCK_UNGATE REG_BIT(10) 201 201 #define XELPDP_LANE1_PHY_CLOCK_SELECT REG_BIT(8) 202 202 #define XELPDP_SSC_ENABLE_PLLA REG_BIT(1)
+3
drivers/gpu/drm/xe/regs/xe_engine_regs.h
··· 83 83 #define RING_IMR(base) XE_REG((base) + 0xa8) 84 84 #define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac) 85 85 86 + #define CS_INT_VEC(base) XE_REG((base) + 0x1b8) 87 + 86 88 #define RING_EIR(base) XE_REG((base) + 0xb0) 87 89 #define RING_EMR(base) XE_REG((base) + 0xb4) 88 90 #define RING_ESR(base) XE_REG((base) + 0xb8) ··· 140 138 141 139 #define RING_MODE(base) XE_REG((base) + 0x29c) 142 140 #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) 141 + #define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13) 143 142 144 143 #define RING_TIMESTAMP(base) XE_REG((base) + 0x358) 145 144
+3
drivers/gpu/drm/xe/regs/xe_lrc_layout.h
··· 25 25 #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) 26 26 #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) 27 27 28 + #define CTX_CS_INT_VEC_REG 0x5a 29 + #define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1) 30 + 28 31 #define INDIRECT_CTX_RING_HEAD (0x02 + 1) 29 32 #define INDIRECT_CTX_RING_TAIL (0x04 + 1) 30 33 #define INDIRECT_CTX_RING_START (0x06 + 1)
+14 -2
drivers/gpu/drm/xe/tests/xe_bo.c
··· 606 606 static struct kunit_case xe_bo_tests[] = { 607 607 KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), 608 608 KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), 609 - KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, 610 - {.speed = KUNIT_SPEED_SLOW}), 611 609 {} 612 610 }; 613 611 ··· 616 618 .init = xe_kunit_helper_xe_device_live_test_init, 617 619 }; 618 620 EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite); 621 + 622 + static struct kunit_case xe_bo_shrink_test[] = { 623 + KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, 624 + {.speed = KUNIT_SPEED_SLOW}), 625 + {} 626 + }; 627 + 628 + VISIBLE_IF_KUNIT 629 + struct kunit_suite xe_bo_shrink_test_suite = { 630 + .name = "xe_bo_shrink", 631 + .test_cases = xe_bo_shrink_test, 632 + .init = xe_kunit_helper_xe_device_live_test_init, 633 + }; 634 + EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_test_suite);
+2
drivers/gpu/drm/xe/tests/xe_live_test_mod.c
··· 6 6 #include <kunit/test.h> 7 7 8 8 extern struct kunit_suite xe_bo_test_suite; 9 + extern struct kunit_suite xe_bo_shrink_test_suite; 9 10 extern struct kunit_suite xe_dma_buf_test_suite; 10 11 extern struct kunit_suite xe_migrate_test_suite; 11 12 extern struct kunit_suite xe_mocs_test_suite; 12 13 13 14 kunit_test_suite(xe_bo_test_suite); 15 + kunit_test_suite(xe_bo_shrink_test_suite); 14 16 kunit_test_suite(xe_dma_buf_test_suite); 15 17 kunit_test_suite(xe_migrate_test_suite); 16 18 kunit_test_suite(xe_mocs_test_suite);
+10 -2
drivers/gpu/drm/xe/xe_bo.c
··· 733 733 new_mem->mem_type == XE_PL_SYSTEM) { 734 734 long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, 735 735 DMA_RESV_USAGE_BOOKKEEP, 736 - true, 736 + false, 737 737 MAX_SCHEDULE_TIMEOUT); 738 738 if (timeout < 0) { 739 739 ret = timeout; ··· 857 857 858 858 out: 859 859 if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && 860 - ttm_bo->ttm) 860 + ttm_bo->ttm) { 861 + long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, 862 + DMA_RESV_USAGE_KERNEL, 863 + false, 864 + MAX_SCHEDULE_TIMEOUT); 865 + if (timeout < 0) 866 + ret = timeout; 867 + 861 868 xe_tt_unmap_sg(ttm_bo->ttm); 869 + } 862 870 863 871 return ret; 864 872 }
+14 -1
drivers/gpu/drm/xe/xe_devcoredump.c
··· 119 119 drm_puts(&p, "\n**** GuC CT ****\n"); 120 120 xe_guc_ct_snapshot_print(ss->guc.ct, &p); 121 121 122 - drm_puts(&p, "\n**** Contexts ****\n"); 122 + /* 123 + * Don't add a new section header here because the mesa debug decoder 124 + * tool expects the context information to be in the 'GuC CT' section. 125 + */ 126 + /* drm_puts(&p, "\n**** Contexts ****\n"); */ 123 127 xe_guc_exec_queue_snapshot_print(ss->ge, &p); 124 128 125 129 drm_puts(&p, "\n**** Job ****\n"); ··· 419 415 const u32 *blob32 = (const u32 *)blob; 420 416 char buff[ASCII85_BUFSZ], *line_buff; 421 417 size_t line_pos = 0; 418 + 419 + /* 420 + * Splitting blobs across multiple lines is not compatible with the mesa 421 + * debug decoder tool. Note that even dropping the explicit '\n' below 422 + * doesn't help because the GuC log is so big some underlying implementation 423 + * still splits the lines at 512K characters. So just bail completely for 424 + * the moment. 425 + */ 426 + return; 422 427 423 428 #define DMESG_MAX_LINE_LEN 800 424 429 #define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
+3 -1
drivers/gpu/drm/xe/xe_device.c
··· 325 325 xe->info.revid = pdev->revision; 326 326 xe->info.force_execlist = xe_modparam.force_execlist; 327 327 328 - spin_lock_init(&xe->irq.lock); 328 + err = xe_irq_init(xe); 329 + if (err) 330 + goto err; 329 331 330 332 init_waitqueue_head(&xe->ufence_wq); 331 333
+1 -2
drivers/gpu/drm/xe/xe_device.h
··· 157 157 158 158 static inline bool xe_device_has_msix(struct xe_device *xe) 159 159 { 160 - /* TODO: change this when MSI-X support is fully integrated */ 161 - return false; 160 + return xe->irq.msix.nvec > 0; 162 161 } 163 162 164 163 static inline bool xe_device_has_memirq(struct xe_device *xe)
+8
drivers/gpu/drm/xe/xe_device_types.h
··· 348 348 349 349 /** @irq.enabled: interrupts enabled on this device */ 350 350 atomic_t enabled; 351 + 352 + /** @irq.msix: irq info for platforms that support MSI-X */ 353 + struct { 354 + /** @irq.msix.nvec: number of MSI-X interrupts */ 355 + u16 nvec; 356 + /** @irq.msix.indexes: used to allocate MSI-X indexes */ 357 + struct xarray indexes; 358 + } msix; 351 359 } irq; 352 360 353 361 /** @ttm: ttm device */
+15 -8
drivers/gpu/drm/xe/xe_exec_queue.c
··· 8 8 #include <linux/nospec.h> 9 9 10 10 #include <drm/drm_device.h> 11 + #include <drm/drm_drv.h> 11 12 #include <drm/drm_file.h> 12 13 #include <uapi/drm/xe_drm.h> 13 14 ··· 17 16 #include "xe_hw_engine_class_sysfs.h" 18 17 #include "xe_hw_engine_group.h" 19 18 #include "xe_hw_fence.h" 19 + #include "xe_irq.h" 20 20 #include "xe_lrc.h" 21 21 #include "xe_macros.h" 22 22 #include "xe_migrate.h" ··· 70 68 q->gt = gt; 71 69 q->class = hwe->class; 72 70 q->width = width; 71 + q->msix_vec = XE_IRQ_DEFAULT_MSIX; 73 72 q->logical_mask = logical_mask; 74 73 q->fence_irq = &gt->fence_irq[hwe->class]; 75 74 q->ring_ops = gt->ring_ops[hwe->class]; ··· 120 117 } 121 118 122 119 for (i = 0; i < q->width; ++i) { 123 - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); 120 + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); 124 121 if (IS_ERR(q->lrc[i])) { 125 122 err = PTR_ERR(q->lrc[i]); 126 123 goto err_unlock; ··· 769 766 */ 770 767 void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) 771 768 { 772 - struct xe_file *xef; 769 + struct xe_device *xe = gt_to_xe(q->gt); 773 770 struct xe_lrc *lrc; 774 771 u32 old_ts, new_ts; 772 + int idx; 775 773 776 774 /* 777 - * Jobs that are run during driver load may use an exec_queue, but are 778 - * not associated with a user xe file, so avoid accumulating busyness 779 - * for kernel specific work. 775 + * Jobs that are executed by the kernel don't have a corresponding xe_file 776 + * and thus are not accounted. 780 777 */ 781 - if (!q->vm || !q->vm->xef) 778 + if (!q->xef) 782 779 return; 783 780 784 - xef = q->vm->xef; 781 + /* Synchronize with unbind while holding the xe file open */ 782 + if (!drm_dev_enter(&xe->drm, &idx)) 783 + return; 785 784 786 785 /* 787 786 * Only sample the first LRC. 
For parallel submission, all of them are ··· 795 790 */ 796 791 lrc = q->lrc[0]; 797 792 new_ts = xe_lrc_update_timestamp(lrc, &old_ts); 798 - xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; 793 + q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; 794 + 795 + drm_dev_exit(idx); 799 796 } 800 797 801 798 /**
+3 -1
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 41 41 /** @xef: Back pointer to xe file if this is user created exec queue */ 42 42 struct xe_file *xef; 43 43 44 - /** @gt: graphics tile this exec queue can submit to */ 44 + /** @gt: GT structure this exec queue can submit to */ 45 45 struct xe_gt *gt; 46 46 /** 47 47 * @hwe: A hardware of the same class. May (physical engine) or may not ··· 63 63 char name[MAX_FENCE_NAME_LEN]; 64 64 /** @width: width (number BB submitted per exec) of this exec queue */ 65 65 u16 width; 66 + /** @msix_vec: MSI-X vector (for platforms that support it) */ 67 + u16 msix_vec; 66 68 /** @fence_irq: fence IRQ used to signal job completion */ 67 69 struct xe_hw_fence_irq *fence_irq; 68 70
+7 -3
drivers/gpu/drm/xe/xe_execlist.c
··· 17 17 #include "xe_exec_queue.h" 18 18 #include "xe_gt.h" 19 19 #include "xe_hw_fence.h" 20 + #include "xe_irq.h" 20 21 #include "xe_lrc.h" 21 22 #include "xe_macros.h" 22 23 #include "xe_mmio.h" ··· 48 47 struct xe_mmio *mmio = &gt->mmio; 49 48 struct xe_device *xe = gt_to_xe(gt); 50 49 u64 lrc_desc; 50 + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); 51 51 52 52 lrc_desc = xe_lrc_descriptor(lrc); 53 53 ··· 82 80 xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base), 83 81 xe_bo_ggtt_addr(hwe->hwsp)); 84 82 xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base)); 85 - xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), 86 - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); 83 + 84 + if (xe_device_has_msix(gt_to_xe(hwe->gt))) 85 + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); 86 + xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode); 87 87 88 88 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), 89 89 lower_32_bits(lrc_desc)); ··· 269 265 270 266 port->hwe = hwe; 271 267 272 - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K); 268 + port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX); 273 269 if (IS_ERR(port->lrc)) { 274 270 err = PTR_ERR(port->lrc); 275 271 goto err;
+4 -4
drivers/gpu/drm/xe/xe_gt.c
··· 387 387 xe_force_wake_init_gt(gt, gt_to_fw(gt)); 388 388 spin_lock_init(&gt->global_invl_lock); 389 389 390 + err = xe_gt_tlb_invalidation_init_early(gt); 391 + if (err) 392 + return err; 393 + 390 394 return 0; 391 395 } 392 396 ··· 591 587 gt->ring_ops[i] = xe_ring_ops_get(gt, i); 592 588 xe_hw_fence_irq_init(&gt->fence_irq[i]); 593 589 } 594 - 595 - err = xe_gt_tlb_invalidation_init(gt); 596 - if (err) 597 - return err; 598 590 599 591 err = xe_gt_pagefault_init(gt); 600 592 if (err)
+6 -4
drivers/gpu/drm/xe/xe_gt_idle.c
··· 122 122 if (!xe_gt_is_media_type(gt)) 123 123 gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; 124 124 125 - for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { 126 - if ((gt->info.engine_mask & BIT(i))) 127 - gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | 128 - VDN_MFXVDENC_POWERGATE_ENABLE(j)); 125 + if (xe->info.platform != XE_DG1) { 126 + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { 127 + if ((gt->info.engine_mask & BIT(i))) 128 + gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | 129 + VDN_MFXVDENC_POWERGATE_ENABLE(j)); 130 + } 129 131 } 130 132 131 133 fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+1 -1
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
··· 2120 2120 valid_any = valid_any || (valid_ggtt && is_primary); 2121 2121 2122 2122 if (IS_DGFX(xe)) { 2123 - bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); 2123 + bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); 2124 2124 2125 2125 valid_any = valid_any || (valid_lmem && is_primary); 2126 2126 valid_all = valid_all && valid_lmem;
+2 -2
drivers/gpu/drm/xe/xe_gt_stats.c
··· 12 12 13 13 /** 14 14 * xe_gt_stats_incr - Increments the specified stats counter 15 - * @gt: graphics tile 15 + * @gt: GT structure 16 16 * @id: xe_gt_stats_id type id that needs to be incremented 17 17 * @incr: value to be incremented with 18 18 * ··· 32 32 33 33 /** 34 34 * xe_gt_stats_print_info - Print the GT stats 35 - * @gt: graphics tile 35 + * @gt: GT structure 36 36 * @p: drm_printer where it will be printed out. 37 37 * 38 38 * This prints out all the available GT stats.
+8 -8
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
··· 106 106 } 107 107 108 108 /** 109 - * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state 110 - * @gt: graphics tile 109 + * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state 110 + * @gt: GT structure 111 111 * 112 112 * Initialize GT TLB invalidation state, purely software initialization, should 113 113 * be called once during driver load. 114 114 * 115 115 * Return: 0 on success, negative error code on error. 116 116 */ 117 - int xe_gt_tlb_invalidation_init(struct xe_gt *gt) 117 + int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt) 118 118 { 119 119 gt->tlb_invalidation.seqno = 1; 120 120 INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences); ··· 128 128 129 129 /** 130 130 * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset 131 - * @gt: graphics tile 131 + * @gt: GT structure 132 132 * 133 133 * Signal any pending invalidation fences, should be called during a GT reset 134 134 */ ··· 244 244 245 245 /** 246 246 * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC 247 - * @gt: graphics tile 247 + * @gt: GT structure 248 248 * @fence: invalidation fence which will be signal on TLB invalidation 249 249 * completion 250 250 * ··· 277 277 278 278 /** 279 279 * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT 280 - * @gt: graphics tile 280 + * @gt: GT structure 281 281 * 282 282 * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is 283 283 * synchronous. 
··· 326 326 * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an 327 327 * address range 328 328 * 329 - * @gt: graphics tile 329 + * @gt: GT structure 330 330 * @fence: invalidation fence which will be signal on TLB invalidation 331 331 * completion 332 332 * @start: start address ··· 412 412 413 413 /** 414 414 * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA 415 - * @gt: graphics tile 415 + * @gt: GT structure 416 416 * @fence: invalidation fence which will be signal on TLB invalidation 417 417 * completion, can be NULL 418 418 * @vma: VMA to invalidate
+2 -1
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
··· 14 14 struct xe_guc; 15 15 struct xe_vma; 16 16 17 - int xe_gt_tlb_invalidation_init(struct xe_gt *gt); 17 + int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); 18 + 18 19 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); 19 20 int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); 20 21 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+29 -1
drivers/gpu/drm/xe/xe_guc.c
··· 147 147 return flags; 148 148 } 149 149 150 + static bool needs_wa_dual_queue(struct xe_gt *gt) 151 + { 152 + /* 153 + * The DUAL_QUEUE_WA tells the GuC to not allow concurrent submissions 154 + * on RCS and CCSes with different address spaces, which on DG2 is 155 + * required as a WA for an HW bug. 156 + */ 157 + if (XE_WA(gt, 22011391025)) 158 + return true; 159 + 160 + /* 161 + * On newer platforms, the HW has been updated to not allow parallel 162 + * execution of different address spaces, so the RCS/CCS will stall the 163 + * context switch if one of the other RCS/CCSes is busy with a different 164 + * address space. While functionally correct, having a submission 165 + * stalled on the HW limits the GuC ability to shuffle things around and 166 + * can cause complications if the non-stalled submission runs for a long 167 + * time, because the GuC doesn't know that the stalled submission isn't 168 + * actually running and might declare it as hung. Therefore, we enable 169 + * the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines 170 + * to move management back to the GuC. 171 + */ 172 + if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) 173 + return true; 174 + 175 + return false; 176 + } 177 + 150 178 static u32 guc_ctl_wa_flags(struct xe_guc *guc) 151 179 { 152 180 struct xe_device *xe = guc_to_xe(guc); ··· 187 159 if (XE_WA(gt, 14014475959)) 188 160 flags |= GUC_WA_HOLD_CCS_SWITCHOUT; 189 161 190 - if (XE_WA(gt, 22011391025)) 162 + if (needs_wa_dual_queue(gt)) 191 163 flags |= GUC_WA_DUAL_QUEUE; 192 164 193 165 /*
+6 -3
drivers/gpu/drm/xe/xe_hw_engine.c
··· 324 324 { 325 325 u32 ccs_mask = 326 326 xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); 327 + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); 327 328 328 329 if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) 329 330 xe_mmio_write32(&hwe->gt->mmio, RCU_MODE, ··· 333 332 xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); 334 333 xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), 335 334 xe_bo_ggtt_addr(hwe->hwsp)); 336 - xe_hw_engine_mmio_write32(hwe, RING_MODE(0), 337 - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); 335 + 336 + if (xe_device_has_msix(gt_to_xe(hwe->gt))) 337 + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); 338 + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode); 338 339 xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), 339 340 _MASKED_BIT_DISABLE(STOP_RING)); 340 341 xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); ··· 775 772 xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0); 776 773 xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0); 777 774 778 - drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); 775 + drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); 779 776 } 780 777 } 781 778
+1 -1
drivers/gpu/drm/xe/xe_hw_engine_types.h
··· 106 106 * Contains all the hardware engine state for physical instances. 107 107 */ 108 108 struct xe_hw_engine { 109 - /** @gt: graphics tile this hw engine belongs to */ 109 + /** @gt: GT structure this hw engine belongs to */ 110 110 struct xe_gt *gt; 111 111 /** @name: name of this hw engine */ 112 112 const char *name;
+1 -1
drivers/gpu/drm/xe/xe_hw_fence_types.h
··· 41 41 * to a xe_hw_fence_irq, maintains serial seqno. 42 42 */ 43 43 struct xe_hw_fence_ctx { 44 - /** @gt: graphics tile of hardware fence context */ 44 + /** @gt: GT structure of hardware fence context */ 45 45 struct xe_gt *gt; 46 46 /** @irq: fence irq handler */ 47 47 struct xe_hw_fence_irq *irq;
+285 -38
drivers/gpu/drm/xe/xe_irq.c
··· 10 10 #include <drm/drm_managed.h> 11 11 12 12 #include "display/xe_display.h" 13 + #include "regs/xe_guc_regs.h" 13 14 #include "regs/xe_irq_regs.h" 14 15 #include "xe_device.h" 15 16 #include "xe_drv.h" ··· 29 28 #define IMR(offset) XE_REG(offset + 0x4) 30 29 #define IIR(offset) XE_REG(offset + 0x8) 31 30 #define IER(offset) XE_REG(offset + 0xc) 31 + 32 + static int xe_irq_msix_init(struct xe_device *xe); 33 + static void xe_irq_msix_free(struct xe_device *xe); 34 + static int xe_irq_msix_request_irqs(struct xe_device *xe); 35 + static void xe_irq_msix_synchronize_irq(struct xe_device *xe); 32 36 33 37 static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg) 34 38 { ··· 578 572 if (IS_SRIOV_VF(xe)) 579 573 return vf_irq_reset(xe); 580 574 575 + if (xe_device_uses_memirq(xe)) { 576 + for_each_tile(tile, xe, id) 577 + xe_memirq_reset(&tile->memirq); 578 + } 579 + 581 580 for_each_tile(tile, xe, id) { 582 581 if (GRAPHICS_VERx100(xe) >= 1210) 583 582 dg1_irq_reset(tile); ··· 624 613 { 625 614 if (IS_SRIOV_VF(xe)) 626 615 return vf_irq_postinstall(xe); 616 + 617 + if (xe_device_uses_memirq(xe)) { 618 + struct xe_tile *tile; 619 + unsigned int id; 620 + 621 + for_each_tile(tile, xe, id) 622 + xe_memirq_postinstall(&tile->memirq); 623 + } 627 624 628 625 xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); 629 626 ··· 675 656 return xelp_irq_handler; 676 657 } 677 658 678 - static void irq_uninstall(void *arg) 679 - { 680 - struct xe_device *xe = arg; 681 - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 682 - int irq; 683 - 684 - if (!atomic_xchg(&xe->irq.enabled, 0)) 685 - return; 686 - 687 - xe_irq_reset(xe); 688 - 689 - irq = pci_irq_vector(pdev, 0); 690 - free_irq(irq, xe); 691 - } 692 - 693 - int xe_irq_install(struct xe_device *xe) 659 + static int xe_irq_msi_request_irqs(struct xe_device *xe) 694 660 { 695 661 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 696 - unsigned int irq_flags = PCI_IRQ_MSIX; 697 662 irq_handler_t irq_handler; 698 - 
int err, irq, nvec; 663 + int irq, err; 699 664 700 665 irq_handler = xe_irq_handler(xe); 701 666 if (!irq_handler) { ··· 687 684 return -EINVAL; 688 685 } 689 686 687 + irq = pci_irq_vector(pdev, 0); 688 + err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); 689 + if (err < 0) { 690 + drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err); 691 + return err; 692 + } 693 + 694 + return 0; 695 + } 696 + 697 + static void xe_irq_msi_free(struct xe_device *xe) 698 + { 699 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 700 + int irq; 701 + 702 + irq = pci_irq_vector(pdev, 0); 703 + free_irq(irq, xe); 704 + } 705 + 706 + static void irq_uninstall(void *arg) 707 + { 708 + struct xe_device *xe = arg; 709 + 710 + if (!atomic_xchg(&xe->irq.enabled, 0)) 711 + return; 712 + 690 713 xe_irq_reset(xe); 691 714 692 - nvec = pci_msix_vec_count(pdev); 693 - if (nvec <= 0) { 694 - if (nvec == -EINVAL) { 695 - /* MSIX capability is not supported in the device, using MSI */ 696 - irq_flags = PCI_IRQ_MSI; 697 - nvec = 1; 698 - } else { 699 - drm_err(&xe->drm, "MSIX: Failed getting count\n"); 700 - return nvec; 701 - } 715 + if (xe_device_has_msix(xe)) 716 + xe_irq_msix_free(xe); 717 + else 718 + xe_irq_msi_free(xe); 719 + } 720 + 721 + int xe_irq_init(struct xe_device *xe) 722 + { 723 + spin_lock_init(&xe->irq.lock); 724 + 725 + return xe_irq_msix_init(xe); 726 + } 727 + 728 + int xe_irq_install(struct xe_device *xe) 729 + { 730 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 731 + unsigned int irq_flags = PCI_IRQ_MSI; 732 + int nvec = 1; 733 + int err; 734 + 735 + xe_irq_reset(xe); 736 + 737 + if (xe_device_has_msix(xe)) { 738 + nvec = xe->irq.msix.nvec; 739 + irq_flags = PCI_IRQ_MSIX; 702 740 } 703 741 704 742 err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags); 705 743 if (err < 0) { 706 - drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err); 744 + drm_err(&xe->drm, "Failed to allocate IRQ vectors: %d\n", err); 707 745 return err; 708 746 } 709 
747 710 - irq = pci_irq_vector(pdev, 0); 711 - err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); 712 - if (err < 0) { 713 - drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err); 748 + err = xe_device_has_msix(xe) ? xe_irq_msix_request_irqs(xe) : 749 + xe_irq_msi_request_irqs(xe); 750 + if (err) 714 751 return err; 715 - } 716 752 717 753 atomic_set(&xe->irq.enabled, 1); 718 754 ··· 764 722 return 0; 765 723 766 724 free_irq_handler: 767 - free_irq(irq, xe); 725 + if (xe_device_has_msix(xe)) 726 + xe_irq_msix_free(xe); 727 + else 728 + xe_irq_msi_free(xe); 768 729 769 730 return err; 770 731 } 771 732 733 + static void xe_irq_msi_synchronize_irq(struct xe_device *xe) 734 + { 735 + synchronize_irq(to_pci_dev(xe->drm.dev)->irq); 736 + } 737 + 772 738 void xe_irq_suspend(struct xe_device *xe) 773 739 { 774 - int irq = to_pci_dev(xe->drm.dev)->irq; 775 - 776 740 atomic_set(&xe->irq.enabled, 0); /* no new irqs */ 777 741 778 - synchronize_irq(irq); /* flush irqs */ 742 + /* flush irqs */ 743 + if (xe_device_has_msix(xe)) 744 + xe_irq_msix_synchronize_irq(xe); 745 + else 746 + xe_irq_msi_synchronize_irq(xe); 779 747 xe_irq_reset(xe); /* turn irqs off */ 780 748 } 781 749 ··· 805 753 806 754 for_each_gt(gt, xe, id) 807 755 xe_irq_enable_hwe(gt); 756 + } 757 + 758 + /* MSI-X related definitions and functions below. 
*/ 759 + 760 + enum xe_irq_msix_static { 761 + GUC2HOST_MSIX = 0, 762 + DEFAULT_MSIX = XE_IRQ_DEFAULT_MSIX, 763 + /* Must be last */ 764 + NUM_OF_STATIC_MSIX, 765 + }; 766 + 767 + static int xe_irq_msix_init(struct xe_device *xe) 768 + { 769 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 770 + int nvec = pci_msix_vec_count(pdev); 771 + 772 + if (nvec == -EINVAL) 773 + return 0; /* MSI */ 774 + 775 + if (nvec < 0) { 776 + drm_err(&xe->drm, "Failed getting MSI-X vectors count: %d\n", nvec); 777 + return nvec; 778 + } 779 + 780 + xe->irq.msix.nvec = nvec; 781 + xa_init_flags(&xe->irq.msix.indexes, XA_FLAGS_ALLOC); 782 + return 0; 783 + } 784 + 785 + static irqreturn_t guc2host_irq_handler(int irq, void *arg) 786 + { 787 + struct xe_device *xe = arg; 788 + struct xe_tile *tile; 789 + u8 id; 790 + 791 + if (!atomic_read(&xe->irq.enabled)) 792 + return IRQ_NONE; 793 + 794 + for_each_tile(tile, xe, id) 795 + xe_guc_irq_handler(&tile->primary_gt->uc.guc, 796 + GUC_INTR_GUC2HOST); 797 + 798 + return IRQ_HANDLED; 799 + } 800 + 801 + static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg) 802 + { 803 + unsigned int tile_id, gt_id; 804 + struct xe_device *xe = arg; 805 + struct xe_memirq *memirq; 806 + struct xe_hw_engine *hwe; 807 + enum xe_hw_engine_id id; 808 + struct xe_tile *tile; 809 + struct xe_gt *gt; 810 + 811 + if (!atomic_read(&xe->irq.enabled)) 812 + return IRQ_NONE; 813 + 814 + for_each_tile(tile, xe, tile_id) { 815 + memirq = &tile->memirq; 816 + if (!memirq->bo) 817 + continue; 818 + 819 + for_each_gt(gt, xe, gt_id) { 820 + if (gt->tile != tile) 821 + continue; 822 + 823 + for_each_hw_engine(hwe, gt, id) 824 + xe_memirq_hwe_handler(memirq, hwe); 825 + } 826 + } 827 + 828 + return IRQ_HANDLED; 829 + } 830 + 831 + static int xe_irq_msix_alloc_vector(struct xe_device *xe, void *irq_buf, 832 + bool dynamic_msix, u16 *msix) 833 + { 834 + struct xa_limit limit; 835 + int ret; 836 + u32 id; 837 + 838 + limit = (dynamic_msix) ? 
XA_LIMIT(NUM_OF_STATIC_MSIX, xe->irq.msix.nvec - 1) : 839 + XA_LIMIT(*msix, *msix); 840 + ret = xa_alloc(&xe->irq.msix.indexes, &id, irq_buf, limit, GFP_KERNEL); 841 + if (ret) 842 + return ret; 843 + 844 + if (dynamic_msix) 845 + *msix = id; 846 + 847 + return 0; 848 + } 849 + 850 + static void xe_irq_msix_release_vector(struct xe_device *xe, u16 msix) 851 + { 852 + xa_erase(&xe->irq.msix.indexes, msix); 853 + } 854 + 855 + static int xe_irq_msix_request_irq_internal(struct xe_device *xe, irq_handler_t handler, 856 + void *irq_buf, const char *name, u16 msix) 857 + { 858 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 859 + int ret, irq; 860 + 861 + irq = pci_irq_vector(pdev, msix); 862 + if (irq < 0) 863 + return irq; 864 + 865 + ret = request_irq(irq, handler, IRQF_SHARED, name, irq_buf); 866 + if (ret < 0) 867 + return ret; 868 + 869 + return 0; 870 + } 871 + 872 + int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, 873 + const char *name, bool dynamic_msix, u16 *msix) 874 + { 875 + int ret; 876 + 877 + ret = xe_irq_msix_alloc_vector(xe, irq_buf, dynamic_msix, msix); 878 + if (ret) 879 + return ret; 880 + 881 + ret = xe_irq_msix_request_irq_internal(xe, handler, irq_buf, name, *msix); 882 + if (ret) { 883 + drm_err(&xe->drm, "Failed to request IRQ for MSI-X %u\n", *msix); 884 + xe_irq_msix_release_vector(xe, *msix); 885 + return ret; 886 + } 887 + 888 + return 0; 889 + } 890 + 891 + void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix) 892 + { 893 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 894 + int irq; 895 + void *irq_buf; 896 + 897 + irq_buf = xa_load(&xe->irq.msix.indexes, msix); 898 + if (!irq_buf) 899 + return; 900 + 901 + irq = pci_irq_vector(pdev, msix); 902 + if (irq < 0) { 903 + drm_err(&xe->drm, "MSI-X %u can't be released, there is no matching IRQ\n", msix); 904 + return; 905 + } 906 + 907 + free_irq(irq, irq_buf); 908 + xe_irq_msix_release_vector(xe, msix); 909 + } 910 + 911 + int 
xe_irq_msix_request_irqs(struct xe_device *xe) 912 + { 913 + int err; 914 + u16 msix; 915 + 916 + msix = GUC2HOST_MSIX; 917 + err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe, 918 + DRIVER_NAME "-guc2host", false, &msix); 919 + if (err) 920 + return err; 921 + 922 + msix = DEFAULT_MSIX; 923 + err = xe_irq_msix_request_irq(xe, xe_irq_msix_default_hwe_handler, xe, 924 + DRIVER_NAME "-default-msix", false, &msix); 925 + if (err) { 926 + xe_irq_msix_free_irq(xe, GUC2HOST_MSIX); 927 + return err; 928 + } 929 + 930 + return 0; 931 + } 932 + 933 + void xe_irq_msix_free(struct xe_device *xe) 934 + { 935 + unsigned long msix; 936 + u32 *dummy; 937 + 938 + xa_for_each(&xe->irq.msix.indexes, msix, dummy) 939 + xe_irq_msix_free_irq(xe, msix); 940 + xa_destroy(&xe->irq.msix.indexes); 941 + } 942 + 943 + void xe_irq_msix_synchronize_irq(struct xe_device *xe) 944 + { 945 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 946 + unsigned long msix; 947 + u32 *dummy; 948 + 949 + xa_for_each(&xe->irq.msix.indexes, msix, dummy) 950 + synchronize_irq(pci_irq_vector(pdev, msix)); 808 951 }
+8
drivers/gpu/drm/xe/xe_irq.h
··· 6 6 #ifndef _XE_IRQ_H_ 7 7 #define _XE_IRQ_H_ 8 8 9 + #include <linux/interrupt.h> 10 + 11 + #define XE_IRQ_DEFAULT_MSIX 1 12 + 9 13 struct xe_device; 10 14 struct xe_tile; 11 15 struct xe_gt; 12 16 17 + int xe_irq_init(struct xe_device *xe); 13 18 int xe_irq_install(struct xe_device *xe); 14 19 void xe_irq_suspend(struct xe_device *xe); 15 20 void xe_irq_resume(struct xe_device *xe); 16 21 void xe_irq_enable_hwe(struct xe_gt *gt); 22 + int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, 23 + const char *name, bool dynamic_msix, u16 *msix); 24 + void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix); 17 25 18 26 #endif
+20 -4
drivers/gpu/drm/xe/xe_lrc.c
··· 584 584 { 585 585 struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq; 586 586 struct xe_device *xe = gt_to_xe(hwe->gt); 587 + u8 num_regs; 587 588 588 589 if (!xe_device_uses_memirq(xe)) 589 590 return; ··· 594 593 regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; 595 594 regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); 596 595 597 - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | 596 + num_regs = xe_device_has_msix(xe) ? 3 : 2; 597 + regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) | 598 598 MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; 599 599 regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; 600 600 regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); 601 601 regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; 602 602 regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); 603 + 604 + if (xe_device_has_msix(xe)) { 605 + regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr; 606 + /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */ 607 + } 603 608 } 604 609 605 610 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) ··· 883 876 #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) 884 877 885 878 static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 886 - struct xe_vm *vm, u32 ring_size) 879 + struct xe_vm *vm, u32 ring_size, u16 msix_vec) 887 880 { 888 881 struct xe_gt *gt = hwe->gt; 889 882 struct xe_tile *tile = gt_to_tile(gt); ··· 952 945 xe_drm_client_add_bo(vm->xef->client, lrc->bo); 953 946 } 954 947 948 + if (xe_device_has_msix(xe)) { 949 + xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, 950 + xe_memirq_status_ptr(&tile->memirq, hwe)); 951 + xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR, 952 + xe_memirq_source_ptr(&tile->memirq, hwe)); 953 + xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec); 954 + } 955 + 955 956 if (xe_gt_has_indirect_ring_state(gt)) { 956 957 xe_lrc_write_ctx_reg(lrc, 
CTX_INDIRECT_RING_STATE, 957 958 __xe_lrc_indirect_ring_ggtt_addr(lrc)); ··· 1020 1005 * @hwe: Hardware Engine 1021 1006 * @vm: The VM (address space) 1022 1007 * @ring_size: LRC ring size 1008 + * @msix_vec: MSI-X interrupt vector (for platforms that support it) 1023 1009 * 1024 1010 * Allocate and initialize the Logical Ring Context (LRC). 1025 1011 * ··· 1028 1012 * upon failure. 1029 1013 */ 1030 1014 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, 1031 - u32 ring_size) 1015 + u32 ring_size, u16 msix_vec) 1032 1016 { 1033 1017 struct xe_lrc *lrc; 1034 1018 int err; ··· 1037 1021 if (!lrc) 1038 1022 return ERR_PTR(-ENOMEM); 1039 1023 1040 - err = xe_lrc_init(lrc, hwe, vm, ring_size); 1024 + err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec); 1041 1025 if (err) { 1042 1026 kfree(lrc); 1043 1027 return ERR_PTR(err);
+1 -1
drivers/gpu/drm/xe/xe_lrc.h
··· 42 42 #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) 43 43 44 44 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, 45 - u32 ring_size); 45 + u32 ring_size, u16 msix_vec); 46 46 void xe_lrc_destroy(struct kref *ref); 47 47 48 48 /**
+71 -181
drivers/gpu/drm/xe/xe_oa.c
··· 16 16 #include "instructions/xe_mi_commands.h" 17 17 #include "regs/xe_engine_regs.h" 18 18 #include "regs/xe_gt_regs.h" 19 - #include "regs/xe_lrc_layout.h" 20 19 #include "regs/xe_oa_regs.h" 21 20 #include "xe_assert.h" 22 21 #include "xe_bb.h" ··· 27 28 #include "xe_gt_mcr.h" 28 29 #include "xe_gt_printk.h" 29 30 #include "xe_guc_pc.h" 30 - #include "xe_lrc.h" 31 31 #include "xe_macros.h" 32 32 #include "xe_mmio.h" 33 33 #include "xe_oa.h" ··· 72 74 struct rcu_head rcu; 73 75 }; 74 76 75 - struct flex { 76 - struct xe_reg reg; 77 - u32 offset; 78 - u32 value; 79 - }; 80 - 81 77 struct xe_oa_open_param { 82 78 struct xe_file *xef; 83 79 u32 oa_unit_id; ··· 89 97 int num_syncs; 90 98 struct xe_sync_entry *syncs; 91 99 size_t oa_buffer_size; 100 + int wait_num_reports; 92 101 }; 93 102 94 103 struct xe_oa_config_bo { ··· 234 241 static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) 235 242 { 236 243 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 244 + u32 tail, hw_tail, partial_report_size, available; 237 245 int report_size = stream->oa_buffer.format->size; 238 - u32 tail, hw_tail; 239 246 unsigned long flags; 240 247 bool pollin; 241 - u32 partial_report_size; 242 248 243 249 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 244 250 ··· 281 289 282 290 stream->oa_buffer.tail = tail; 283 291 284 - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, 285 - stream->oa_buffer.head) >= report_size; 292 + available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); 293 + pollin = available >= stream->wait_num_reports * report_size; 286 294 287 295 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 288 296 ··· 597 605 return ret; 598 606 } 599 607 608 + static void xe_oa_lock_vma(struct xe_exec_queue *q) 609 + { 610 + if (q->vm) { 611 + down_read(&q->vm->lock); 612 + xe_vm_lock(q->vm, false); 613 + } 614 + } 615 + 616 + static void xe_oa_unlock_vma(struct xe_exec_queue *q) 617 + { 618 + if (q->vm) { 
619 + xe_vm_unlock(q->vm); 620 + up_read(&q->vm->lock); 621 + } 622 + } 623 + 600 624 static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, 601 625 struct xe_bb *bb) 602 626 { 627 + struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; 603 628 struct xe_sched_job *job; 604 629 struct dma_fence *fence; 605 630 int err = 0; 606 631 607 - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ 608 - job = xe_bb_create_job(stream->k_exec_q, bb); 632 + xe_oa_lock_vma(q); 633 + 634 + job = xe_bb_create_job(q, bb); 609 635 if (IS_ERR(job)) { 610 636 err = PTR_ERR(job); 611 637 goto exit; 612 638 } 639 + job->ggtt = true; 613 640 614 641 if (deps == XE_OA_SUBMIT_ADD_DEPS) { 615 642 for (int i = 0; i < stream->num_syncs && !err; i++) ··· 643 632 fence = dma_fence_get(&job->drm.s_fence->finished); 644 633 xe_sched_job_push(job); 645 634 635 + xe_oa_unlock_vma(q); 636 + 646 637 return fence; 647 638 err_put_job: 648 639 xe_sched_job_put(job); 649 640 exit: 641 + xe_oa_unlock_vma(q); 650 642 return ERR_PTR(err); 651 643 } 652 644 ··· 698 684 dma_fence_put(stream->last_fence); 699 685 } 700 686 701 - static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, 702 - struct xe_bb *bb, const struct flex *flex, u32 count) 703 - { 704 - u32 offset = xe_bo_ggtt_addr(lrc->bo); 705 - 706 - do { 707 - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); 708 - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); 709 - bb->cs[bb->len++] = 0; 710 - bb->cs[bb->len++] = flex->value; 711 - 712 - } while (flex++, --count); 713 - } 714 - 715 - static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, 716 - const struct flex *flex, u32 count) 687 + static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) 717 688 { 718 689 struct dma_fence *fence; 719 690 struct xe_bb *bb; 720 691 int err; 721 692 722 - bb = 
xe_bb_new(stream->gt, 4 * count, false); 693 + bb = xe_bb_new(stream->gt, 2 * count + 1, false); 723 694 if (IS_ERR(bb)) { 724 695 err = PTR_ERR(bb); 725 696 goto exit; 726 697 } 727 698 728 - xe_oa_store_flex(stream, lrc, bb, flex, count); 729 - 730 - fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); 731 - if (IS_ERR(fence)) { 732 - err = PTR_ERR(fence); 733 - goto free_bb; 734 - } 735 - xe_bb_free(bb, fence); 736 - dma_fence_put(fence); 737 - 738 - return 0; 739 - free_bb: 740 - xe_bb_free(bb, NULL); 741 - exit: 742 - return err; 743 - } 744 - 745 - static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) 746 - { 747 - struct dma_fence *fence; 748 - struct xe_bb *bb; 749 - int err; 750 - 751 - bb = xe_bb_new(stream->gt, 3, false); 752 - if (IS_ERR(bb)) { 753 - err = PTR_ERR(bb); 754 - goto exit; 755 - } 756 - 757 - write_cs_mi_lri(bb, reg_lri, 1); 699 + write_cs_mi_lri(bb, reg_lri, count); 758 700 759 701 fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); 760 702 if (IS_ERR(fence)) { ··· 730 760 static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) 731 761 { 732 762 const struct xe_oa_format *format = stream->oa_buffer.format; 733 - struct xe_lrc *lrc = stream->exec_q->lrc[0]; 734 - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); 735 763 u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | 736 764 (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); 737 765 738 - struct flex regs_context[] = { 766 + struct xe_oa_reg reg_lri[] = { 739 767 { 740 768 OACTXCONTROL(stream->hwe->mmio_base), 741 - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, 742 769 enable ? 
OA_COUNTER_RESUME : 0, 743 770 }, 744 771 { 772 + OAR_OACONTROL, 773 + oacontrol, 774 + }, 775 + { 745 776 RING_CONTEXT_CONTROL(stream->hwe->mmio_base), 746 - regs_offset + CTX_CONTEXT_CONTROL, 747 - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), 777 + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, 778 + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) 748 779 }, 749 780 }; 750 - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; 751 - int err; 752 781 753 - /* Modify stream hwe context image with regs_context */ 754 - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], 755 - regs_context, ARRAY_SIZE(regs_context)); 756 - if (err) 757 - return err; 758 - 759 - /* Apply reg_lri using LRI */ 760 - return xe_oa_load_with_lri(stream, &reg_lri); 782 + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); 761 783 } 762 784 763 785 static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) 764 786 { 765 787 const struct xe_oa_format *format = stream->oa_buffer.format; 766 - struct xe_lrc *lrc = stream->exec_q->lrc[0]; 767 - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); 768 788 u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | 769 789 (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); 770 - struct flex regs_context[] = { 790 + struct xe_oa_reg reg_lri[] = { 771 791 { 772 792 OACTXCONTROL(stream->hwe->mmio_base), 773 - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, 774 793 enable ? OA_COUNTER_RESUME : 0, 775 794 }, 776 795 { 796 + OAC_OACONTROL, 797 + oacontrol 798 + }, 799 + { 777 800 RING_CONTEXT_CONTROL(stream->hwe->mmio_base), 778 - regs_offset + CTX_CONTEXT_CONTROL, 779 - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | 801 + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, 802 + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | 780 803 _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? 
CTX_CTRL_RUN_ALONE : 0), 781 804 }, 782 805 }; 783 - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; 784 - int err; 785 806 786 807 /* Set ccs select to enable programming of OAC_OACONTROL */ 787 808 xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, 788 809 __oa_ccs_select(stream)); 789 810 790 - /* Modify stream hwe context image with regs_context */ 791 - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], 792 - regs_context, ARRAY_SIZE(regs_context)); 793 - if (err) 794 - return err; 795 - 796 - /* Apply reg_lri using LRI */ 797 - return xe_oa_load_with_lri(stream, &reg_lri); 811 + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); 798 812 } 799 813 800 814 static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) ··· 1239 1285 return 0; 1240 1286 } 1241 1287 1288 + static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value, 1289 + struct xe_oa_open_param *param) 1290 + { 1291 + if (!value) { 1292 + drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value); 1293 + return -EINVAL; 1294 + } 1295 + param->wait_num_reports = value; 1296 + return 0; 1297 + } 1298 + 1242 1299 static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, 1243 1300 struct xe_oa_open_param *param) 1244 1301 { ··· 1271 1306 [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1272 1307 [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1273 1308 [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, 1309 + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports, 1274 1310 }; 1275 1311 1276 1312 static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { ··· 1287 1321 [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1288 1322 [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1289 1323 [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, 1324 + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval, 1290 1325 }; 1291 1326 
1292 1327 static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, ··· 1671 1704 .mmap = xe_oa_mmap, 1672 1705 }; 1673 1706 1674 - static bool engine_supports_mi_query(struct xe_hw_engine *hwe) 1675 - { 1676 - return hwe->class == XE_ENGINE_CLASS_RENDER || 1677 - hwe->class == XE_ENGINE_CLASS_COMPUTE; 1678 - } 1679 - 1680 - static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) 1681 - { 1682 - u32 idx = *offset; 1683 - u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); 1684 - bool found = false; 1685 - 1686 - idx++; 1687 - for (; idx < len; idx += 2) { 1688 - if (state[idx] == reg) { 1689 - found = true; 1690 - break; 1691 - } 1692 - } 1693 - 1694 - *offset = idx; 1695 - return found; 1696 - } 1697 - 1698 - #define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ 1699 - REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) 1700 - 1701 - static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) 1702 - { 1703 - struct xe_lrc *lrc = stream->exec_q->lrc[0]; 1704 - u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) + 1705 - lrc->ring.size) / sizeof(u32); 1706 - u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); 1707 - u32 *state = (u32 *)lrc->bo->vmap.vaddr; 1708 - 1709 - if (drm_WARN_ON(&stream->oa->xe->drm, !state)) 1710 - return U32_MAX; 1711 - 1712 - for (; offset < len; ) { 1713 - if (IS_MI_LRI_CMD(state[offset])) { 1714 - /* 1715 - * We expect reg-value pairs in MI_LRI command, so 1716 - * MI_LRI_LEN() should be even 1717 - */ 1718 - drm_WARN_ON(&stream->oa->xe->drm, 1719 - MI_LRI_LEN(state[offset]) & 0x1); 1720 - 1721 - if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) 1722 - break; 1723 - } else { 1724 - offset++; 1725 - } 1726 - } 1727 - 1728 - return offset < len ? 
offset : U32_MAX; 1729 - } 1730 - 1731 - static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) 1732 - { 1733 - struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); 1734 - u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; 1735 - 1736 - /* Do this only once. Failure is stored as offset of U32_MAX */ 1737 - if (offset) 1738 - goto exit; 1739 - 1740 - offset = xe_oa_context_image_offset(stream, reg.addr); 1741 - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; 1742 - 1743 - drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", 1744 - stream->hwe->name, offset); 1745 - exit: 1746 - return offset && offset != U32_MAX ? 0 : -ENODEV; 1747 - } 1748 - 1749 1707 static int xe_oa_stream_init(struct xe_oa_stream *stream, 1750 1708 struct xe_oa_open_param *param) 1751 1709 { ··· 1689 1797 stream->periodic = param->period_exponent > 0; 1690 1798 stream->period_exponent = param->period_exponent; 1691 1799 stream->no_preempt = param->no_preempt; 1800 + stream->wait_num_reports = param->wait_num_reports; 1692 1801 1693 1802 stream->xef = xe_file_get(param->xef); 1694 1803 stream->num_syncs = param->num_syncs; ··· 1707 1814 param->oa_buffer_size % stream->oa_buffer.format->size; 1708 1815 else 1709 1816 stream->oa_buffer.circ_size = param->oa_buffer_size; 1710 - 1711 - if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { 1712 - /* If we don't find the context offset, just return error */ 1713 - ret = xe_oa_set_ctx_ctrl_offset(stream); 1714 - if (ret) { 1715 - drm_err(&stream->oa->xe->drm, 1716 - "xe_oa_set_ctx_ctrl_offset failed for %s\n", 1717 - stream->hwe->name); 1718 - goto exit; 1719 - } 1720 - } 1721 1817 1722 1818 stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); 1723 1819 if (!stream->oa_config) { ··· 1976 2094 if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) 1977 2095 return -ENOENT; 1978 2096 1979 - if (param.exec_q->width > 1) 1980 - drm_dbg(&oa->xe->drm, "exec_q->width > 1, 
programming only exec_q->lrc[0]\n"); 2097 + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) 2098 + return -EOPNOTSUPP; 1981 2099 } 1982 2100 1983 2101 /* ··· 2037 2155 2038 2156 if (!param.oa_buffer_size) 2039 2157 param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; 2158 + 2159 + if (!param.wait_num_reports) 2160 + param.wait_num_reports = 1; 2161 + if (param.wait_num_reports > param.oa_buffer_size / f->size) { 2162 + drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports); 2163 + ret = -EINVAL; 2164 + goto err_exec_q; 2165 + } 2040 2166 2041 2167 ret = xe_oa_parse_syncs(oa, &param); 2042 2168 if (ret)
+3 -3
drivers/gpu/drm/xe/xe_oa_types.h
··· 138 138 /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ 139 139 struct idr metrics_idr; 140 140 141 - /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */ 142 - u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX]; 143 - 144 141 /** @oa_formats: tracks all OA formats across platforms */ 145 142 const struct xe_oa_format *oa_formats; 146 143 ··· 214 217 215 218 /** @pollin: Whether there is data available to read */ 216 219 bool pollin; 220 + 221 + /** @wait_num_reports: Number of reports to wait for before signalling pollin */ 222 + int wait_num_reports; 217 223 218 224 /** @periodic: Whether periodic sampling is currently enabled */ 219 225 bool periodic;
+3 -1
drivers/gpu/drm/xe/xe_pm.c
··· 7 7 8 8 #include <linux/fault-inject.h> 9 9 #include <linux/pm_runtime.h> 10 + #include <linux/suspend.h> 10 11 11 12 #include <drm/drm_managed.h> 12 13 #include <drm/ttm/ttm_placement.h> ··· 608 607 struct device *dev = xe->drm.dev; 609 608 610 609 return dev->power.runtime_status == RPM_SUSPENDING || 611 - dev->power.runtime_status == RPM_RESUMING; 610 + dev->power.runtime_status == RPM_RESUMING || 611 + pm_suspend_target_state != PM_SUSPEND_ON; 612 612 #else 613 613 return false; 614 614 #endif
+2 -1
drivers/gpu/drm/xe/xe_query.c
··· 672 672 du->oa_unit_type = u->type; 673 673 du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); 674 674 du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | 675 - DRM_XE_OA_CAPS_OA_BUFFER_SIZE; 675 + DRM_XE_OA_CAPS_OA_BUFFER_SIZE | 676 + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; 676 677 677 678 j = 0; 678 679 for_each_hw_engine(hwe, gt, hwe_id) {
+4 -1
drivers/gpu/drm/xe/xe_ring_ops.c
··· 221 221 222 222 static u32 get_ppgtt_flag(struct xe_sched_job *job) 223 223 { 224 - return job->q->vm ? BIT(8) : 0; 224 + if (job->q->vm && !job->ggtt) 225 + return BIT(8); 226 + 227 + return 0; 225 228 } 226 229 227 230 static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
+2
drivers/gpu/drm/xe/xe_sched_job_types.h
··· 56 56 u32 migrate_flush_flags; 57 57 /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ 58 58 bool ring_ops_flush_tlb; 59 + /** @ggtt: mapped in ggtt. */ 60 + bool ggtt; 59 61 /** @ptrs: per instance pointers. */ 60 62 struct xe_job_ptrs ptrs[]; 61 63 };
+6 -6
drivers/gpu/drm/xe/xe_trace_bo.h
··· 60 60 TP_STRUCT__entry( 61 61 __field(struct xe_bo *, bo) 62 62 __field(size_t, size) 63 - __field(u32, new_placement) 64 - __field(u32, old_placement) 63 + __string(new_placement_name, xe_mem_type_to_name[new_placement]) 64 + __string(old_placement_name, xe_mem_type_to_name[old_placement]) 65 65 __string(device_id, __dev_name_bo(bo)) 66 66 __field(bool, move_lacks_source) 67 67 ), ··· 69 69 TP_fast_assign( 70 70 __entry->bo = bo; 71 71 __entry->size = bo->size; 72 - __entry->new_placement = new_placement; 73 - __entry->old_placement = old_placement; 72 + __assign_str(new_placement_name); 73 + __assign_str(old_placement_name); 74 74 __assign_str(device_id); 75 75 __entry->move_lacks_source = move_lacks_source; 76 76 ), 77 77 TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", 78 78 __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, 79 - xe_mem_type_to_name[__entry->old_placement], 80 - xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) 79 + __get_str(old_placement_name), 80 + __get_str(new_placement_name), __get_str(device_id)) 81 81 ); 82 82 83 83 DECLARE_EVENT_CLASS(xe_vma,
+7
include/uapi/drm/xe_drm.h
··· 1487 1487 #define DRM_XE_OA_CAPS_BASE (1 << 0) 1488 1488 #define DRM_XE_OA_CAPS_SYNCS (1 << 1) 1489 1489 #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) 1490 + #define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) 1490 1491 1491 1492 /** @oa_timestamp_freq: OA timestamp freq */ 1492 1493 __u64 oa_timestamp_freq; ··· 1661 1660 * buffer is allocated by default. 1662 1661 */ 1663 1662 DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, 1663 + 1664 + /** 1665 + * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait 1666 + * for before unblocking poll or read 1667 + */ 1668 + DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS, 1664 1669 }; 1665 1670 1666 1671 /**