Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-xe-next-2025-10-28' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Driver Changes:
More xe3p support (Harish, Brian, Balasubramani, Matt Roper)
Make panic support work on VRAM for display (Maarten)
Fix stolen size check (Shuicheng)
xe_pci_test update (Gustavo)
VF migration updates (Tomasz)
A couple of fixes around allocation and PM references (Matt Brost)
Migration update for the MEM_COPY instruction (Matt Auld)
Initial CRI support (Balasubramani, Matt Roper)
Use SVM range helpers in PT layer (Matt Brost)
Drop MAX_GT_TYPE_CHARS constant (Matt Roper)
Fix spelling and typos (Sanjay)
Fix VF FLR synchronization between all GTs (Michal)
Add a Workaround (Nitin)
Access VF's register using dedicated MMIO view (Michal)

Signed-off-by: Simona Vetter <simona.vetter@ffwll.ch>
From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/aQCl9uJxN6CWJ8Vg@fedora

+534 -216
+36 -14
drivers/gpu/drm/xe/display/xe_panic.c
··· 8 8 #include "intel_fb.h" 9 9 #include "intel_panic.h" 10 10 #include "xe_bo.h" 11 + #include "xe_res_cursor.h" 11 12 12 13 struct intel_panic { 13 - struct page **pages; 14 + struct xe_res_cursor res; 15 + struct iosys_map vmap; 16 + 14 17 int page; 15 - void *vaddr; 16 18 }; 17 19 18 20 static void xe_panic_kunmap(struct intel_panic *panic) 19 21 { 20 - if (panic->vaddr) { 21 - drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); 22 - kunmap_local(panic->vaddr); 23 - panic->vaddr = NULL; 22 + if (!panic->vmap.is_iomem && iosys_map_is_set(&panic->vmap)) { 23 + drm_clflush_virt_range(panic->vmap.vaddr, PAGE_SIZE); 24 + kunmap_local(panic->vmap.vaddr); 24 25 } 26 + iosys_map_clear(&panic->vmap); 27 + panic->page = -1; 25 28 } 26 29 27 30 /* ··· 49 46 new_page = offset >> PAGE_SHIFT; 50 47 offset = offset % PAGE_SIZE; 51 48 if (new_page != panic->page) { 52 - xe_panic_kunmap(panic); 49 + if (xe_bo_is_vram(bo)) { 50 + /* Display is always mapped on root tile */ 51 + struct xe_vram_region *vram = xe_bo_device(bo)->mem.vram; 52 + 53 + if (panic->page < 0 || new_page < panic->page) { 54 + xe_res_first(bo->ttm.resource, new_page * PAGE_SIZE, 55 + bo->ttm.base.size - new_page * PAGE_SIZE, &panic->res); 56 + } else { 57 + xe_res_next(&panic->res, PAGE_SIZE * (new_page - panic->page)); 58 + } 59 + iosys_map_set_vaddr_iomem(&panic->vmap, 60 + vram->mapping + panic->res.start); 61 + } else { 62 + xe_panic_kunmap(panic); 63 + iosys_map_set_vaddr(&panic->vmap, 64 + ttm_bo_kmap_try_from_panic(&bo->ttm, 65 + new_page)); 66 + } 53 67 panic->page = new_page; 54 - panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, 55 - panic->page); 56 68 } 57 - if (panic->vaddr) { 58 - u32 *pix = panic->vaddr + offset; 59 - *pix = color; 60 - } 69 + 70 + if (iosys_map_is_set(&panic->vmap)) 71 + iosys_map_wr(&panic->vmap, offset, u32, color); 61 72 } 62 73 63 74 struct intel_panic *intel_panic_alloc(void) ··· 85 68 86 69 int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) 
87 70 { 71 + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; 72 + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); 73 + 74 + if (xe_bo_is_vram(bo) && !xe_bo_is_visible_vram(bo)) 75 + return -ENODEV; 76 + 88 77 panic->page = -1; 89 78 sb->set_pixel = xe_panic_page_set_pixel; 90 79 return 0; ··· 99 76 void intel_panic_finish(struct intel_panic *panic) 100 77 { 101 78 xe_panic_kunmap(panic); 102 - panic->page = -1; 103 79 }
+6
drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
··· 31 31 #define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) 32 32 #define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) 33 33 34 + #define MEM_COPY_CMD (2 << 29 | 0x5a << 22 | 0x8) 35 + #define MEM_COPY_PAGE_COPY_MODE REG_BIT(19) 36 + #define MEM_COPY_MATRIX_COPY REG_BIT(17) 37 + #define MEM_COPY_SRC_MOCS_INDEX_MASK GENMASK(31, 28) 38 + #define MEM_COPY_DST_MOCS_INDEX_MASK GENMASK(6, 3) 39 + 34 40 #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) 35 41 #define PVC_MEM_SET_CMD_LEN_DW 7 36 42 #define PVC_MEM_SET_MATRIX REG_BIT(17)
+10
drivers/gpu/drm/xe/regs/xe_gt_regs.h
··· 37 37 #define GMD_ID XE_REG(0xd8c) 38 38 #define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) 39 39 #define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) 40 + /* 41 + * Spec defines these bits as "Reserved", but then make them assume some 42 + * meaning that depends on the ARCH. To avoid any confusion, call them 43 + * SUBIP_FLAG_MASK. 44 + */ 45 + #define GMD_ID_SUBIP_FLAG_MASK REG_GENMASK(13, 6) 40 46 #define GMD_ID_REVID REG_GENMASK(5, 0) 41 47 42 48 #define FORCEWAKE_ACK_GSC XE_REG(0xdf8) ··· 174 168 175 169 #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) 176 170 #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) 171 + #define FAST_CLEAR_VALIGN_FIX REG_BIT(13) 177 172 178 173 #define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) 179 174 ··· 550 543 551 544 #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) 552 545 #define COMP_CKN_IN REG_GENMASK(30, 29) 546 + 547 + #define MAIN_GAMCTRL_MODE XE_REG(0xef00) 548 + #define MAIN_GAMCTRL_QUEUE_SELECT REG_BIT(0) 553 549 554 550 #define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED) 555 551 #define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
+11 -5
drivers/gpu/drm/xe/tests/xe_pci_test.c
··· 44 44 KUNIT_ASSERT_EQ(test, mask, 0); 45 45 } 46 46 47 - static void check_platform_gt_count(struct kunit *test) 47 + static void check_platform_desc(struct kunit *test) 48 48 { 49 49 const struct pci_device_id *pci = test->param_value; 50 50 const struct xe_device_desc *desc = 51 51 (const struct xe_device_desc *)pci->driver_data; 52 - int max_gt = desc->max_gt_per_tile; 53 52 54 - KUNIT_ASSERT_GT(test, max_gt, 0); 55 - KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE); 53 + KUNIT_EXPECT_GT(test, desc->dma_mask_size, 0); 54 + 55 + KUNIT_EXPECT_GT(test, (unsigned int)desc->max_gt_per_tile, 0); 56 + KUNIT_EXPECT_LE(test, (unsigned int)desc->max_gt_per_tile, XE_MAX_GT_PER_TILE); 57 + 58 + KUNIT_EXPECT_GT(test, desc->va_bits, 0); 59 + KUNIT_EXPECT_LE(test, desc->va_bits, 64); 60 + 61 + KUNIT_EXPECT_GT(test, desc->vm_max_level, 0); 56 62 } 57 63 58 64 static struct kunit_case xe_pci_tests[] = { 59 65 KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), 60 66 KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), 61 - KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param), 67 + KUNIT_CASE_PARAM(check_platform_desc, xe_pci_id_gen_param), 62 68 {} 63 69 }; 64 70
+26 -3
drivers/gpu/drm/xe/xe_bo.c
··· 610 610 return vres->used_visible_size == mem->size; 611 611 } 612 612 613 + /** 614 + * xe_bo_is_visible_vram - check if BO is placed entirely in visible VRAM. 615 + * @bo: The BO 616 + * 617 + * This function checks whether a given BO resides entirely in memory visible from the CPU 618 + * 619 + * Returns: true if the BO is entirely visible, false otherwise. 620 + * 621 + */ 622 + bool xe_bo_is_visible_vram(struct xe_bo *bo) 623 + { 624 + if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo))) 625 + return false; 626 + 627 + return xe_ttm_resource_visible(bo->ttm.resource); 628 + } 629 + 613 630 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, 614 631 struct ttm_resource *mem) 615 632 { ··· 1652 1635 if (!mem_type_is_vram(ttm_bo->resource->mem_type)) 1653 1636 return -EIO; 1654 1637 1655 - if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) { 1638 + if (!xe_bo_is_visible_vram(bo) || len >= SZ_16K) { 1656 1639 struct xe_migrate *migrate = 1657 1640 mem_type_to_migrate(xe, ttm_bo->resource->mem_type); 1658 1641 ··· 2122 2105 * if the function should allocate a new one. 2123 2106 * @tile: The tile to select for migration of this bo, and the tile used for 2124 2107 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. 2125 - * @resv: Pointer to a locked shared reservation object to use fo this bo, 2108 + * @resv: Pointer to a locked shared reservation object to use for this bo, 2126 2109 * or NULL for the xe_bo to use its own. 2127 2110 * @bulk: The bulk move to use for LRU bumping, or NULL for external bos. 2128 2111 * @size: The storage size to use for the bo. 
··· 2275 2258 { 2276 2259 struct ttm_place *place = bo->placements; 2277 2260 u32 vram_flag, vram_stolen_flags; 2261 + 2262 + /* 2263 + * to allow fixed placement in GGTT of a VF, post-migration fixups would have to 2264 + * include selecting a new fixed offset and shifting the page ranges for it 2265 + */ 2266 + xe_assert(xe, !IS_SRIOV_VF(xe) || !(bo->flags & XE_BO_FLAG_GGTT)); 2278 2267 2279 2268 if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM)) 2280 2269 return -EINVAL; ··· 2652 2629 * @size: The storage size to use for the bo. 2653 2630 * @type: The TTM buffer object type. 2654 2631 * @flags: XE_BO_FLAG_ flags. 2655 - * @intr: Whether to execut any waits for backing store interruptible. 2632 + * @intr: Whether to execute any waits for backing store interruptible. 2656 2633 * 2657 2634 * Create a pinned and mapped bo. The bo will be external and not associated 2658 2635 * with a VM.
+1
drivers/gpu/drm/xe/xe_bo.h
··· 274 274 275 275 bool mem_type_is_vram(u32 mem_type); 276 276 bool xe_bo_is_vram(struct xe_bo *bo); 277 + bool xe_bo_is_visible_vram(struct xe_bo *bo); 277 278 bool xe_bo_is_stolen(struct xe_bo *bo); 278 279 bool xe_bo_is_stolen_devmem(struct xe_bo *bo); 279 280 bool xe_bo_is_vm_bound(struct xe_bo *bo);
+4 -4
drivers/gpu/drm/xe/xe_bo_doc.h
··· 12 12 * BO management 13 13 * ============= 14 14 * 15 - * TTM manages (placement, eviction, etc...) all BOs in XE. 15 + * TTM manages (placement, eviction, etc...) all BOs in Xe. 16 16 * 17 17 * BO creation 18 18 * =========== ··· 29 29 * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs 30 30 * are typically mapped in the GGTT (any kernel BOs aside memory for page tables 31 31 * are in the GGTT), are pinned (can't move or be evicted at runtime), have a 32 - * vmap (XE can access the memory via xe_map layer) and have contiguous physical 32 + * vmap (Xe can access the memory via xe_map layer) and have contiguous physical 33 33 * memory. 34 34 * 35 35 * More details of why kernel BOs are pinned and contiguous below. ··· 40 40 * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is 41 41 * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user 42 42 * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All 43 - * user BOs are evictable and user BOs are never pinned by XE. The allocation of 43 + * user BOs are evictable and user BOs are never pinned by Xe. The allocation of 44 44 * the backing store can be deferred from creation time until first use which is 45 45 * either mmap, bind, or pagefault. 46 46 * ··· 84 84 * ==================== 85 85 * 86 86 * All eviction (or in other words, moving a BO from one memory location to 87 - * another) is routed through TTM with a callback into XE. 87 + * another) is routed through TTM with a callback into Xe. 88 88 * 89 89 * Runtime eviction 90 90 * ----------------
+2 -3
drivers/gpu/drm/xe/xe_configfs.c
··· 27 27 * Overview 28 28 * ======== 29 29 * 30 - * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a 30 + * Configfs is a filesystem-based manager of kernel objects. Xe KMD registers a 31 31 * configfs subsystem called ``xe`` that creates a directory in the mounted 32 32 * configfs directory. The user can create devices under this directory and 33 33 * configure them as necessary. See Documentation/filesystems/configfs.rst for ··· 301 301 /* Some helpful macros to aid on the sizing of buffer allocation when parsing */ 302 302 #define MAX_ENGINE_CLASS_CHARS 5 303 303 #define MAX_ENGINE_INSTANCE_CHARS 2 304 - #define MAX_GT_TYPE_CHARS 7 305 304 306 305 static const struct engine_info engine_info[] = { 307 306 { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER }, ··· 312 313 }; 313 314 314 315 static const struct { 315 - const char name[MAX_GT_TYPE_CHARS + 1]; 316 + const char *name; 316 317 enum xe_gt_type type; 317 318 } gt_types[] = { 318 319 { .name = "primary", .type = XE_GT_TYPE_MAIN },
+1 -1
drivers/gpu/drm/xe/xe_device.c
··· 1217 1217 * 1218 1218 * /sys/bus/pci/devices/<device>/survivability_mode 1219 1219 * 1220 - * - Admin/userpsace consumer can use firmware flashing tools like fwupd to flash 1220 + * - Admin/userspace consumer can use firmware flashing tools like fwupd to flash 1221 1221 * firmware and restore device to normal operation. 1222 1222 */ 1223 1223
+6 -4
drivers/gpu/drm/xe/xe_device_types.h
··· 222 222 }; 223 223 224 224 /** 225 - * struct xe_device - Top level struct of XE device 225 + * struct xe_device - Top level struct of Xe device 226 226 */ 227 227 struct xe_device { 228 228 /** @drm: drm device */ ··· 245 245 u32 media_verx100; 246 246 /** @info.mem_region_mask: mask of valid memory regions */ 247 247 u32 mem_region_mask; 248 - /** @info.platform: XE platform enum */ 248 + /** @info.platform: Xe platform enum */ 249 249 enum xe_platform platform; 250 - /** @info.subplatform: XE subplatform enum */ 250 + /** @info.subplatform: Xe subplatform enum */ 251 251 enum xe_subplatform subplatform; 252 252 /** @info.devid: device ID */ 253 253 u16 devid; ··· 300 300 * pcode mailbox commands. 301 301 */ 302 302 u8 has_mbx_power_limits:1; 303 + /** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */ 304 + u8 has_mem_copy_instr:1; 303 305 /** @info.has_pxp: Device has PXP support */ 304 306 u8 has_pxp:1; 305 307 /** @info.has_range_tlb_inval: Has range based TLB invalidations */ ··· 661 659 }; 662 660 663 661 /** 664 - * struct xe_file - file handle for XE driver 662 + * struct xe_file - file handle for Xe driver 665 663 */ 666 664 struct xe_file { 667 665 /** @xe: xe DEVICE **/
+1 -1
drivers/gpu/drm/xe/xe_exec.c
··· 33 33 * - Binding at exec time 34 34 * - Flow controlling the ring at exec time 35 35 * 36 - * In XE we avoid all of this complication by not allowing a BO list to be 36 + * In Xe we avoid all of this complication by not allowing a BO list to be 37 37 * passed into an exec, using the dma-buf implicit sync uAPI, have binds as 38 38 * separate operations, and using the DRM scheduler to flow control the ring. 39 39 * Let's deep dive on each of these.
+2 -2
drivers/gpu/drm/xe/xe_force_wake_types.h
··· 52 52 }; 53 53 54 54 /** 55 - * struct xe_force_wake_domain - XE force wake domains 55 + * struct xe_force_wake_domain - Xe force wake domains 56 56 */ 57 57 struct xe_force_wake_domain { 58 58 /** @id: domain force wake id */ ··· 70 70 }; 71 71 72 72 /** 73 - * struct xe_force_wake - XE force wake 73 + * struct xe_force_wake - Xe force wake 74 74 */ 75 75 struct xe_force_wake { 76 76 /** @gt: back pointers to GT */
+3
drivers/gpu/drm/xe/xe_ggtt.c
··· 312 312 ggtt->pt_ops = &xelp_pt_ops; 313 313 314 314 ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); 315 + if (!ggtt->wq) 316 + return -ENOMEM; 317 + 315 318 __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); 316 319 317 320 err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
+12 -7
drivers/gpu/drm/xe/xe_gt.c
··· 818 818 unsigned int fw_ref; 819 819 int err; 820 820 821 - if (xe_device_wedged(gt_to_xe(gt))) 822 - return -ECANCELED; 821 + if (xe_device_wedged(gt_to_xe(gt))) { 822 + err = -ECANCELED; 823 + goto err_pm_put; 824 + } 823 825 824 826 /* We only support GT resets with GuC submission */ 825 - if (!xe_device_uc_enabled(gt_to_xe(gt))) 826 - return -ENODEV; 827 + if (!xe_device_uc_enabled(gt_to_xe(gt))) { 828 + err = -ENODEV; 829 + goto err_pm_put; 830 + } 827 831 828 832 xe_gt_info(gt, "reset started\n"); 829 - 830 - xe_pm_runtime_get(gt_to_xe(gt)); 831 833 832 834 if (xe_fault_inject_gt_reset()) { 833 835 err = -ECANCELED; ··· 877 875 xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); 878 876 879 877 xe_device_declare_wedged(gt_to_xe(gt)); 878 + err_pm_put: 880 879 xe_pm_runtime_put(gt_to_xe(gt)); 881 880 882 881 return err; ··· 899 896 return; 900 897 901 898 xe_gt_info(gt, "reset queued\n"); 902 - queue_work(gt->ordered_wq, &gt->reset.worker); 899 + xe_pm_runtime_get_noresume(gt_to_xe(gt)); 900 + if (!queue_work(gt->ordered_wq, &gt->reset.worker)) 901 + xe_pm_runtime_put(gt_to_xe(gt)); 903 902 } 904 903 905 904 void xe_gt_suspend_prepare(struct xe_gt *gt)
+1 -1
drivers/gpu/drm/xe/xe_gt_freq.c
··· 36 36 * - act_freq: The actual resolved frequency decided by PCODE. 37 37 * - cur_freq: The current one requested by GuC PC to the PCODE. 38 38 * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. 39 - * - rpa_freq: The Render Performance (RP) A level, which is the achiveable one. 39 + * - rpa_freq: The Render Performance (RP) A level, which is the achievable one. 40 40 * Calculated by PCODE at runtime based on multiple running conditions 41 41 * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. 42 42 * Calculated by PCODE at runtime based on multiple running conditions
+22 -13
drivers/gpu/drm/xe/xe_gt_mcr.c
··· 268 268 {}, 269 269 }; 270 270 271 - static const struct xe_mmio_range xe3p_xpc_psmi_grp19_steering_table[] = { 272 - { 0x00B500, 0x00B5FF }, 271 + static const struct xe_mmio_range xe3p_xpc_node_steering_table[] = { 272 + { 0x00B000, 0x00B0FF }, 273 + { 0x00D880, 0x00D8FF }, 273 274 {}, 274 275 }; 275 276 276 277 static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { 277 - { 0x00B600, 0x00B6FF }, /* PSMI0 */ 278 + { 0x00B500, 0x00B6FF }, /* PSMI */ 278 279 { 0x00C800, 0x00CFFF }, /* GAMCTRL */ 279 280 { 0x00F000, 0x00F0FF }, /* GAMCTRL */ 280 281 {}, ··· 283 282 284 283 static void init_steering_l3bank(struct xe_gt *gt) 285 284 { 285 + struct xe_device *xe = gt_to_xe(gt); 286 286 struct xe_mmio *mmio = &gt->mmio; 287 287 288 - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { 288 + if (GRAPHICS_VER(xe) >= 35) { 289 + unsigned int first_bank = xe_l3_bank_mask_ffs(gt->fuse_topo.l3_bank_mask); 290 + const int banks_per_node = 4; 291 + unsigned int node = first_bank / banks_per_node; 292 + 293 + /* L3BANK ranges place node in grpID, bank in instanceid */ 294 + gt->steering[L3BANK].group_target = node; 295 + gt->steering[L3BANK].instance_target = first_bank % banks_per_node; 296 + 297 + /* NODE ranges split the node across grpid and instanceid */ 298 + gt->steering[NODE].group_target = node >> 1; 299 + gt->steering[NODE].instance_target = node & 1; 300 + } else if (GRAPHICS_VERx100(xe) >= 1270) { 289 301 u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, 290 302 xe_mmio_read32(mmio, MIRROR_FUSE3)); 291 303 u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, ··· 311 297 gt->steering[L3BANK].group_target = __ffs(mslice_mask); 312 298 gt->steering[L3BANK].instance_target = 313 299 bank_mask & BIT(0) ? 
0 : 2; 314 - } else if (gt_to_xe(gt)->info.platform == XE_DG2) { 300 + } else if (xe->info.platform == XE_DG2) { 315 301 u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, 316 302 xe_mmio_read32(mmio, MIRROR_FUSE3)); 317 303 u32 bank = __ffs(mslice_mask) * 8; ··· 466 452 gt->steering[SQIDI_PSMI].instance_target = select & 0x1; 467 453 } 468 454 469 - static void init_steering_psmi(struct xe_gt *gt) 470 - { 471 - gt->steering[PSMI19].group_target = 19; 472 - gt->steering[PSMI19].instance_target = 0; 473 - } 474 - 475 455 static void init_steering_gam1(struct xe_gt *gt) 476 456 { 477 457 gt->steering[GAM1].group_target = 1; ··· 477 469 void (*init)(struct xe_gt *gt); 478 470 } xe_steering_types[] = { 479 471 [L3BANK] = { "L3BANK", init_steering_l3bank }, 472 + [NODE] = { "NODE", NULL }, /* initialized by l3bank init */ 480 473 [MSLICE] = { "MSLICE", init_steering_mslice }, 481 474 [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */ 482 475 [DSS] = { "DSS / XeCore", init_steering_dss }, 483 476 [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, 484 477 [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, 485 - [PSMI19] = { "PSMI[19]", init_steering_psmi }, 486 478 [GAM1] = { "GAMWKRS / STLB / GAMREQSTRM", init_steering_gam1 }, 487 479 [INSTANCE0] = { "INSTANCE 0", NULL }, 488 480 [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, ··· 532 524 gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table; 533 525 gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table; 534 526 gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; 535 - gt->steering[PSMI19].ranges = xe3p_xpc_psmi_grp19_steering_table; 527 + gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; 528 + gt->steering[NODE].ranges = xe3p_xpc_node_steering_table; 536 529 } else if (GRAPHICS_VER(xe) >= 20) { 537 530 gt->steering[DSS].ranges = xe2lpg_dss_steering_table; 538 531 gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table;
+8 -28
drivers/gpu/drm/xe/xe_gt_sriov_pf.c
··· 158 158 xe_gt_sriov_pf_service_update(gt); 159 159 } 160 160 161 - static u32 pf_get_vf_regs_stride(struct xe_device *xe) 162 - { 163 - return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; 164 - } 165 - 166 - static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride) 167 - { 168 - struct xe_reg pf_reg = vf_reg; 169 - 170 - pf_reg.vf = 0; 171 - pf_reg.addr += stride * vfid; 172 - 173 - return pf_reg; 174 - } 175 - 176 161 static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid) 177 162 { 178 - u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt)); 179 - struct xe_reg scratch; 180 - int n, count; 163 + struct xe_mmio mmio; 164 + int n; 165 + 166 + xe_mmio_init_vf_view(&mmio, &gt->mmio, vfid); 181 167 182 168 if (xe_gt_is_media_type(gt)) { 183 - count = MED_VF_SW_FLAG_COUNT; 184 - for (n = 0; n < count; n++) { 185 - scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride); 186 - xe_mmio_write32(&gt->mmio, scratch, 0); 187 - } 169 + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) 170 + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), 0); 188 171 } else { 189 - count = VF_SW_FLAG_COUNT; 190 - for (n = 0; n < count; n++) { 191 - scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride); 192 - xe_mmio_write32(&gt->mmio, scratch, 0); 193 - } 172 + for (n = 0; n < VF_SW_FLAG_COUNT; n++) 173 + xe_mmio_write32(&mmio, VF_SW_FLAG(n), 0); 194 174 } 195 175 } 196 176
+2
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
··· 997 997 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE); 998 998 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); 999 999 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START); 1000 + 1001 + xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); 1000 1002 } 1001 1003 } 1002 1004
+1 -6
drivers/gpu/drm/xe/xe_gt_sriov_vf.c
··· 31 31 #include "xe_lrc.h" 32 32 #include "xe_memirq.h" 33 33 #include "xe_mmio.h" 34 - #include "xe_pm.h" 35 34 #include "xe_sriov.h" 36 35 #include "xe_sriov_vf.h" 37 36 #include "xe_sriov_vf_ccs.h" ··· 738 739 gt->sriov.vf.migration.recovery_queued = true; 739 740 WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); 740 741 WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true); 741 - smp_wmb(); /* Ensure above writes visable before wake */ 742 + smp_wmb(); /* Ensure above writes visible before wake */ 742 743 743 744 xe_guc_ct_wake_waiters(&gt->uc.guc.ct); 744 745 ··· 1217 1218 1218 1219 xe_gt_sriov_dbg(gt, "migration recovery in progress\n"); 1219 1220 1220 - xe_pm_runtime_get(xe); 1221 1221 retry = vf_post_migration_shutdown(gt); 1222 1222 if (retry) 1223 1223 goto queue; ··· 1239 1241 1240 1242 vf_post_migration_kickstart(gt); 1241 1243 1242 - xe_pm_runtime_put(xe); 1243 1244 xe_gt_sriov_notice(gt, "migration recovery ended\n"); 1244 1245 return; 1245 1246 fail: 1246 1247 vf_post_migration_abort(gt); 1247 - xe_pm_runtime_put(xe); 1248 1248 xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err)); 1249 1249 xe_device_declare_wedged(xe); 1250 1250 return; ··· 1250 1254 queue: 1251 1255 xe_gt_sriov_info(gt, "Re-queuing migration recovery\n"); 1252 1256 queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker); 1253 - xe_pm_runtime_put(xe); 1254 1257 } 1255 1258 1256 1259 static void migration_worker_func(struct work_struct *w)
+7
drivers/gpu/drm/xe/xe_gt_topology.c
··· 309 309 return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); 310 310 } 311 311 312 + /* Used to obtain the index of the first L3 bank. */ 313 + unsigned int 314 + xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask) 315 + { 316 + return find_first_bit(mask, XE_MAX_L3_BANK_MASK_BITS); 317 + } 318 + 312 319 /** 313 320 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant 314 321 * @gt: GT to check
+2
drivers/gpu/drm/xe/xe_gt_topology.h
··· 40 40 41 41 unsigned int 42 42 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); 43 + unsigned int 44 + xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask); 43 45 44 46 bool 45 47 xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
+1 -8
drivers/gpu/drm/xe/xe_gt_types.h
··· 66 66 */ 67 67 enum xe_steering_type { 68 68 L3BANK, 69 + NODE, 69 70 MSLICE, 70 71 LNCF, 71 72 DSS, 72 73 OADDRM, 73 74 SQIDI_PSMI, 74 - 75 - /* 76 - * The bspec lists multiple ranges as "PSMI," but the different 77 - * ranges with that label have different grpid steering values so we 78 - * treat them independently in code. Note that the ranges with grpid=0 79 - * are included in the INSTANCE0 group above. 80 - */ 81 - PSMI19, 82 75 83 76 /* 84 77 * Although most GAM ranges must be steered to (0,0) and thus use the
+46
drivers/gpu/drm/xe/xe_guc.c
··· 91 91 if (xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev))) 92 92 flags |= GUC_CTL_ENABLE_PSMI_LOGGING; 93 93 94 + if (xe_guc_using_main_gamctrl_queues(guc)) 95 + flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES; 96 + 94 97 return flags; 95 98 } 96 99 ··· 1258 1255 1259 1256 int xe_guc_upload(struct xe_guc *guc) 1260 1257 { 1258 + struct xe_gt *gt = guc_to_gt(guc); 1259 + 1261 1260 xe_guc_ads_populate(&guc->ads); 1261 + 1262 + if (xe_guc_using_main_gamctrl_queues(guc)) 1263 + xe_mmio_write32(&gt->mmio, MAIN_GAMCTRL_MODE, MAIN_GAMCTRL_QUEUE_SELECT); 1262 1264 1263 1265 return __xe_guc_upload(guc); 1264 1266 } ··· 1663 1655 xe_guc_reset_prepare(guc); 1664 1656 xe_guc_ct_stop(&guc->ct); 1665 1657 xe_guc_submit_wedge(guc); 1658 + } 1659 + 1660 + /** 1661 + * xe_guc_using_main_gamctrl_queues() - Detect which reporting queues to use. 1662 + * @guc: The GuC object 1663 + * 1664 + * For Xe3p and beyond, we want to program the hardware to use the 1665 + * "Main GAMCTRL queue" rather than the legacy queue before we upload 1666 + * the GuC firmware. This will allow the GuC to use a new set of 1667 + * registers for pagefault handling and avoid some unnecessary 1668 + * complications with MCR register range handling. 1669 + * 1670 + * Return: true if can use new main gamctrl queues. 1671 + */ 1672 + bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc) 1673 + { 1674 + struct xe_gt *gt = guc_to_gt(guc); 1675 + 1676 + /* 1677 + * For Xe3p media gt (35), the GuC and the CS subunits may be still Xe3 1678 + * that lacks the Main GAMCTRL support. Reserved bits from the GMD_ID 1679 + * inform the IP version of the subunits. 
1680 + */ 1681 + if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) == 35) { 1682 + u32 val = xe_mmio_read32(&gt->mmio, GMD_ID); 1683 + u32 subip = REG_FIELD_GET(GMD_ID_SUBIP_FLAG_MASK, val); 1684 + 1685 + if (!subip) 1686 + return true; 1687 + 1688 + xe_gt_WARN(gt, subip != 1, 1689 + "GMD_ID has unknown value in the SUBIP_FLAG field - 0x%x\n", 1690 + subip); 1691 + 1692 + return false; 1693 + } 1694 + 1695 + return GT_VER(gt) >= 35; 1666 1696 } 1667 1697 1668 1698 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+1
drivers/gpu/drm/xe/xe_guc.h
··· 52 52 void xe_guc_stop(struct xe_guc *guc); 53 53 int xe_guc_start(struct xe_guc *guc); 54 54 void xe_guc_declare_wedged(struct xe_guc *guc); 55 + bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc); 55 56 56 57 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 57 58 int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len);
+5 -1
drivers/gpu/drm/xe/xe_guc_ads.c
··· 820 820 static void guc_um_init_params(struct xe_guc_ads *ads) 821 821 { 822 822 u32 um_queue_offset = guc_ads_um_queues_offset(ads); 823 + struct xe_guc *guc = ads_to_guc(ads); 823 824 u64 base_dpa; 824 825 u32 base_ggtt; 826 + bool with_dpa; 825 827 int i; 828 + 829 + with_dpa = !xe_guc_using_main_gamctrl_queues(guc); 826 830 827 831 base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; 828 832 base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; 829 833 830 834 for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { 831 835 ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, 832 - base_dpa + (i * GUC_UM_QUEUE_SIZE)); 836 + with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0); 833 837 ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, 834 838 base_ggtt + (i * GUC_UM_QUEUE_SIZE)); 835 839 ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
+1 -1
drivers/gpu/drm/xe/xe_guc_ads_types.h
··· 14 14 * struct xe_guc_ads - GuC additional data structures (ADS) 15 15 */ 16 16 struct xe_guc_ads { 17 - /** @bo: XE BO for GuC ads blob */ 17 + /** @bo: Xe BO for GuC ads blob */ 18 18 struct xe_bo *bo; 19 19 /** @golden_lrc_size: golden LRC size */ 20 20 size_t golden_lrc_size;
+1 -1
drivers/gpu/drm/xe/xe_guc_ct_types.h
··· 126 126 * for the H2G and G2H requests sent and received through the buffers. 127 127 */ 128 128 struct xe_guc_ct { 129 - /** @bo: XE BO for CT */ 129 + /** @bo: Xe BO for CT */ 130 130 struct xe_bo *bo; 131 131 /** @lock: protects everything in CT layer */ 132 132 struct mutex lock;
+1
drivers/gpu/drm/xe/xe_guc_fwif.h
··· 113 113 #define GUC_CTL_ENABLE_SLPC BIT(2) 114 114 #define GUC_CTL_ENABLE_LITE_RESTORE BIT(4) 115 115 #define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7) 116 + #define GUC_CTL_MAIN_GAMCTRL_QUEUES BIT(9) 116 117 #define GUC_CTL_DISABLE_SCHEDULER BIT(14) 117 118 118 119 #define GUC_CTL_DEBUG 3
+1 -1
drivers/gpu/drm/xe/xe_guc_log_types.h
··· 44 44 struct xe_guc_log { 45 45 /** @level: GuC log level */ 46 46 u32 level; 47 - /** @bo: XE BO for GuC log */ 47 + /** @bo: Xe BO for GuC log */ 48 48 struct xe_bo *bo; 49 49 /** @stats: logging related stats */ 50 50 struct {
+1 -1
drivers/gpu/drm/xe/xe_guc_submit.c
··· 1920 1920 } 1921 1921 1922 1922 /* 1923 - * All of these functions are an abstraction layer which other parts of XE can 1923 + * All of these functions are an abstraction layer which other parts of Xe can 1924 1924 * use to trap into the GuC backend. All of these functions, aside from init, 1925 1925 * really shouldn't do much other than trap into the DRM scheduler which 1926 1926 * synchronizes these operations.
+1 -1
drivers/gpu/drm/xe/xe_guc_tlb_inval.c
··· 207 207 * @guc: GuC object 208 208 * @tlb_inval: TLB invalidation client 209 209 * 210 - * Inititialize GuC TLB invalidation by setting back pointer in TLB invalidation 210 + * Initialize GuC TLB invalidation by setting back pointer in TLB invalidation 211 211 * client to the GuC and setting GuC backend ops. 212 212 */ 213 213 void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
+2 -2
drivers/gpu/drm/xe/xe_map.h
··· 14 14 * DOC: Map layer 15 15 * 16 16 * All access to any memory shared with a device (both sysmem and vram) in the 17 - * XE driver should go through this layer (xe_map). This layer is built on top 17 + * Xe driver should go through this layer (xe_map). This layer is built on top 18 18 * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory` 19 - * and with extra hooks into the XE driver that allows adding asserts to memory 19 + * and with extra hooks into the Xe driver that allows adding asserts to memory 20 20 * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics). 21 21 */ 22 22
+107 -25
drivers/gpu/drm/xe/xe_migrate.c
··· 699 699 } 700 700 701 701 #define EMIT_COPY_DW 10 702 - static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, 703 - u64 src_ofs, u64 dst_ofs, unsigned int size, 704 - unsigned int pitch) 702 + static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, 703 + u64 dst_ofs, unsigned int size, 704 + unsigned int pitch) 705 705 { 706 706 struct xe_device *xe = gt_to_xe(gt); 707 707 u32 mocs = 0; ··· 728 728 bb->cs[bb->len++] = pitch | mocs; 729 729 bb->cs[bb->len++] = lower_32_bits(src_ofs); 730 730 bb->cs[bb->len++] = upper_32_bits(src_ofs); 731 + } 732 + 733 + #define PAGE_COPY_MODE_PS SZ_256 /* hw uses 256 bytes as the page-size */ 734 + static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, 735 + u64 dst_ofs, unsigned int size, unsigned int pitch) 736 + { 737 + u32 mode, copy_type, width; 738 + 739 + xe_gt_assert(gt, IS_ALIGNED(size, pitch)); 740 + xe_gt_assert(gt, pitch <= U16_MAX); 741 + xe_gt_assert(gt, pitch); 742 + xe_gt_assert(gt, size); 743 + 744 + if (IS_ALIGNED(size, PAGE_COPY_MODE_PS) && 745 + IS_ALIGNED(lower_32_bits(src_ofs), PAGE_COPY_MODE_PS) && 746 + IS_ALIGNED(lower_32_bits(dst_ofs), PAGE_COPY_MODE_PS)) { 747 + mode = MEM_COPY_PAGE_COPY_MODE; 748 + copy_type = 0; /* linear copy */ 749 + width = size / PAGE_COPY_MODE_PS; 750 + } else if (pitch > 1) { 751 + xe_gt_assert(gt, size / pitch <= U16_MAX); 752 + mode = 0; /* BYTE_COPY */ 753 + copy_type = MEM_COPY_MATRIX_COPY; 754 + width = pitch; 755 + } else { 756 + mode = 0; /* BYTE_COPY */ 757 + copy_type = 0; /* linear copy */ 758 + width = size; 759 + } 760 + 761 + xe_gt_assert(gt, width <= U16_MAX); 762 + 763 + bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type; 764 + bb->cs[bb->len++] = width - 1; 765 + bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */ 766 + bb->cs[bb->len++] = pitch - 1; 767 + bb->cs[bb->len++] = pitch - 1; 768 + bb->cs[bb->len++] = lower_32_bits(src_ofs); 769 + bb->cs[bb->len++] = 
upper_32_bits(src_ofs); 770 + bb->cs[bb->len++] = lower_32_bits(dst_ofs); 771 + bb->cs[bb->len++] = upper_32_bits(dst_ofs); 772 + bb->cs[bb->len++] = FIELD_PREP(MEM_COPY_SRC_MOCS_INDEX_MASK, gt->mocs.uc_index) | 773 + FIELD_PREP(MEM_COPY_DST_MOCS_INDEX_MASK, gt->mocs.uc_index); 774 + } 775 + 776 + static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, 777 + u64 src_ofs, u64 dst_ofs, unsigned int size, 778 + unsigned int pitch) 779 + { 780 + struct xe_device *xe = gt_to_xe(gt); 781 + 782 + if (xe->info.has_mem_copy_instr) 783 + emit_mem_copy(gt, bb, src_ofs, dst_ofs, size, pitch); 784 + else 785 + emit_xy_fast_copy(gt, bb, src_ofs, dst_ofs, size, pitch); 731 786 } 732 787 733 788 static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) ··· 902 847 &ccs_it); 903 848 904 849 while (size) { 905 - u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ 850 + u32 batch_size = 1; /* MI_BATCH_BUFFER_END */ 906 851 struct xe_sched_job *job; 907 852 struct xe_bb *bb; 908 853 u32 flush_flags = 0; ··· 1367 1312 1368 1313 /* Calculate final sizes and batch size.. */ 1369 1314 pte_flags = clear_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; 1370 - batch_size = 2 + 1315 + batch_size = 1 + 1371 1316 pte_update_size(m, pte_flags, src, &src_it, 1372 1317 &clear_L0, &clear_L0_ofs, &clear_L0_pt, 1373 1318 clear_bo_data ? 
emit_clear_cmd_len(gt) : 0, 0, ··· 1853 1798 u32 ptes; 1854 1799 int i = 0; 1855 1800 1801 + xe_tile_assert(m->tile, PAGE_ALIGNED(size)); 1802 + 1856 1803 ptes = DIV_ROUND_UP(size, gpu_page_size); 1857 1804 while (ptes) { 1858 1805 u32 chunk = min(MAX_PTE_PER_SDI, ptes); 1859 1806 1860 - chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE); 1807 + if (!level) 1808 + chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE); 1809 + 1861 1810 bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); 1862 1811 bb->cs[bb->len++] = pt_offset; 1863 1812 bb->cs[bb->len++] = 0; ··· 1870 1811 ptes -= chunk; 1871 1812 1872 1813 while (chunk--) { 1873 - u64 addr = sram_addr[i].addr & ~(gpu_page_size - 1); 1874 - u64 pte, orig_addr = addr; 1814 + u64 addr = sram_addr[i].addr; 1815 + u64 pte; 1875 1816 1876 1817 xe_tile_assert(m->tile, sram_addr[i].proto == 1877 1818 DRM_INTERCONNECT_SYSTEM); 1878 1819 xe_tile_assert(m->tile, addr); 1820 + xe_tile_assert(m->tile, PAGE_ALIGNED(addr)); 1879 1821 1880 1822 again: 1881 1823 pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, ··· 1887 1827 1888 1828 if (gpu_page_size < PAGE_SIZE) { 1889 1829 addr += XE_PAGE_SIZE; 1890 - if (orig_addr + PAGE_SIZE != addr) { 1830 + if (!PAGE_ALIGNED(addr)) { 1891 1831 chunk--; 1892 1832 goto again; 1893 1833 } ··· 1920 1860 #define XE_CACHELINE_BYTES 64ull 1921 1861 #define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1) 1922 1862 1863 + static u32 xe_migrate_copy_pitch(struct xe_device *xe, u32 len) 1864 + { 1865 + u32 pitch; 1866 + 1867 + if (IS_ALIGNED(len, PAGE_SIZE)) 1868 + pitch = PAGE_SIZE; 1869 + else if (IS_ALIGNED(len, SZ_4K)) 1870 + pitch = SZ_4K; 1871 + else if (IS_ALIGNED(len, SZ_256)) 1872 + pitch = SZ_256; 1873 + else if (IS_ALIGNED(len, 4)) 1874 + pitch = 4; 1875 + else 1876 + pitch = 1; 1877 + 1878 + xe_assert(xe, pitch > 1 || xe->info.has_mem_copy_instr); 1879 + return pitch; 1880 + } 1881 + 1923 1882 static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, 1924 1883 unsigned long 
len, 1925 1884 unsigned long sram_offset, ··· 1950 1871 struct xe_device *xe = gt_to_xe(gt); 1951 1872 bool use_usm_batch = xe->info.has_usm; 1952 1873 struct dma_fence *fence = NULL; 1953 - u32 batch_size = 2; 1874 + u32 batch_size = 1; 1954 1875 u64 src_L0_ofs, dst_L0_ofs; 1955 1876 struct xe_sched_job *job; 1956 1877 struct xe_bb *bb; 1957 1878 u32 update_idx, pt_slot = 0; 1958 1879 unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE); 1959 - unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? 1960 - PAGE_SIZE : 4; 1880 + unsigned int pitch = xe_migrate_copy_pitch(xe, len); 1961 1881 int err; 1962 1882 unsigned long i, j; 1963 1883 bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset); 1964 1884 1965 - if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || 1966 - (sram_offset | vram_addr) & XE_CACHELINE_MASK)) 1885 + if (!xe->info.has_mem_copy_instr && 1886 + drm_WARN_ON(&xe->drm, 1887 + (!IS_ALIGNED(len, pitch)) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) 1967 1888 return ERR_PTR(-EOPNOTSUPP); 1968 1889 1969 1890 xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER); 1970 1891 1971 - batch_size += pte_update_cmd_size(len); 1892 + batch_size += pte_update_cmd_size(npages << PAGE_SHIFT); 1972 1893 batch_size += EMIT_COPY_DW; 1973 1894 1974 1895 bb = xe_bb_new(gt, batch_size, use_usm_batch); ··· 1997 1918 1998 1919 if (use_pde) 1999 1920 build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes, 2000 - sram_addr, len + sram_offset, 1); 1921 + sram_addr, npages << PAGE_SHIFT, 1); 2001 1922 else 2002 1923 build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, 2003 - sram_addr, len + sram_offset, 0); 1924 + sram_addr, npages << PAGE_SHIFT, 0); 2004 1925 2005 1926 if (dir == XE_MIGRATE_COPY_TO_VRAM) { 2006 1927 if (use_pde) ··· 2060 1981 * 2061 1982 * Copy from an array dma addresses to a VRAM device physical address 2062 1983 * 2063 - * Return: dma fence for migrate to signal completion on succees, ERR_PTR 
on 1984 + * Return: dma fence for migrate to signal completion on success, ERR_PTR on 2064 1985 * failure 2065 1986 */ 2066 1987 struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, ··· 2081 2002 * 2082 2003 * Copy from a VRAM device physical address to an array dma addresses 2083 2004 * 2084 - * Return: dma fence for migrate to signal completion on succees, ERR_PTR on 2005 + * Return: dma fence for migrate to signal completion on success, ERR_PTR on 2085 2006 * failure 2086 2007 */ 2087 2008 struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, ··· 2182 2103 xe_bo_assert_held(bo); 2183 2104 2184 2105 /* Use bounce buffer for small access and unaligned access */ 2185 - if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || 2186 - !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { 2106 + if (!xe->info.has_mem_copy_instr && 2107 + (!IS_ALIGNED(len, 4) || 2108 + !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) || 2109 + !IS_ALIGNED(offset, XE_CACHELINE_BYTES))) { 2187 2110 int buf_offset = 0; 2188 2111 void *bounce; 2189 2112 int err; ··· 2247 2166 u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + 2248 2167 cursor.start; 2249 2168 int current_bytes; 2169 + u32 pitch; 2250 2170 2251 2171 if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER) 2252 2172 current_bytes = min_t(int, bytes_left, ··· 2255 2173 else 2256 2174 current_bytes = min_t(int, bytes_left, cursor.size); 2257 2175 2258 - if (current_bytes & ~PAGE_MASK) { 2259 - int pitch = 4; 2260 - 2176 + pitch = xe_migrate_copy_pitch(xe, current_bytes); 2177 + if (xe->info.has_mem_copy_instr) 2178 + current_bytes = min_t(int, current_bytes, U16_MAX * pitch); 2179 + else 2261 2180 current_bytes = min_t(int, current_bytes, 2262 2181 round_down(S16_MAX * pitch, 2263 2182 XE_CACHELINE_BYTES)); 2264 - } 2265 2183 2266 2184 __fence = xe_migrate_vram(m, current_bytes, 2267 2185 (unsigned long)buf & ~PAGE_MASK,
+1 -1
drivers/gpu/drm/xe/xe_migrate_doc.h
··· 9 9 /** 10 10 * DOC: Migrate Layer 11 11 * 12 - * The XE migrate layer is used generate jobs which can copy memory (eviction), 12 + * The Xe migrate layer is used to generate jobs which can copy memory (eviction), 13 13 * clear memory, or program tables (binds). This layer exists in every GT, has 14 14 * a migrate engine, and uses a special VM for all generated jobs. 15 15 *
+29
drivers/gpu/drm/xe/xe_mmio.c
··· 379 379 { 380 380 return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false); 381 381 } 382 + 383 + #ifdef CONFIG_PCI_IOV 384 + static size_t vf_regs_stride(struct xe_device *xe) 385 + { 386 + return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; 387 + } 388 + 389 + /** 390 + * xe_mmio_init_vf_view() - Initialize an MMIO instance for accesses like the VF 391 + * @mmio: the target &xe_mmio to initialize as VF's view 392 + * @base: the source &xe_mmio to initialize from 393 + * @vfid: the VF identifier 394 + */ 395 + void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid) 396 + { 397 + struct xe_tile *tile = base->tile; 398 + struct xe_device *xe = tile->xe; 399 + size_t offset = vf_regs_stride(xe) * vfid; 400 + 401 + xe_assert(xe, IS_SRIOV_PF(xe)); 402 + xe_assert(xe, vfid); 403 + xe_assert(xe, !base->sriov_vf_gt); 404 + xe_assert(xe, base->regs_size > offset); 405 + 406 + *mmio = *base; 407 + mmio->regs += offset; 408 + mmio->regs_size -= offset; 409 + } 410 + #endif
+4
drivers/gpu/drm/xe/xe_mmio.h
··· 42 42 return &xe->tiles[0].mmio; 43 43 } 44 44 45 + #ifdef CONFIG_PCI_IOV 46 + void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid); 47 + #endif 48 + 45 49 #endif
+26
drivers/gpu/drm/xe/xe_mocs.c
··· 568 568 .dump = xe2_mocs_dump, 569 569 }; 570 570 571 + /* 572 + * Note that the "L3" and "L4" register fields actually control the L2 and L3 573 + * caches respectively on this platform. 574 + */ 575 + static const struct xe_mocs_entry xe3p_xpc_mocs_table[] = { 576 + /* Defer to PAT */ 577 + MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), 578 + /* UC */ 579 + MOCS_ENTRY(1, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0), 580 + /* L2 */ 581 + MOCS_ENTRY(2, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0), 582 + /* L3 */ 583 + MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0), 584 + /* L2 + L3 */ 585 + MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), 586 + }; 587 + 571 588 static unsigned int get_mocs_settings(struct xe_device *xe, 572 589 struct xe_mocs_info *info) 573 590 { ··· 593 576 memset(info, 0, sizeof(struct xe_mocs_info)); 594 577 595 578 switch (xe->info.platform) { 579 + case XE_CRESCENTISLAND: 580 + info->ops = &xe2_mocs_ops; 581 + info->table_size = ARRAY_SIZE(xe3p_xpc_mocs_table); 582 + info->table = xe3p_xpc_mocs_table; 583 + info->num_mocs_regs = XE2_NUM_MOCS_ENTRIES; 584 + info->uc_index = 1; 585 + info->wb_index = 4; 586 + info->unused_entries_index = 4; 587 + break; 596 588 case XE_NOVALAKE_S: 597 589 case XE_PANTHERLAKE: 598 590 case XE_LUNARLAKE:
+20
drivers/gpu/drm/xe/xe_pci.c
··· 342 342 .has_display = true, 343 343 .has_flat_ccs = 1, 344 344 .has_pxp = true, 345 + .has_mem_copy_instr = true, 345 346 .max_gt_per_tile = 2, 346 347 .needs_scratch = true, 347 348 .va_bits = 48, ··· 363 362 .has_heci_cscfi = 1, 364 363 .has_late_bind = true, 365 364 .has_sriov = true, 365 + .has_mem_copy_instr = true, 366 366 .max_gt_per_tile = 2, 367 367 .needs_scratch = true, 368 368 .subplatforms = (const struct xe_subplatform_desc[]) { ··· 380 378 .has_display = true, 381 379 .has_flat_ccs = 1, 382 380 .has_sriov = true, 381 + .has_mem_copy_instr = true, 383 382 .max_gt_per_tile = 2, 384 383 .needs_scratch = true, 385 384 .needs_shared_vf_gt_wq = true, ··· 393 390 .dma_mask_size = 46, 394 391 .has_display = true, 395 392 .has_flat_ccs = 1, 393 + .has_mem_copy_instr = true, 396 394 .max_gt_per_tile = 2, 397 395 .require_force_probe = true, 398 396 .va_bits = 48, 397 + .vm_max_level = 4, 398 + }; 399 + 400 + static const struct xe_device_desc cri_desc = { 401 + DGFX_FEATURES, 402 + PLATFORM(CRESCENTISLAND), 403 + .dma_mask_size = 52, 404 + .has_display = false, 405 + .has_flat_ccs = false, 406 + .has_mbx_power_limits = true, 407 + .has_sriov = true, 408 + .max_gt_per_tile = 2, 409 + .require_force_probe = true, 410 + .va_bits = 57, 399 411 .vm_max_level = 4, 400 412 }; 401 413 ··· 441 423 INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), 442 424 INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc), 443 425 INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc), 426 + INTEL_CRI_IDS(INTEL_PCI_DEVICE, &cri_desc), 444 427 { } 445 428 }; 446 429 MODULE_DEVICE_TABLE(pci, pciidlist); ··· 674 655 xe->info.has_pxp = desc->has_pxp; 675 656 xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && 676 657 desc->has_sriov; 658 + xe->info.has_mem_copy_instr = desc->has_mem_copy_instr; 677 659 xe->info.skip_guc_pc = desc->skip_guc_pc; 678 660 xe->info.skip_mtcfg = desc->skip_mtcfg; 679 661 xe->info.skip_pcode = desc->skip_pcode;
+1
drivers/gpu/drm/xe/xe_pci_types.h
··· 46 46 u8 has_late_bind:1; 47 47 u8 has_llc:1; 48 48 u8 has_mbx_power_limits:1; 49 + u8 has_mem_copy_instr:1; 49 50 u8 has_pxp:1; 50 51 u8 has_sriov:1; 51 52 u8 needs_scratch:1;
+1
drivers/gpu/drm/xe/xe_platform_types.h
··· 25 25 XE_BATTLEMAGE, 26 26 XE_PANTHERLAKE, 27 27 XE_NOVALAKE_S, 28 + XE_CRESCENTISLAND, 28 29 }; 29 30 30 31 enum xe_subplatform {
+1 -1
drivers/gpu/drm/xe/xe_pm.c
··· 102 102 /** 103 103 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend 104 104 * 105 - * Annotation to use where the code might block or sieze to make 105 + * Annotation to use where the code might block or seize to make 106 106 * progress pending resume completion. 107 107 */ 108 108 void xe_pm_might_block_on_suspend(void)
+1 -1
drivers/gpu/drm/xe/xe_preempt_fence_types.h
··· 12 12 struct xe_exec_queue; 13 13 14 14 /** 15 - * struct xe_preempt_fence - XE preempt fence 15 + * struct xe_preempt_fence - Xe preempt fence 16 16 * 17 17 * hardware and triggers a callback once the xe_engine is complete. 18 18 */
+14 -14
drivers/gpu/drm/xe/xe_pt.c
··· 715 715 .vm = vm, 716 716 .tile = tile, 717 717 .curs = &curs, 718 - .va_curs_start = range ? range->base.itree.start : 718 + .va_curs_start = range ? xe_svm_range_start(range) : 719 719 xe_vma_start(vma), 720 720 .vma = vma, 721 721 .wupd.entries = entries, ··· 734 734 } 735 735 if (xe_svm_range_has_dma_mapping(range)) { 736 736 xe_res_first_dma(range->base.pages.dma_addr, 0, 737 - range->base.itree.last + 1 - range->base.itree.start, 737 + xe_svm_range_size(range), 738 738 &curs); 739 739 xe_svm_range_debug(range, "BIND PREPARE - MIXED"); 740 740 } else { ··· 778 778 779 779 walk_pt: 780 780 ret = xe_pt_walk_range(&pt->base, pt->level, 781 - range ? range->base.itree.start : xe_vma_start(vma), 782 - range ? range->base.itree.last + 1 : xe_vma_end(vma), 781 + range ? xe_svm_range_start(range) : xe_vma_start(vma), 782 + range ? xe_svm_range_end(range) : xe_vma_end(vma), 783 783 &xe_walk.base); 784 784 785 785 *num_entries = xe_walk.wupd.num_used_entries; ··· 975 975 if (!(pt_mask & BIT(tile->id))) 976 976 return false; 977 977 978 - (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start, 979 - range->base.itree.last + 1, &xe_walk.base); 978 + (void)xe_pt_walk_shared(&pt->base, pt->level, xe_svm_range_start(range), 979 + xe_svm_range_end(range), &xe_walk.base); 980 980 981 981 return xe_walk.needs_invalidate; 982 982 } ··· 1661 1661 struct xe_svm_range *range, 1662 1662 struct xe_vm_pgtable_update *entries) 1663 1663 { 1664 - u64 start = range ? range->base.itree.start : xe_vma_start(vma); 1665 - u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma); 1664 + u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma); 1665 + u64 end = range ? 
xe_svm_range_end(range) : xe_vma_end(vma); 1666 1666 struct xe_pt_stage_unbind_walk xe_walk = { 1667 1667 .base = { 1668 1668 .ops = &xe_pt_stage_unbind_ops, ··· 1872 1872 1873 1873 vm_dbg(&xe_vma_vm(vma)->xe->drm, 1874 1874 "Preparing bind, with range [%lx...%lx)\n", 1875 - range->base.itree.start, range->base.itree.last); 1875 + xe_svm_range_start(range), xe_svm_range_end(range) - 1); 1876 1876 1877 1877 pt_op->vma = NULL; 1878 1878 pt_op->bind = true; ··· 1887 1887 pt_op->num_entries, true); 1888 1888 1889 1889 xe_pt_update_ops_rfence_interval(pt_update_ops, 1890 - range->base.itree.start, 1891 - range->base.itree.last + 1); 1890 + xe_svm_range_start(range), 1891 + xe_svm_range_end(range)); 1892 1892 ++pt_update_ops->current_op; 1893 1893 pt_update_ops->needs_svm_lock = true; 1894 1894 ··· 1983 1983 1984 1984 vm_dbg(&vm->xe->drm, 1985 1985 "Preparing unbind, with range [%lx...%lx)\n", 1986 - range->base.itree.start, range->base.itree.last); 1986 + xe_svm_range_start(range), xe_svm_range_end(range) - 1); 1987 1987 1988 1988 pt_op->vma = XE_INVALID_VMA; 1989 1989 pt_op->bind = false; ··· 1994 1994 1995 1995 xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, 1996 1996 pt_op->num_entries, false); 1997 - xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start, 1998 - range->base.itree.last + 1); 1997 + xe_pt_update_ops_rfence_interval(pt_update_ops, xe_svm_range_start(range), 1998 + xe_svm_range_end(range)); 1999 1999 ++pt_update_ops->current_op; 2000 2000 pt_update_ops->needs_svm_lock = true; 2001 2001 pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) ||
+2 -2
drivers/gpu/drm/xe/xe_range_fence.h
··· 13 13 struct xe_range_fence_tree; 14 14 struct xe_range_fence; 15 15 16 - /** struct xe_range_fence_ops - XE range fence ops */ 16 + /** struct xe_range_fence_ops - Xe range fence ops */ 17 17 struct xe_range_fence_ops { 18 18 /** @free: free range fence op */ 19 19 void (*free)(struct xe_range_fence *rfence); 20 20 }; 21 21 22 - /** struct xe_range_fence - XE range fence (address conflict tracking) */ 22 + /** struct xe_range_fence - Xe range fence (address conflict tracking) */ 23 23 struct xe_range_fence { 24 24 /** @rb: RB tree node inserted into interval tree */ 25 25 struct rb_node rb;
+3 -3
drivers/gpu/drm/xe/xe_sched_job.c
··· 160 160 } 161 161 162 162 /** 163 - * xe_sched_job_destroy - Destroy XE schedule job 164 - * @ref: reference to XE schedule job 163 + * xe_sched_job_destroy - Destroy Xe schedule job 164 + * @ref: reference to Xe schedule job 165 165 * 166 166 * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup 167 - * base DRM schedule job, and free memory for XE schedule job. 167 + * base DRM schedule job, and free memory for Xe schedule job. 168 168 */ 169 169 void xe_sched_job_destroy(struct kref *ref) 170 170 {
+6 -6
drivers/gpu/drm/xe/xe_sched_job.h
··· 23 23 void xe_sched_job_destroy(struct kref *ref); 24 24 25 25 /** 26 - * xe_sched_job_get - get reference to XE schedule job 27 - * @job: XE schedule job object 26 + * xe_sched_job_get - get reference to Xe schedule job 27 + * @job: Xe schedule job object 28 28 * 29 - * Increment XE schedule job's reference count 29 + * Increment Xe schedule job's reference count 30 30 */ 31 31 static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) 32 32 { ··· 35 35 } 36 36 37 37 /** 38 - * xe_sched_job_put - put reference to XE schedule job 39 - * @job: XE schedule job object 38 + * xe_sched_job_put - put reference to Xe schedule job 39 + * @job: Xe schedule job object 40 40 * 41 - * Decrement XE schedule job's reference count, call xe_sched_job_destroy when 41 + * Decrement Xe schedule job's reference count, call xe_sched_job_destroy when 42 42 * reference count == 0. 43 43 */ 44 44 static inline void xe_sched_job_put(struct xe_sched_job *job)
+1 -1
drivers/gpu/drm/xe/xe_sched_job_types.h
··· 32 32 }; 33 33 34 34 /** 35 - * struct xe_sched_job - XE schedule job (batch buffer tracking) 35 + * struct xe_sched_job - Xe schedule job (batch buffer tracking) 36 36 */ 37 37 struct xe_sched_job { 38 38 /** @drm: base DRM scheduler job */
+13 -26
drivers/gpu/drm/xe/xe_sriov_vf.c
··· 130 130 bool xe_sriov_vf_migration_supported(struct xe_device *xe) 131 131 { 132 132 xe_assert(xe, IS_SRIOV_VF(xe)); 133 - return xe->sriov.vf.migration.enabled; 133 + return !xe->sriov.vf.migration.disabled; 134 134 } 135 135 136 - static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) 136 + /** 137 + * xe_sriov_vf_migration_disable - Turn off VF migration with given log message. 138 + * @xe: the &xe_device instance. 139 + * @fmt: format string for the log message, to be combined with following VAs. 140 + */ 141 + void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...) 137 142 { 138 143 struct va_format vaf; 139 144 va_list va_args; ··· 151 146 xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); 152 147 va_end(va_args); 153 148 154 - xe->sriov.vf.migration.enabled = false; 149 + xe->sriov.vf.migration.disabled = true; 155 150 } 156 151 157 152 static void vf_migration_init_early(struct xe_device *xe) ··· 161 156 * supported at production quality. 
162 157 */ 163 158 if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) 164 - return vf_disable_migration(xe, 165 - "experimental feature not available on production builds"); 159 + return xe_sriov_vf_migration_disable(xe, 160 + "experimental feature not available on production builds"); 166 161 167 - if (GRAPHICS_VER(xe) < 20) 168 - return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found", 169 - GRAPHICS_VER(xe)); 162 + if (!xe_device_has_memirq(xe)) 163 + return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support"); 170 164 171 - if (!IS_DGFX(xe)) { 172 - struct xe_uc_fw_version guc_version; 173 - 174 - xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version); 175 - if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) 176 - return vf_disable_migration(xe, 177 - "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", 178 - guc_version.major, guc_version.minor); 179 - } 180 - 181 - xe->sriov.vf.migration.enabled = true; 182 - xe_sriov_dbg(xe, "migration support enabled\n"); 183 165 } 184 166 185 167 /** ··· 188 196 */ 189 197 int xe_sriov_vf_init_late(struct xe_device *xe) 190 198 { 191 - int err = 0; 192 - 193 - if (xe_sriov_vf_migration_supported(xe)) 194 - err = xe_sriov_vf_ccs_init(xe); 195 - 196 - return err; 199 + return xe_sriov_vf_ccs_init(xe); 197 200 } 198 201 199 202 static int sa_info_vf_ccs(struct seq_file *m, void *data)
+1
drivers/gpu/drm/xe/xe_sriov_vf.h
··· 14 14 void xe_sriov_vf_init_early(struct xe_device *xe); 15 15 int xe_sriov_vf_init_late(struct xe_device *xe); 16 16 bool xe_sriov_vf_migration_supported(struct xe_device *xe); 17 + void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...); 17 18 void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root); 18 19 19 20 #endif
+44 -2
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
··· 10 10 #include "xe_device.h" 11 11 #include "xe_exec_queue.h" 12 12 #include "xe_exec_queue_types.h" 13 + #include "xe_gt_sriov_vf.h" 14 + #include "xe_guc.h" 13 15 #include "xe_guc_submit.h" 14 16 #include "xe_lrc.h" 15 17 #include "xe_migrate.h" ··· 262 260 return err; 263 261 } 264 262 263 + /* 264 + * Whether GuC requires CCS copy BBs for VF migration. 265 + * @xe: the &xe_device instance. 266 + * 267 + * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs. 268 + * 269 + * Return: true if VF driver must participate in the CCS migration, false otherwise. 270 + */ 271 + static bool vf_migration_ccs_bb_needed(struct xe_device *xe) 272 + { 273 + xe_assert(xe, IS_SRIOV_VF(xe)); 274 + 275 + return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe); 276 + } 277 + 278 + /* 279 + * Check for disable migration due to no CCS BBs support in GuC FW. 280 + * @xe: the &xe_device instance. 281 + * 282 + * Performs late disable of VF migration feature in case GuC FW cannot support it. 283 + * 284 + * Returns: True if VF migration with CCS BBs is supported, false otherwise. 
285 + */ 286 + static bool vf_migration_ccs_bb_support_check(struct xe_device *xe) 287 + { 288 + struct xe_gt *gt = xe_root_mmio_gt(xe); 289 + struct xe_uc_fw_version guc_version; 290 + 291 + xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version); 292 + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) { 293 + xe_sriov_vf_migration_disable(xe, 294 + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", 295 + guc_version.major, guc_version.minor); 296 + return false; 297 + } 298 + 299 + return true; 300 + } 301 + 265 302 static void xe_sriov_vf_ccs_fini(void *arg) 266 303 { 267 304 struct xe_sriov_vf_ccs_ctx *ctx = arg; ··· 333 292 int err; 334 293 335 294 xe_assert(xe, IS_SRIOV_VF(xe)); 336 - xe_assert(xe, xe_sriov_vf_migration_supported(xe)); 337 295 338 - if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe)) 296 + if (!xe_sriov_vf_migration_supported(xe) || 297 + !vf_migration_ccs_bb_needed(xe) || 298 + !vf_migration_ccs_bb_support_check(xe)) 339 299 return 0; 340 300 341 301 for_each_ccs_rw_ctx(ctx_id) {
+3 -3
drivers/gpu/drm/xe/xe_sriov_vf_types.h
··· 34 34 /** @migration: VF Migration state data */ 35 35 struct { 36 36 /** 37 - * @migration.enabled: flag indicating if migration support 38 - * was enabled or not due to missing prerequisites 37 + * @migration.disabled: flag indicating if migration support 38 + * was turned off due to missing prerequisites 39 39 */ 40 - bool enabled; 40 + bool disabled; 41 41 } migration; 42 42 43 43 /** @ccs: VF CCS state data */
+1 -1
drivers/gpu/drm/xe/xe_svm.c
··· 633 633 634 634 /* 635 635 * XXX: We can't derive the GT here (or anywhere in this function), but 636 - * compute always uses the primary GT so accumlate stats on the likely 636 + * compute always uses the primary GT so accumulate stats on the likely 637 637 * GT of the fault. 638 638 */ 639 639 if (gt)
+1 -1
drivers/gpu/drm/xe/xe_tlb_inval.h
··· 33 33 * xe_tlb_inval_fence_wait() - TLB invalidation fence wait 34 34 * @fence: TLB invalidation fence to wait on 35 35 * 36 - * Wait on a TLB invalidiation fence until it signals, non interruptable 36 + * Wait on a TLB invalidation fence until it signals, non interruptible 37 37 */ 38 38 static inline void 39 39 xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence)
+1 -1
drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
··· 106 106 107 107 stolen_size = tile_size - mgr->stolen_base; 108 108 109 - xe_assert(xe, stolen_size > wopcm_size); 109 + xe_assert(xe, stolen_size >= wopcm_size); 110 110 stolen_size -= wopcm_size; 111 111 112 112 /* Verify usage fits in the actual resource available */
+2 -2
drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
··· 10 10 #include <drm/ttm/ttm_device.h> 11 11 12 12 /** 13 - * struct xe_ttm_vram_mgr - XE TTM VRAM manager 13 + * struct xe_ttm_vram_mgr - Xe TTM VRAM manager 14 14 * 15 15 * Manages placement of TTM resource in VRAM. 16 16 */ ··· 32 32 }; 33 33 34 34 /** 35 - * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource 35 + * struct xe_ttm_vram_mgr_resource - Xe TTM VRAM resource 36 36 */ 37 37 struct xe_ttm_vram_mgr_resource { 38 38 /** @base: Base TTM resource */
+3 -3
drivers/gpu/drm/xe/xe_uc_fw_types.h
··· 62 62 }; 63 63 64 64 /** 65 - * struct xe_uc_fw_version - Version for XE micro controller firmware 65 + * struct xe_uc_fw_version - Version for Xe micro controller firmware 66 66 */ 67 67 struct xe_uc_fw_version { 68 68 /** @branch: branch version of the FW (not always available) */ ··· 84 84 }; 85 85 86 86 /** 87 - * struct xe_uc_fw - XE micro controller firmware 87 + * struct xe_uc_fw - Xe micro controller firmware 88 88 */ 89 89 struct xe_uc_fw { 90 90 /** @type: type uC firmware */ ··· 112 112 /** @size: size of uC firmware including css header */ 113 113 size_t size; 114 114 115 - /** @bo: XE BO for uC firmware */ 115 + /** @bo: Xe BO for uC firmware */ 116 116 struct xe_bo *bo; 117 117 118 118 /** @has_gsc_headers: whether the FW image starts with GSC headers */
+1 -1
drivers/gpu/drm/xe/xe_uc_types.h
··· 12 12 #include "xe_wopcm_types.h" 13 13 14 14 /** 15 - * struct xe_uc - XE micro controllers 15 + * struct xe_uc - Xe micro controllers 16 16 */ 17 17 struct xe_uc { 18 18 /** @guc: Graphics micro controller */
+3 -3
drivers/gpu/drm/xe/xe_validation.h
··· 108 108 * @request_exclusive: Whether to lock exclusively (write mode) the next time 109 109 * the domain lock is locked. 110 110 * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization. 111 - * @nr: The drm_exec nr parameter used for drm_exec (re-)initializaiton. 111 + * @nr: The drm_exec nr parameter used for drm_exec (re-)initialization. 112 112 */ 113 113 struct xe_validation_ctx { 114 114 struct drm_exec *exec; ··· 137 137 * @_ret: The current error value possibly holding -ENOMEM 138 138 * 139 139 * Use this in a way similar to drm_exec_retry_on_contention(). 140 - * If @_ret contains -ENOMEM the tranaction is restarted once in a way that 140 + * If @_ret contains -ENOMEM the transaction is restarted once in a way that 141 141 * blocks other transactions and allows exhaustive eviction. If the transaction 142 142 * was already restarted once, just return the -ENOMEM. May also set 143 143 * _ret to -EINTR if not retrying and waits are interruptible. ··· 180 180 * @_val: The xe_validation_device. 181 181 * @_exec: The struct drm_exec object 182 182 * @_flags: Flags for the xe_validation_ctx initialization. 183 - * @_ret: Return in / out parameter. May be set by this macro. Typicall 0 when called. 183 + * @_ret: Return in / out parameter. May be set by this macro. Typically 0 when called. 184 184 * 185 185 * This macro will initiate a drm_exec transaction with additional support for 186 186 * exhaustive eviction.
+5 -5
drivers/gpu/drm/xe/xe_vm.c
··· 824 824 * 825 825 * (re)bind SVM range setting up GPU page tables for the range. 826 826 * 827 - * Return: dma fence for rebind to signal completion on succees, ERR_PTR on 827 + * Return: dma fence for rebind to signal completion on success, ERR_PTR on 828 828 * failure 829 829 */ 830 830 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, ··· 907 907 * 908 908 * Unbind SVM range removing the GPU page tables for the range. 909 909 * 910 - * Return: dma fence for unbind to signal completion on succees, ERR_PTR on 910 + * Return: dma fence for unbind to signal completion on success, ERR_PTR on 911 911 * failure 912 912 */ 913 913 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, ··· 1291 1291 * selection of options. The user PAT index is only for encoding leaf 1292 1292 * nodes, where we have use of more bits to do the encoding. The 1293 1293 * non-leaf nodes are instead under driver control so the chosen index 1294 - * here should be distict from the user PAT index. Also the 1294 + * here should be distinct from the user PAT index. Also the 1295 1295 * corresponding coherency of the PAT index should be tied to the 1296 1296 * allocation type of the page table (or at least we should pick 1297 1297 * something which is always safe). ··· 4172 4172 4173 4173 /** 4174 4174 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations 4175 - * @xe: Pointer to the XE device structure 4175 + * @xe: Pointer to the Xe device structure 4176 4176 * @vma: Pointer to the virtual memory area (VMA) structure 4177 4177 * @is_atomic: In pagefault path and atomic operation 4178 4178 * ··· 4319 4319 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); 4320 4320 } else if (__op->op == DRM_GPUVA_OP_MAP) { 4321 4321 vma = op->map.vma; 4322 - /* In case of madvise call, MAP will always be follwed by REMAP. 4322 + /* In case of madvise call, MAP will always be followed by REMAP. 
4323 4323 * Therefore temp_attr will always have sane values, making it safe to 4324 4324 * copy them to new vma. 4325 4325 */
+4 -4
drivers/gpu/drm/xe/xe_vm_doc.h
··· 7 7 #define _XE_VM_DOC_H_ 8 8 9 9 /** 10 - * DOC: XE VM (user address space) 10 + * DOC: Xe VM (user address space) 11 11 * 12 12 * VM creation 13 13 * =========== ··· 202 202 * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the 203 203 * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO 204 204 * was created and then a binding was created. We bypass creating a dummy BO in 205 - * XE and simply create a binding directly from the userptr. 205 + * Xe and simply create a binding directly from the userptr. 206 206 * 207 207 * Invalidation 208 208 * ------------ 209 209 * 210 210 * Since this is core kernel managed memory the kernel can move this memory 211 - * whenever it wants. We register an invalidation MMU notifier to alert XE when 211 + * whenever it wants. We register an invalidation MMU notifier to alert Xe when 212 212 * a user pointer is about to move. The invalidation notifier needs to block 213 213 * until all pending users (jobs or compute mode engines) of the userptr are 214 214 * idle to ensure no faults. This is done by waiting on all of VM's dma-resv slots. ··· 419 419 * ======= 420 420 * 421 421 * VM locking protects all of the core data paths (bind operations, execs, 422 - * evictions, and compute mode rebind worker) in XE. 422 + * evictions, and compute mode rebind worker) in Xe. 423 423 * 424 424 * Locks 425 425 * -----
+2 -2
drivers/gpu/drm/xe/xe_vm_types.h
··· 52 52 * struct xe_vma_mem_attr - memory attributes associated with vma 53 53 */ 54 54 struct xe_vma_mem_attr { 55 - /** @preferred_loc: perferred memory_location */ 55 + /** @preferred_loc: preferred memory_location */ 56 56 struct { 57 57 /** @preferred_loc.migration_policy: Pages migration policy */ 58 58 u32 migration_policy; ··· 338 338 u64 tlb_flush_seqno; 339 339 /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ 340 340 bool batch_invalidate_tlb; 341 - /** @xef: XE file handle for tracking this VM's drm client */ 341 + /** @xef: Xe file handle for tracking this VM's drm client */ 342 342 struct xe_file *xef; 343 343 }; 344 344
+4
drivers/gpu/drm/xe/xe_wa.c
··· 916 916 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), 917 917 XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) 918 918 }, 919 + { XE_RTP_NAME("14024681466"), 920 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), 921 + XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) 922 + }, 919 923 }; 920 924 921 925 static __maybe_unused const struct xe_rtp_entry oob_was[] = {
+4
include/drm/intel/pciids.h
··· 893 893 MACRO__(0xD744, ## __VA_ARGS__), \ 894 894 MACRO__(0xD745, ## __VA_ARGS__) 895 895 896 + /* CRI */ 897 + #define INTEL_CRI_IDS(MACRO__, ...) \ 898 + MACRO__(0x674C, ## __VA_ARGS__) 899 + 896 900 #endif /* __PCIIDS_H__ */