Merge tag 'drm-xe-next-2025-06-18' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

+48

Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon

··· 148 148 Description: RO. Fan 3 speed in RPM. 149 149 150 150 Only supported for particular Intel Xe graphics platforms. 151 + 152 + What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap 153 + Date: May 2025 154 + KernelVersion: 6.15 155 + Contact: intel-xe@lists.freedesktop.org 156 + Description: RW. Card burst (PL2) power limit in microwatts. 157 + 158 + The power controller will throttle the operating frequency 159 + if the power averaged over a window (typically milliseconds) 160 + exceeds this limit. A read value of 0 means that the PL2 161 + power limit is disabled; writing 0 disables the limit. 162 + PL2 is greater than PL1 and its time window is shorter 163 + than that of PL1. 164 + 165 + Only supported for particular Intel Xe graphics platforms. 166 + 167 + What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap 168 + Date: May 2025 169 + KernelVersion: 6.15 170 + Contact: intel-xe@lists.freedesktop.org 171 + Description: RW. Package burst (PL2) power limit in microwatts. 172 + 173 + The power controller will throttle the operating frequency 174 + if the power averaged over a window (typically milliseconds) 175 + exceeds this limit. A read value of 0 means that the PL2 176 + power limit is disabled; writing 0 disables the limit. 177 + PL2 is greater than PL1 and its time window is shorter 178 + than that of PL1. 179 + 180 + Only supported for particular Intel Xe graphics platforms. 181 + 182 + What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap_interval 183 + Date: May 2025 184 + KernelVersion: 6.15 185 + Contact: intel-xe@lists.freedesktop.org 186 + Description: RW. Card burst power limit interval (Tau in PL2/Tau) in 187 + milliseconds over which sustained power is averaged. 188 + 189 + Only supported for particular Intel Xe graphics platforms. 
190 + 191 + What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap_interval 192 + Date: May 2025 193 + KernelVersion: 6.15 194 + Contact: intel-xe@lists.freedesktop.org 195 + Description: RW. Package burst power limit interval (Tau in PL2/Tau) in 196 + milliseconds over which sustained power is averaged. 197 + 198 + Only supported for particular Intel Xe graphics platforms.

+8 -2

Documentation/gpu/xe/xe_configfs.rst

··· 2 2 3 3 .. _xe_configfs: 4 4 5 - ============ 5 + =========== 6 6 Xe Configfs 7 - ============ 7 + =========== 8 8 9 9 .. kernel-doc:: drivers/gpu/drm/xe/xe_configfs.c 10 10 :doc: Xe Configfs 11 + 12 + Internal API 13 + ============ 14 + 15 + .. kernel-doc:: drivers/gpu/drm/xe/xe_configfs.c 16 + :internal:

+34

drivers/gpu/drm/drm_gpusvm.c

··· 981 981 #endif 982 982 983 983 /** 984 + * drm_gpusvm_find_vma_start() - Find start address for first VMA in range 985 + * @gpusvm: Pointer to the GPU SVM structure 986 + * @start: The inclusive start user address. 987 + * @end: The exclusive end user address. 988 + * 989 + * Returns: The start address of the first VMA within the provided range, 990 + * ULONG_MAX otherwise. Assumes @start < @end. 991 + */ 992 + unsigned long 993 + drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, 994 + unsigned long start, 995 + unsigned long end) 996 + { 997 + struct mm_struct *mm = gpusvm->mm; 998 + struct vm_area_struct *vma; 999 + unsigned long addr = ULONG_MAX; 1000 + 1001 + if (!mmget_not_zero(mm)) 1002 + return addr; 1003 + 1004 + mmap_read_lock(mm); 1005 + 1006 + vma = find_vma_intersection(mm, start, end); 1007 + if (vma) 1008 + addr = vma->vm_start; 1009 + 1010 + mmap_read_unlock(mm); 1011 + mmput(mm); 1012 + 1013 + return addr; 1014 + } 1015 + EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start); 1016 + 1017 + /** 984 1018 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range 985 1019 * @gpusvm: Pointer to the GPU SVM structure 986 1020 * @fault_addr: Fault address

+3 -3

drivers/gpu/drm/xe/Kconfig

··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 config DRM_XE 3 - tristate "Intel Xe Graphics" 3 + tristate "Intel Xe2 Graphics" 4 4 depends on DRM && PCI && (m || (y && KUNIT=y)) 5 5 depends on INTEL_VSEC || !INTEL_VSEC 6 6 depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) ··· 31 31 select ACPI_VIDEO if X86 && ACPI 32 32 select ACPI_WMI if X86 && ACPI 33 33 select SYNC_FILE 34 - select IOSF_MBI 35 34 select CRC32 36 35 select SND_HDA_I915 if SND_HDA_CORE 37 36 select CEC_CORE if CEC_NOTIFIER ··· 45 46 select AUXILIARY_BUS 46 47 select HMM_MIRROR 47 48 help 48 - Experimental driver for Intel Xe series GPUs 49 + Driver for Intel Xe2 series GPUs and later. Experimental support 50 + for Xe series is also available. 49 51 50 52 If "M" is selected, the module will be called xe. 51 53

+8 -3

drivers/gpu/drm/xe/Kconfig.debug

··· 86 86 87 87 If in doubt, say "N". 88 88 89 - config DRM_XE_LARGE_GUC_BUFFER 90 - bool "Enable larger guc log buffer" 89 + config DRM_XE_DEBUG_GUC 90 + bool "Enable extra GuC related debug options" 91 + depends on DRM_XE_DEBUG 91 92 default n 93 + select STACKDEPOT 92 94 help 93 95 Choose this option when debugging guc issues. 94 - Buffer should be large enough for complex issues. 96 + The GuC log buffer is increased to the maximum allowed, which should 97 + be large enough for complex issues. The tracking of FAST_REQ messages 98 + is extended to include a record of the calling stack, which is then 99 + dumped on a FAST_REQ error notification. 95 100 96 101 Recommended for driver developers only. 97 102

+2 -1

drivers/gpu/drm/xe/Makefile

··· 139 139 xe_guc_relay.o \ 140 140 xe_memirq.o \ 141 141 xe_sriov.o \ 142 - xe_sriov_vf.o 142 + xe_sriov_vf.o \ 143 + xe_tile_sriov_vf.o 143 144 144 145 xe-$(CONFIG_PCI_IOV) += \ 145 146 xe_gt_sriov_pf.o \

+31

drivers/gpu/drm/xe/abi/guc_actions_abi.h

··· 161 161 XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, 162 162 }; 163 163 164 + enum xe_guc_register_context_param_offsets { 165 + XE_GUC_REGISTER_CONTEXT_DATA_0_MBZ = 0, 166 + XE_GUC_REGISTER_CONTEXT_DATA_1_FLAGS, 167 + XE_GUC_REGISTER_CONTEXT_DATA_2_CONTEXT_INDEX, 168 + XE_GUC_REGISTER_CONTEXT_DATA_3_ENGINE_CLASS, 169 + XE_GUC_REGISTER_CONTEXT_DATA_4_ENGINE_SUBMIT_MASK, 170 + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, 171 + XE_GUC_REGISTER_CONTEXT_DATA_6_WQ_DESC_ADDR_UPPER, 172 + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, 173 + XE_GUC_REGISTER_CONTEXT_DATA_8_WQ_BUF_BASE_UPPER, 174 + XE_GUC_REGISTER_CONTEXT_DATA_9_WQ_BUF_SIZE, 175 + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, 176 + XE_GUC_REGISTER_CONTEXT_MSG_LEN, 177 + }; 178 + 179 + enum xe_guc_register_context_multi_lrc_param_offsets { 180 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_0_MBZ = 0, 181 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_1_FLAGS, 182 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_2_PARENT_CONTEXT, 183 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_3_ENGINE_CLASS, 184 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_4_ENGINE_SUBMIT_MASK, 185 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, 186 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_6_WQ_DESC_ADDR_UPPER, 187 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, 188 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_8_WQ_BUF_BASE_UPPER, 189 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_9_WQ_BUF_SIZE, 190 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, 191 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR, 192 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11, 193 + }; 194 + 164 195 enum xe_guc_report_status { 165 196 XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, 166 197 XE_GUC_REPORT_STATUS_ACKED = 0x1,

+11 -3

drivers/gpu/drm/xe/abi/guc_errors_abi.h

··· 6 6 #ifndef _ABI_GUC_ERRORS_ABI_H 7 7 #define _ABI_GUC_ERRORS_ABI_H 8 8 9 - enum xe_guc_response_status { 10 - XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, 9 + enum xe_guc_response { 11 10 XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04, 12 11 XE_GUC_RESPONSE_INVALID_STATE = 0x0A, 13 12 XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B, ··· 20 21 XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41, 21 22 XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50, 22 23 XE_GUC_RESPONSE_INVALID_PARAMS = 0x60, 24 + XE_GUC_RESPONSE_INVALID_CONTEXT_INDEX = 0x61, 25 + XE_GUC_RESPONSE_INVALID_CONTEXT_REGISTRATION = 0x62, 26 + XE_GUC_RESPONSE_INVALID_DOORBELL_ID = 0x63, 27 + XE_GUC_RESPONSE_INVALID_ENGINE_ID = 0x64, 23 28 XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70, 24 29 XE_GUC_RESPONSE_INVALID_BUFFER = 0x71, 30 + XE_GUC_RESPONSE_BUFFER_ALREADY_REGISTERED = 0x72, 25 31 XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80, 26 32 XE_GUC_RESPONSE_PENDING_ACTION = 0x90, 33 + XE_GUC_RESPONSE_CONTEXT_NOT_REGISTERED = 0x100, 34 + XE_GUC_RESPONSE_CONTEXT_ALREADY_REGISTERED = 0X101, 27 35 XE_GUC_RESPONSE_INVALID_SIZE = 0x102, 28 36 XE_GUC_RESPONSE_MALFORMED_KLV = 0x103, 37 + XE_GUC_RESPONSE_INVALID_CONTEXT = 0x104, 29 38 XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105, 30 39 XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106, 31 40 XE_GUC_RESPONSE_VF_MIGRATED = 0x107, ··· 47 40 XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304, 48 41 XE_GUC_RESPONSE_CTB_IN_USE = 0x305, 49 42 XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306, 43 + XE_GUC_RESPONSE_HW_TIMEOUT = 0x30C, 50 44 XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D, 51 45 XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E, 52 46 XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F, 53 - XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, 47 + XE_GUC_RESPONSE_GENERIC_FAIL = 0xF000, 54 48 }; 55 49 56 50 enum xe_guc_load_status {

+21 -29

drivers/gpu/drm/xe/display/xe_fb_pin.c

··· 23 23 struct xe_device *xe = xe_bo_device(bo); 24 24 struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; 25 25 u32 column, row; 26 + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); 26 27 27 28 /* TODO: Maybe rewrite so we can traverse the bo addresses sequentially, 28 29 * by writing dpt/ggtt in a different order? ··· 33 32 u32 src_idx = src_stride * (height - 1) + column + bo_ofs; 34 33 35 34 for (row = 0; row < height; row++) { 36 - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, 37 - xe->pat.idx[XE_CACHE_NONE]); 35 + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); 38 36 39 - iosys_map_wr(map, *dpt_ofs, u64, pte); 37 + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); 40 38 *dpt_ofs += 8; 41 39 src_idx -= src_stride; 42 40 } ··· 55 55 { 56 56 struct xe_device *xe = xe_bo_device(bo); 57 57 struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; 58 - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index) 59 - = ggtt->pt_ops->pte_encode_bo; 60 58 u32 column, row; 59 + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); 61 60 62 61 for (row = 0; row < height; row++) { 63 62 u32 src_idx = src_stride * row + bo_ofs; 64 63 65 64 for (column = 0; column < width; column++) { 66 - iosys_map_wr(map, *dpt_ofs, u64, 67 - pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, 68 - xe->pat.idx[XE_CACHE_NONE])); 65 + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); 66 + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); 69 67 70 68 *dpt_ofs += 8; 71 69 src_idx++; ··· 127 129 return PTR_ERR(dpt); 128 130 129 131 if (view->type == I915_GTT_VIEW_NORMAL) { 132 + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); 130 133 u32 x; 131 134 132 135 for (x = 0; x < size / XE_PAGE_SIZE; x++) { 133 - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, 134 - xe->pat.idx[XE_CACHE_NONE]); 136 + u64 addr = xe_bo_addr(bo, x * XE_PAGE_SIZE, XE_PAGE_SIZE); 
135 137 136 - iosys_map_wr(&dpt->vmap, x * 8, u64, pte); 138 + iosys_map_wr(&dpt->vmap, x * 8, u64, pte | addr); 137 139 } 138 140 } else if (view->type == I915_GTT_VIEW_REMAPPED) { 139 141 const struct intel_remapped_info *remap_info = &view->remapped; ··· 171 173 { 172 174 struct xe_device *xe = xe_bo_device(bo); 173 175 u32 column, row; 176 + u64 pte = ggtt->pt_ops->pte_encode_flags(bo, xe->pat.idx[XE_CACHE_NONE]); 174 177 175 178 for (column = 0; column < width; column++) { 176 179 u32 src_idx = src_stride * (height - 1) + column + bo_ofs; 177 180 178 181 for (row = 0; row < height; row++) { 179 - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, 180 - xe->pat.idx[XE_CACHE_NONE]); 182 + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); 181 183 182 - ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte); 184 + ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte | addr); 183 185 *ggtt_ofs += XE_PAGE_SIZE; 184 186 src_idx -= src_stride; 185 187 } ··· 197 199 struct drm_gem_object *obj = intel_fb_bo(&fb->base); 198 200 struct xe_bo *bo = gem_to_xe_bo(obj); 199 201 struct xe_device *xe = to_xe_device(fb->base.dev); 200 - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; 202 + struct xe_tile *tile0 = xe_device_get_root_tile(xe); 203 + struct xe_ggtt *ggtt = tile0->mem.ggtt; 201 204 u32 align; 202 205 int ret; 203 206 204 207 /* TODO: Consider sharing framebuffer mapping? 
205 208 * embed i915_vma inside intel_framebuffer 206 209 */ 207 - xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); 210 + xe_pm_runtime_get_noresume(xe); 208 211 ret = mutex_lock_interruptible(&ggtt->lock); 209 212 if (ret) 210 213 goto out; ··· 214 215 if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) 215 216 align = max_t(u32, align, SZ_64K); 216 217 217 - if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) { 218 - vma->node = bo->ggtt_node[ggtt->tile->id]; 218 + if (bo->ggtt_node[tile0->id] && view->type == I915_GTT_VIEW_NORMAL) { 219 + vma->node = bo->ggtt_node[tile0->id]; 219 220 } else if (view->type == I915_GTT_VIEW_NORMAL) { 220 - u32 x, size = bo->ttm.base.size; 221 - 222 221 vma->node = xe_ggtt_node_init(ggtt); 223 222 if (IS_ERR(vma->node)) { 224 223 ret = PTR_ERR(vma->node); 225 224 goto out_unlock; 226 225 } 227 226 228 - ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); 227 + ret = xe_ggtt_node_insert_locked(vma->node, bo->size, align, 0); 229 228 if (ret) { 230 229 xe_ggtt_node_fini(vma->node); 231 230 goto out_unlock; 232 231 } 233 232 234 - for (x = 0; x < size; x += XE_PAGE_SIZE) { 235 - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, 236 - xe->pat.idx[XE_CACHE_NONE]); 237 - 238 - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); 239 - } 233 + xe_ggtt_map_bo(ggtt, vma->node, bo, xe->pat.idx[XE_CACHE_NONE]); 240 234 } else { 241 235 u32 i, ggtt_ofs; 242 236 const struct intel_rotation_info *rot_info = &view->rotated; ··· 263 271 out_unlock: 264 272 mutex_unlock(&ggtt->lock); 265 273 out: 266 - xe_pm_runtime_put(tile_to_xe(ggtt->tile)); 274 + xe_pm_runtime_put(xe); 267 275 return ret; 268 276 } 269 277 ··· 340 348 341 349 static void __xe_unpin_fb_vma(struct i915_vma *vma) 342 350 { 343 - u8 tile_id = vma->node->ggtt->tile->id; 351 + u8 tile_id = xe_device_get_root_tile(xe_bo_device(vma->bo))->id; 344 352 345 353 if (!refcount_dec_and_test(&vma->ref)) 346 354 return;

+1 -5

drivers/gpu/drm/xe/display/xe_plane_initial.c

··· 87 87 88 88 base = round_down(plane_config->base, page_size); 89 89 if (IS_DGFX(xe)) { 90 - u64 __iomem *gte = tile0->mem.ggtt->gsm; 91 - u64 pte; 90 + u64 pte = xe_ggtt_read_pte(tile0->mem.ggtt, base); 92 91 93 - gte += base / XE_PAGE_SIZE; 94 - 95 - pte = ioread64(gte); 96 92 if (!(pte & XE_GGTT_PTE_DM)) { 97 93 drm_err(&xe->drm, 98 94 "Initial plane programming missing DM bit\n");

+1

drivers/gpu/drm/xe/regs/xe_mchbar_regs.h

··· 40 40 #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) 41 41 #define PWR_LIM_VAL REG_GENMASK(14, 0) 42 42 #define PWR_LIM_EN REG_BIT(15) 43 + #define PWR_LIM REG_GENMASK(15, 0) 43 44 #define PWR_LIM_TIME REG_GENMASK(23, 17) 44 45 #define PWR_LIM_TIME_X REG_GENMASK(23, 22) 45 46 #define PWR_LIM_TIME_Y REG_GENMASK(21, 17)

+3

drivers/gpu/drm/xe/regs/xe_oa_regs.h

··· 97 97 #define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) 98 98 #define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) 99 99 100 + #define OAM_COMPRESSION_T3_CONTROL XE_REG(0x1c2e00) 101 + #define OAM_LAT_MEASURE_ENABLE REG_BIT(4) 102 + 100 103 #endif

-2

drivers/gpu/drm/xe/regs/xe_pcode_regs.h

··· 18 18 #define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) 19 19 #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) 20 20 21 - #define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) 22 21 #define BMG_FAN_1_SPEED XE_REG(0x138140) 23 22 #define BMG_FAN_2_SPEED XE_REG(0x138170) 24 23 #define BMG_FAN_3_SPEED XE_REG(0x1381a0) 25 24 #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) 26 25 #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) 27 - #define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458) 28 26 29 27 #endif /* _XE_PCODE_REGS_H_ */

+5

drivers/gpu/drm/xe/regs/xe_pmt.h

··· 10 10 #define BMG_PMT_BASE_OFFSET 0xDB000 11 11 #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) 12 12 13 + #define PUNIT_TELEMETRY_GUID XE_REG(BMG_DISCOVERY_OFFSET + 0x4) 14 + #define BMG_ENERGY_STATUS_PMT_OFFSET (0x30) 15 + #define ENERGY_PKG REG_GENMASK64(31, 0) 16 + #define ENERGY_CARD REG_GENMASK64(63, 32) 17 + 13 18 #define BMG_TELEMETRY_BASE_OFFSET 0xE0000 14 19 #define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) 15 20

+2 -2

drivers/gpu/drm/xe/tests/xe_bo.c

··· 514 514 * other way around, they may not be subject to swapping... 515 515 */ 516 516 if (alloced < purgeable) { 517 - xe_ttm_tt_account_subtract(&xe_tt->ttm); 517 + xe_ttm_tt_account_subtract(xe, &xe_tt->ttm); 518 518 xe_tt->purgeable = true; 519 - xe_ttm_tt_account_add(&xe_tt->ttm); 519 + xe_ttm_tt_account_add(xe, &xe_tt->ttm); 520 520 bo->ttm.priority = 0; 521 521 spin_lock(&bo->ttm.bdev->lru_lock); 522 522 ttm_bo_move_to_lru_tail(&bo->ttm);

+5 -6

drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c

··· 42 42 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); 43 43 44 44 KUNIT_ASSERT_EQ(test, 0, 45 - drm_mm_insert_node_in_range(&ggtt->mm, 46 - &bo->ggtt_node[tile->id]->base, 47 - bo->size, SZ_4K, 48 - 0, 0, U64_MAX, 0)); 45 + xe_ggtt_node_insert(bo->ggtt_node[tile->id], 46 + bo->size, SZ_4K)); 49 47 } 50 48 51 49 return bo; ··· 65 67 ggtt = xe_device_get_root_tile(test->priv)->mem.ggtt; 66 68 guc = &xe_device_get_gt(test->priv, 0)->uc.guc; 67 69 68 - drm_mm_init(&ggtt->mm, DUT_GGTT_START, DUT_GGTT_SIZE); 69 - mutex_init(&ggtt->lock); 70 + KUNIT_ASSERT_EQ(test, 0, 71 + xe_ggtt_init_kunit(ggtt, DUT_GGTT_START, 72 + DUT_GGTT_START + DUT_GGTT_SIZE)); 70 73 71 74 kunit_activate_static_stub(test, xe_managed_bo_create_pin_map, 72 75 replacement_xe_managed_bo_create_pin_map);

+34 -35

drivers/gpu/drm/xe/tests/xe_pci.c

··· 12 12 #include <kunit/test-bug.h> 13 13 #include <kunit/visibility.h> 14 14 15 - /** 16 - * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs 17 - * @xe_fn: Function to call for each device. 18 - * 19 - * This function iterates over the descriptors for all graphics IPs recognized 20 - * by the driver and calls @xe_fn: for each one of them. 21 - */ 22 - void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn) 15 + static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) 23 16 { 24 - const struct xe_graphics_desc *desc, *last = NULL; 25 - 26 - for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) { 27 - desc = graphics_ips[i].desc; 28 - if (desc == last) 29 - continue; 30 - 31 - xe_fn(desc); 32 - last = desc; 33 - } 17 + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s", 18 + param->verx100 / 100, param->verx100 % 100, param->name); 34 19 } 35 - EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip); 20 + 21 + KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); 22 + KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); 36 23 37 24 /** 38 - * xe_call_for_each_media_ip - Iterate over all recognized media IPs 39 - * @xe_fn: Function to call for each device. 25 + * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters 26 + * @prev: the pointer to the previous parameter to iterate from or NULL 27 + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE 40 28 * 41 - * This function iterates over the descriptors for all media IPs recognized 42 - * by the driver and calls @xe_fn: for each one of them. 29 + * This function prepares struct xe_ip parameter. 30 + * 31 + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. 
32 + * 33 + * Return: pointer to the next parameter or NULL if no more parameters 43 34 */ 44 - void xe_call_for_each_media_ip(xe_media_fn xe_fn) 35 + const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc) 45 36 { 46 - const struct xe_media_desc *desc, *last = NULL; 47 - 48 - for (int i = 0; i < ARRAY_SIZE(media_ips); i++) { 49 - desc = media_ips[i].desc; 50 - if (desc == last) 51 - continue; 52 - 53 - xe_fn(desc); 54 - last = desc; 55 - } 37 + return graphics_ip_gen_params(prev, desc); 56 38 } 57 - EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); 39 + EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); 40 + 41 + /** 42 + * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters 43 + * @prev: the pointer to the previous parameter to iterate from or NULL 44 + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE 45 + * 46 + * This function prepares struct xe_ip parameter. 47 + * 48 + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. 49 + * 50 + * Return: pointer to the next parameter or NULL if no more parameters 51 + */ 52 + const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) 53 + { 54 + return media_ip_gen_params(prev, desc); 55 + } 56 + EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); 58 57 59 58 static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, 60 59 u32 *ver, u32 *revid)

+8 -16

drivers/gpu/drm/xe/tests/xe_pci_test.c

··· 14 14 #include "xe_pci_test.h" 15 15 #include "xe_pci_types.h" 16 16 17 - static void check_graphics_ip(const struct xe_graphics_desc *graphics) 17 + static void check_graphics_ip(struct kunit *test) 18 18 { 19 - struct kunit *test = kunit_get_current_test(); 19 + const struct xe_ip *param = test->param_value; 20 + const struct xe_graphics_desc *graphics = param->desc; 20 21 u64 mask = graphics->hw_engine_mask; 21 22 22 23 /* RCS, CCS, and BCS engines are allowed on the graphics IP */ ··· 29 28 KUNIT_ASSERT_EQ(test, mask, 0); 30 29 } 31 30 32 - static void check_media_ip(const struct xe_media_desc *media) 31 + static void check_media_ip(struct kunit *test) 33 32 { 34 - struct kunit *test = kunit_get_current_test(); 33 + const struct xe_ip *param = test->param_value; 34 + const struct xe_media_desc *media = param->desc; 35 35 u64 mask = media->hw_engine_mask; 36 36 37 37 /* VCS, VECS and GSCCS engines are allowed on the media IP */ ··· 44 42 KUNIT_ASSERT_EQ(test, mask, 0); 45 43 } 46 44 47 - static void xe_gmdid_graphics_ip(struct kunit *test) 48 - { 49 - xe_call_for_each_graphics_ip(check_graphics_ip); 50 - } 51 - 52 - static void xe_gmdid_media_ip(struct kunit *test) 53 - { 54 - xe_call_for_each_media_ip(check_media_ip); 55 - } 56 - 57 45 static struct kunit_case xe_pci_tests[] = { 58 - KUNIT_CASE(xe_gmdid_graphics_ip), 59 - KUNIT_CASE(xe_gmdid_media_ip), 46 + KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), 47 + KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), 60 48 {} 61 49 }; 62 50

+2 -9

drivers/gpu/drm/xe/tests/xe_pci_test.h

··· 12 12 #include "xe_sriov_types.h" 13 13 14 14 struct xe_device; 15 - struct xe_graphics_desc; 16 - struct xe_media_desc; 17 - 18 - typedef int (*xe_device_fn)(struct xe_device *); 19 - typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); 20 - typedef void (*xe_media_fn)(const struct xe_media_desc *); 21 - 22 - void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); 23 - void xe_call_for_each_media_ip(xe_media_fn xe_fn); 24 15 25 16 struct xe_pci_fake_data { 26 17 enum xe_sriov_mode sriov_mode; ··· 25 34 26 35 int xe_pci_fake_device_init(struct xe_device *xe); 27 36 37 + const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); 38 + const void *xe_pci_media_ip_gen_param(const void *prev, char *desc); 28 39 const void *xe_pci_live_device_gen_param(const void *prev, char *desc); 29 40 30 41 #endif

+39 -30

drivers/gpu/drm/xe/xe_bo.c

··· 336 336 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */ 337 337 struct xe_ttm_tt { 338 338 struct ttm_tt ttm; 339 - /** @xe - The xe device */ 340 - struct xe_device *xe; 341 339 struct sg_table sgt; 342 340 struct sg_table *sg; 343 341 /** @purgeable: Whether the content of the pages of @ttm is purgeable. */ 344 342 bool purgeable; 345 343 }; 346 344 347 - static int xe_tt_map_sg(struct ttm_tt *tt) 345 + static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt) 348 346 { 349 347 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 350 348 unsigned long num_pages = tt->num_pages; ··· 357 359 ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages, 358 360 num_pages, 0, 359 361 (u64)num_pages << PAGE_SHIFT, 360 - xe_sg_segment_size(xe_tt->xe->drm.dev), 362 + xe_sg_segment_size(xe->drm.dev), 361 363 GFP_KERNEL); 362 364 if (ret) 363 365 return ret; 364 366 365 367 xe_tt->sg = &xe_tt->sgt; 366 - ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, 368 + ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, 367 369 DMA_ATTR_SKIP_CPU_SYNC); 368 370 if (ret) { 369 371 sg_free_table(xe_tt->sg); ··· 374 376 return 0; 375 377 } 376 378 377 - static void xe_tt_unmap_sg(struct ttm_tt *tt) 379 + static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt) 378 380 { 379 381 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 380 382 381 383 if (xe_tt->sg) { 382 - dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, 384 + dma_unmap_sgtable(xe->drm.dev, xe_tt->sg, 383 385 DMA_BIDIRECTIONAL, 0); 384 386 sg_free_table(xe_tt->sg); 385 387 xe_tt->sg = NULL; ··· 398 400 * Account ttm pages against the device shrinker's shrinkable and 399 401 * purgeable counts. 
400 402 */ 401 - static void xe_ttm_tt_account_add(struct ttm_tt *tt) 403 + static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt) 402 404 { 403 405 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 404 406 405 407 if (xe_tt->purgeable) 406 - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages); 408 + xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages); 407 409 else 408 - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0); 410 + xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0); 409 411 } 410 412 411 - static void xe_ttm_tt_account_subtract(struct ttm_tt *tt) 413 + static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt) 412 414 { 413 415 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 414 416 415 417 if (xe_tt->purgeable) 416 - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages); 418 + xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages); 417 419 else 418 - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0); 420 + xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0); 419 421 } 420 422 421 423 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, ··· 434 436 return NULL; 435 437 436 438 tt = &xe_tt->ttm; 437 - xe_tt->xe = xe; 438 439 439 440 extra_pages = 0; 440 441 if (xe_bo_needs_ccs_pages(bo)) ··· 524 527 return err; 525 528 526 529 xe_tt->purgeable = false; 527 - xe_ttm_tt_account_add(tt); 530 + xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt); 528 531 529 532 return 0; 530 533 } 531 534 532 535 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) 533 536 { 537 + struct xe_device *xe = ttm_to_xe_device(ttm_dev); 538 + 534 539 if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) && 535 540 !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE)) 536 541 return; 537 542 538 - xe_tt_unmap_sg(tt); 543 + xe_tt_unmap_sg(xe, tt); 539 544 540 545 
ttm_pool_free(&ttm_dev->pool, tt); 541 - xe_ttm_tt_account_subtract(tt); 546 + xe_ttm_tt_account_subtract(xe, tt); 542 547 } 543 548 544 549 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) ··· 788 789 /* Bo creation path, moving to system or TT. */ 789 790 if ((!old_mem && ttm) && !handle_system_ccs) { 790 791 if (new_mem->mem_type == XE_PL_TT) 791 - ret = xe_tt_map_sg(ttm); 792 + ret = xe_tt_map_sg(xe, ttm); 792 793 if (!ret) 793 794 ttm_bo_move_null(ttm_bo, new_mem); 794 795 goto out; ··· 811 812 (!ttm && ttm_bo->type == ttm_bo_type_device); 812 813 813 814 if (new_mem->mem_type == XE_PL_TT) { 814 - ret = xe_tt_map_sg(ttm); 815 + ret = xe_tt_map_sg(xe, ttm); 815 816 if (ret) 816 817 goto out; 817 818 } ··· 957 958 if (timeout < 0) 958 959 ret = timeout; 959 960 960 - xe_tt_unmap_sg(ttm_bo->ttm); 961 + xe_tt_unmap_sg(xe, ttm_bo->ttm); 961 962 } 962 963 963 964 return ret; ··· 967 968 struct ttm_buffer_object *bo, 968 969 unsigned long *scanned) 969 970 { 971 + struct xe_device *xe = ttm_to_xe_device(bo->bdev); 970 972 long lret; 971 973 972 974 /* Fake move to system, without copying data. 
*/ ··· 982 982 if (lret) 983 983 return lret; 984 984 985 - xe_tt_unmap_sg(bo->ttm); 985 + xe_tt_unmap_sg(xe, bo->ttm); 986 986 ttm_bo_move_null(bo, new_resource); 987 987 } 988 988 ··· 993 993 .allow_move = false}); 994 994 995 995 if (lret > 0) 996 - xe_ttm_tt_account_subtract(bo->ttm); 996 + xe_ttm_tt_account_subtract(xe, bo->ttm); 997 997 998 998 return lret; 999 999 } ··· 1043 1043 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 1044 1044 struct ttm_place place = {.mem_type = bo->resource->mem_type}; 1045 1045 struct xe_bo *xe_bo = ttm_to_xe_bo(bo); 1046 - struct xe_device *xe = xe_tt->xe; 1046 + struct xe_device *xe = ttm_to_xe_device(bo->bdev); 1047 1047 bool needs_rpm; 1048 1048 long lret = 0L; 1049 1049 ··· 1080 1080 xe_pm_runtime_put(xe); 1081 1081 1082 1082 if (lret > 0) 1083 - xe_ttm_tt_account_subtract(tt); 1083 + xe_ttm_tt_account_subtract(xe, tt); 1084 1084 1085 1085 out_unref: 1086 1086 xe_bo_put(xe_bo); ··· 1381 1381 ttm_bo->sg = NULL; 1382 1382 xe_tt->sg = NULL; 1383 1383 } else if (xe_tt->sg) { 1384 - dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, 1384 + dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev, 1385 + xe_tt->sg, 1385 1386 DMA_BIDIRECTIONAL, 0); 1386 1387 sg_free_table(xe_tt->sg); 1387 1388 xe_tt->sg = NULL; ··· 2294 2293 2295 2294 ttm_bo_pin(&bo->ttm); 2296 2295 if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) 2297 - xe_ttm_tt_account_subtract(bo->ttm.ttm); 2296 + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); 2298 2297 2299 2298 /* 2300 2299 * FIXME: If we always use the reserve / unreserve functions for locking ··· 2342 2341 2343 2342 ttm_bo_pin(&bo->ttm); 2344 2343 if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) 2345 - xe_ttm_tt_account_subtract(bo->ttm.ttm); 2344 + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); 2346 2345 2347 2346 /* 2348 2347 * FIXME: If we always use the reserve / unreserve functions for locking ··· 2378 2377 2379 2378 ttm_bo_unpin(&bo->ttm); 2380 2379 if (bo->ttm.ttm && 
ttm_tt_is_populated(bo->ttm.ttm)) 2381 - xe_ttm_tt_account_add(bo->ttm.ttm); 2380 + xe_ttm_tt_account_add(xe, bo->ttm.ttm); 2382 2381 2383 2382 /* 2384 2383 * FIXME: If we always use the reserve / unreserve functions for locking ··· 2410 2409 } 2411 2410 ttm_bo_unpin(&bo->ttm); 2412 2411 if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) 2413 - xe_ttm_tt_account_add(bo->ttm.ttm); 2412 + xe_ttm_tt_account_add(xe, bo->ttm.ttm); 2414 2413 } 2415 2414 2416 2415 /** ··· 2993 2992 if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM)) 2994 2993 return false; 2995 2994 2995 + /* 2996 + * Compression implies coh_none, therefore we know for sure that WB 2997 + * memory can't currently use compression, which is likely one of the 2998 + * common cases. 2999 + */ 3000 + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) 3001 + return false; 3002 + 2996 3003 return true; 2997 3004 } 2998 3005 ··· 3076 3067 #endif 3077 3068 for_each_tile(tile, xe_bo_device(bo), id) 3078 3069 if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) 3079 - might_lock(&bo->ggtt_node[id]->ggtt->lock); 3070 + xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt); 3080 3071 drm_gem_object_put(&bo->ttm.base); 3081 3072 } 3082 3073 }

+1 -3

drivers/gpu/drm/xe/xe_bo_evict.c

··· 197 197 if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) 198 198 continue; 199 199 200 - mutex_lock(&tile->mem.ggtt->lock); 201 - xe_ggtt_map_bo(tile->mem.ggtt, bo); 202 - mutex_unlock(&tile->mem.ggtt->lock); 200 + xe_ggtt_map_bo_unlocked(tile->mem.ggtt, bo); 203 201 } 204 202 } 205 203

+160

drivers/gpu/drm/xe/xe_configfs.c

··· 3 3 * Copyright © 2025 Intel Corporation 4 4 */ 5 5 6 + #include <linux/bitops.h> 6 7 #include <linux/configfs.h> 8 + #include <linux/find.h> 7 9 #include <linux/init.h> 8 10 #include <linux/module.h> 9 11 #include <linux/pci.h> 12 + #include <linux/string.h> 10 13 11 14 #include "xe_configfs.h" 12 15 #include "xe_module.h" 16 + 17 + #include "xe_hw_engine_types.h" 13 18 14 19 /** 15 20 * DOC: Xe Configfs ··· 53 48 * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode 54 49 * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported) 55 50 * 51 + * Allowed engines: 52 + * ---------------- 53 + * 54 + * Allow only a set of engine(s) to be available, disabling the other engines 55 + * even if they are available in hardware. This is applied after HW fuses are 56 + * considered on each tile. Examples: 57 + * 58 + * Allow only one render and one copy engines, nothing else:: 59 + * 60 + * # echo 'rcs0,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed 61 + * 62 + * Allow only compute engines and first copy engine:: 63 + * 64 + * # echo 'ccs*,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed 65 + * 66 + * Note that the engine names are the per-GT hardware names. On multi-tile 67 + * platforms, writing ``rcs0,bcs0`` to this file would allow the first render 68 + * and copy engines on each tile. 69 + * 70 + * The requested configuration may not be supported by the platform and driver 71 + * may fail to probe. For example: if at least one copy engine is expected to be 72 + * available for migrations, but it's disabled. This is intended for debugging 73 + * purposes only. 
74 + * 56 75 * Remove devices 57 76 * ============== 58 77 * ··· 89 60 struct config_group group; 90 61 91 62 bool survivability_mode; 63 + u64 engines_allowed; 92 64 93 65 /* protects attributes */ 94 66 struct mutex lock; 67 + }; 68 + 69 + struct engine_info { 70 + const char *cls; 71 + u64 mask; 72 + }; 73 + 74 + /* Some helpful macros to aid on the sizing of buffer allocation when parsing */ 75 + #define MAX_ENGINE_CLASS_CHARS 5 76 + #define MAX_ENGINE_INSTANCE_CHARS 2 77 + 78 + static const struct engine_info engine_info[] = { 79 + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK }, 80 + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK }, 81 + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK }, 82 + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK }, 83 + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK }, 84 + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK }, 95 85 }; 96 86 97 87 static struct xe_config_device *to_xe_config_device(struct config_item *item) ··· 142 94 return len; 143 95 } 144 96 97 + static ssize_t engines_allowed_show(struct config_item *item, char *page) 98 + { 99 + struct xe_config_device *dev = to_xe_config_device(item); 100 + char *p = page; 101 + 102 + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { 103 + u64 mask = engine_info[i].mask; 104 + 105 + if ((dev->engines_allowed & mask) == mask) { 106 + p += sprintf(p, "%s*\n", engine_info[i].cls); 107 + } else if (mask & dev->engines_allowed) { 108 + u16 bit0 = __ffs64(mask), bit; 109 + 110 + mask &= dev->engines_allowed; 111 + 112 + for_each_set_bit(bit, (const unsigned long *)&mask, 64) 113 + p += sprintf(p, "%s%u\n", engine_info[i].cls, 114 + bit - bit0); 115 + } 116 + } 117 + 118 + return p - page; 119 + } 120 + 121 + static bool lookup_engine_mask(const char *pattern, u64 *mask) 122 + { 123 + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { 124 + u8 instance; 125 + u16 bit; 126 + 127 + if (!str_has_prefix(pattern, engine_info[i].cls)) 128 + continue; 129 + 130 + pattern += 
strlen(engine_info[i].cls); 131 + 132 + if (!strcmp(pattern, "*")) { 133 + *mask = engine_info[i].mask; 134 + return true; 135 + } 136 + 137 + if (kstrtou8(pattern, 10, &instance)) 138 + return false; 139 + 140 + bit = __ffs64(engine_info[i].mask) + instance; 141 + if (bit >= fls64(engine_info[i].mask)) 142 + return false; 143 + 144 + *mask = BIT_ULL(bit); 145 + return true; 146 + } 147 + 148 + return false; 149 + } 150 + 151 + static ssize_t engines_allowed_store(struct config_item *item, const char *page, 152 + size_t len) 153 + { 154 + struct xe_config_device *dev = to_xe_config_device(item); 155 + size_t patternlen, p; 156 + u64 mask, val = 0; 157 + 158 + for (p = 0; p < len; p += patternlen + 1) { 159 + char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; 160 + 161 + patternlen = strcspn(page + p, ",\n"); 162 + if (patternlen >= sizeof(buf)) 163 + return -EINVAL; 164 + 165 + memcpy(buf, page + p, patternlen); 166 + buf[patternlen] = '\0'; 167 + 168 + if (!lookup_engine_mask(buf, &mask)) 169 + return -EINVAL; 170 + 171 + val |= mask; 172 + } 173 + 174 + mutex_lock(&dev->lock); 175 + dev->engines_allowed = val; 176 + mutex_unlock(&dev->lock); 177 + 178 + return len; 179 + } 180 + 145 181 CONFIGFS_ATTR(, survivability_mode); 182 + CONFIGFS_ATTR(, engines_allowed); 146 183 147 184 static struct configfs_attribute *xe_config_device_attrs[] = { 148 185 &attr_survivability_mode, 186 + &attr_engines_allowed, 149 187 NULL, 150 188 }; 151 189 ··· 272 138 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 273 139 if (!dev) 274 140 return ERR_PTR(-ENOMEM); 141 + 142 + /* Default values */ 143 + dev->engines_allowed = U64_MAX; 275 144 276 145 config_group_init_type_name(&dev->group, name, &xe_config_device_type); 277 146 ··· 361 224 mutex_unlock(&dev->lock); 362 225 363 226 config_item_put(&dev->group.cg_item); 227 + } 228 + 229 + /** 230 + * xe_configfs_get_engines_allowed - get engine allowed mask from configfs 231 + * @pdev: pci device 232 + * 233 + * Find the 
configfs group that belongs to the pci device and return 234 + * the mask of engines allowed to be used. 235 + * 236 + * Return: engine mask with allowed engines 237 + */ 238 + u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) 239 + { 240 + struct xe_config_device *dev = configfs_find_group(pdev); 241 + u64 engines_allowed; 242 + 243 + if (!dev) 244 + return U64_MAX; 245 + 246 + engines_allowed = dev->engines_allowed; 247 + config_item_put(&dev->group.cg_item); 248 + 249 + return engines_allowed; 364 250 } 365 251 366 252 int __init xe_configfs_init(void)

+7 -4

drivers/gpu/drm/xe/xe_configfs.h

··· 5 5 #ifndef _XE_CONFIGFS_H_ 6 6 #define _XE_CONFIGFS_H_ 7 7 8 + #include <linux/limits.h> 8 9 #include <linux/types.h> 9 10 10 11 struct pci_dev; ··· 15 14 void xe_configfs_exit(void); 16 15 bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); 17 16 void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); 17 + u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); 18 18 #else 19 - static inline int xe_configfs_init(void) { return 0; }; 20 - static inline void xe_configfs_exit(void) {}; 21 - static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }; 22 - static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {}; 19 + static inline int xe_configfs_init(void) { return 0; } 20 + static inline void xe_configfs_exit(void) { } 21 + static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } 22 + static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } 23 + static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } 23 24 #endif 24 25 25 26 #endif

+38

drivers/gpu/drm/xe/xe_debugfs.c

··· 191 191 .write = wedged_mode_set, 192 192 }; 193 193 194 + static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf, 195 + size_t size, loff_t *pos) 196 + { 197 + struct xe_device *xe = file_inode(f)->i_private; 198 + char buf[32]; 199 + int len = 0; 200 + 201 + len = scnprintf(buf, sizeof(buf), "%d\n", xe->atomic_svm_timeslice_ms); 202 + 203 + return simple_read_from_buffer(ubuf, size, pos, buf, len); 204 + } 205 + 206 + static ssize_t atomic_svm_timeslice_ms_set(struct file *f, 207 + const char __user *ubuf, 208 + size_t size, loff_t *pos) 209 + { 210 + struct xe_device *xe = file_inode(f)->i_private; 211 + u32 atomic_svm_timeslice_ms; 212 + ssize_t ret; 213 + 214 + ret = kstrtouint_from_user(ubuf, size, 0, &atomic_svm_timeslice_ms); 215 + if (ret) 216 + return ret; 217 + 218 + xe->atomic_svm_timeslice_ms = atomic_svm_timeslice_ms; 219 + 220 + return size; 221 + } 222 + 223 + static const struct file_operations atomic_svm_timeslice_ms_fops = { 224 + .owner = THIS_MODULE, 225 + .read = atomic_svm_timeslice_ms_show, 226 + .write = atomic_svm_timeslice_ms_set, 227 + }; 228 + 194 229 void xe_debugfs_register(struct xe_device *xe) 195 230 { 196 231 struct ttm_device *bdev = &xe->ttm; ··· 245 210 246 211 debugfs_create_file("wedged_mode", 0600, root, xe, 247 212 &wedged_mode_fops); 213 + 214 + debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, 215 + &atomic_svm_timeslice_ms_fops); 248 216 249 217 for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { 250 218 man = ttm_manager_type(bdev, mem_type);

+10 -11

drivers/gpu/drm/xe/xe_device.c

··· 402 402 if (xe->unordered_wq) 403 403 destroy_workqueue(xe->unordered_wq); 404 404 405 - if (!IS_ERR_OR_NULL(xe->mem.shrinker)) 406 - xe_shrinker_destroy(xe->mem.shrinker); 407 - 408 405 if (xe->destroy_wq) 409 406 destroy_workqueue(xe->destroy_wq); 410 407 ··· 435 438 if (err) 436 439 goto err; 437 440 438 - xe->mem.shrinker = xe_shrinker_create(xe); 439 - if (IS_ERR(xe->mem.shrinker)) 440 - return ERR_CAST(xe->mem.shrinker); 441 + err = xe_shrinker_create(xe); 442 + if (err) 443 + goto err; 441 444 442 445 xe->info.devid = pdev->device; 443 446 xe->info.revid = pdev->revision; 444 447 xe->info.force_execlist = xe_modparam.force_execlist; 448 + xe->atomic_svm_timeslice_ms = 5; 445 449 446 450 err = xe_irq_init(xe); 447 451 if (err) ··· 802 804 * be performed. 803 805 */ 804 806 xe_gt_mmio_init(gt); 805 - } 806 807 807 - for_each_tile(tile, xe, id) { 808 808 if (IS_SRIOV_VF(xe)) { 809 - xe_guc_comm_init_early(&tile->primary_gt->uc.guc); 810 - err = xe_gt_sriov_vf_bootstrap(tile->primary_gt); 809 + xe_guc_comm_init_early(&gt->uc.guc); 810 + err = xe_gt_sriov_vf_bootstrap(gt); 811 811 if (err) 812 812 return err; 813 - err = xe_gt_sriov_vf_query_config(tile->primary_gt); 813 + err = xe_gt_sriov_vf_query_config(gt); 814 814 if (err) 815 815 return err; 816 816 } 817 + } 818 + 819 + for_each_tile(tile, xe, id) { 817 820 err = xe_ggtt_init_early(tile->mem.ggtt); 818 821 if (err) 819 822 return err;

+2

drivers/gpu/drm/xe/xe_device.h

··· 195 195 struct xe_file *xe_file_get(struct xe_file *xef); 196 196 void xe_file_put(struct xe_file *xef); 197 197 198 + int xe_is_injection_active(void); 199 + 198 200 /* 199 201 * Occasionally it is seen that the G2H worker starts running after a delay of more than 200 202 * a second even after being queued and activated by the Linux workqueue subsystem. This

+7

drivers/gpu/drm/xe/xe_device_types.h

··· 502 502 const struct xe_pat_table_entry *table; 503 503 /** @pat.n_entries: Number of PAT entries */ 504 504 int n_entries; 505 + /** @pat.pat_ats: PAT entry for PCIe ATS responses */ 506 + const struct xe_pat_table_entry *pat_ats; 507 + /** @pat.pat_pta: PAT entry for page table accesses */ 508 + const struct xe_pat_table_entry *pat_pta; 505 509 u32 idx[__XE_CACHE_LEVEL_COUNT]; 506 510 } pat; 507 511 ··· 579 575 580 576 /** @pmu: performance monitoring unit */ 581 577 struct xe_pmu pmu; 578 + 579 + /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice in milliseconds */ 580 + u32 atomic_svm_timeslice_ms; 582 581 583 582 #ifdef TEST_VM_OPS_ERROR 584 583 /**

+1 -1

drivers/gpu/drm/xe/xe_drv.h

··· 9 9 #include <drm/drm_drv.h> 10 10 11 11 #define DRIVER_NAME "xe" 12 - #define DRIVER_DESC "Intel Xe Graphics" 12 + #define DRIVER_DESC "Intel Xe2 Graphics" 13 13 14 14 /* Interface history: 15 15 *

+195 -54

drivers/gpu/drm/xe/xe_ggtt.c

··· 5 5 6 6 #include "xe_ggtt.h" 7 7 8 + #include <kunit/visibility.h> 8 9 #include <linux/fault-inject.h> 9 10 #include <linux/io-64-nonatomic-lo-hi.h> 10 11 #include <linux/sizes.h> ··· 23 22 #include "xe_device.h" 24 23 #include "xe_gt.h" 25 24 #include "xe_gt_printk.h" 26 - #include "xe_gt_sriov_vf.h" 27 25 #include "xe_gt_tlb_invalidation.h" 28 26 #include "xe_map.h" 29 27 #include "xe_mmio.h" 30 28 #include "xe_pm.h" 29 + #include "xe_res_cursor.h" 31 30 #include "xe_sriov.h" 31 + #include "xe_tile_sriov_vf.h" 32 32 #include "xe_wa.h" 33 33 #include "xe_wopcm.h" 34 34 ··· 66 64 * give us the correct placement for free. 67 65 */ 68 66 69 - static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 70 - u16 pat_index) 67 + static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) 71 68 { 72 - u64 pte; 73 - 74 - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 75 - pte |= XE_PAGE_PRESENT; 69 + u64 pte = XE_PAGE_PRESENT; 76 70 77 71 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 78 72 pte |= XE_GGTT_PTE_DM; ··· 76 78 return pte; 77 79 } 78 80 79 - static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 80 - u16 pat_index) 81 + static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) 81 82 { 82 83 struct xe_device *xe = xe_bo_device(bo); 83 84 u64 pte; 84 85 85 - pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index); 86 + pte = xelp_ggtt_pte_flags(bo, pat_index); 86 87 87 88 xe_assert(xe, pat_index <= 3); 88 89 ··· 146 149 xe_tile_assert(ggtt->tile, start < end); 147 150 148 151 if (ggtt->scratch) 149 - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, 150 - pat_index); 152 + scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) | 153 + ggtt->pt_ops->pte_encode_flags(ggtt->scratch, 154 + pat_index); 151 155 else 152 156 scratch_pte = 0; 153 157 ··· 156 158 ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte); 157 159 start += XE_PAGE_SIZE; 158 160 } 161 + } 162 + 163 + /** 164 + * xe_ggtt_alloc - Allocate a 
GGTT for a given &xe_tile 165 + * @tile: &xe_tile 166 + * 167 + * Allocates a &xe_ggtt for a given tile. 168 + * 169 + * Return: &xe_ggtt on success, or NULL when out of memory. 170 + */ 171 + struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile) 172 + { 173 + struct xe_ggtt *ggtt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*ggtt), GFP_KERNEL); 174 + if (ggtt) 175 + ggtt->tile = tile; 176 + return ggtt; 159 177 } 160 178 161 179 static void ggtt_fini_early(struct drm_device *drm, void *arg) ··· 190 176 ggtt->scratch = NULL; 191 177 } 192 178 179 + #ifdef CONFIG_LOCKDEP 180 + void xe_ggtt_might_lock(struct xe_ggtt *ggtt) 181 + { 182 + might_lock(&ggtt->lock); 183 + } 184 + #endif 185 + 193 186 static void primelockdep(struct xe_ggtt *ggtt) 194 187 { 195 188 if (!IS_ENABLED(CONFIG_LOCKDEP)) ··· 208 187 } 209 188 210 189 static const struct xe_ggtt_pt_ops xelp_pt_ops = { 211 - .pte_encode_bo = xelp_ggtt_pte_encode_bo, 190 + .pte_encode_flags = xelp_ggtt_pte_flags, 212 191 .ggtt_set_pte = xe_ggtt_set_pte, 213 192 }; 214 193 215 194 static const struct xe_ggtt_pt_ops xelpg_pt_ops = { 216 - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, 195 + .pte_encode_flags = xelpg_ggtt_pte_flags, 217 196 .ggtt_set_pte = xe_ggtt_set_pte, 218 197 }; 219 198 220 199 static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { 221 - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, 200 + .pte_encode_flags = xelpg_ggtt_pte_flags, 222 201 .ggtt_set_pte = xe_ggtt_set_pte_and_flush, 223 202 }; 203 + 204 + static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved) 205 + { 206 + drm_mm_init(&ggtt->mm, reserved, 207 + ggtt->size - reserved); 208 + mutex_init(&ggtt->lock); 209 + primelockdep(ggtt); 210 + } 211 + 212 + int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) 213 + { 214 + ggtt->size = size; 215 + __xe_ggtt_init_early(ggtt, reserved); 216 + return 0; 217 + } 218 + EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); 224 219 225 220 /** 226 221 * xe_ggtt_init_early - Early GGTT 
initialization ··· 256 219 unsigned int gsm_size; 257 220 int err; 258 221 259 - if (IS_SRIOV_VF(xe)) 222 + if (IS_SRIOV_VF(xe) || GRAPHICS_VERx100(xe) >= 1250) 260 223 gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */ 261 224 else 262 225 gsm_size = probe_gsm_size(pdev); ··· 284 247 ggtt->pt_ops = &xelp_pt_ops; 285 248 286 249 ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); 287 - 288 - drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), 289 - ggtt->size - xe_wopcm_size(xe)); 290 - mutex_init(&ggtt->lock); 291 - primelockdep(ggtt); 250 + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); 292 251 293 252 err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); 294 253 if (err) 295 254 return err; 296 255 297 256 if (IS_SRIOV_VF(xe)) { 298 - err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); 257 + err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); 299 258 if (err) 300 259 return err; 301 260 } ··· 462 429 } 463 430 464 431 /** 465 - * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses 432 + * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses 466 433 * @node: the &xe_ggtt_node to hold reserved GGTT node 467 434 * @start: the starting GGTT address of the reserved region 468 435 * @end: the end GGTT address of the reserved region 469 436 * 470 - * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. 437 + * To be used in cases where ggtt->lock is already taken. 438 + * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node. 471 439 * 472 440 * Return: 0 on success or a negative error code on failure. 
473 441 */ 474 - int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) 442 + int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end) 475 443 { 476 444 struct xe_ggtt *ggtt = node->ggtt; 477 445 int err; ··· 481 447 xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); 482 448 xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); 483 449 xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); 450 + lockdep_assert_held(&ggtt->lock); 484 451 485 452 node->base.color = 0; 486 453 node->base.start = start; 487 454 node->base.size = end - start; 488 455 489 - mutex_lock(&ggtt->lock); 490 456 err = drm_mm_reserve_node(&ggtt->mm, &node->base); 491 - mutex_unlock(&ggtt->lock); 492 457 493 458 if (xe_gt_WARN(ggtt->tile->primary_gt, err, 494 459 "Failed to balloon GGTT %#llx-%#llx (%pe)\n", ··· 499 466 } 500 467 501 468 /** 502 - * xe_ggtt_node_remove_balloon - release a reserved GGTT region 469 + * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region 503 470 * @node: the &xe_ggtt_node with reserved GGTT region 504 471 * 505 - * See xe_ggtt_node_insert_balloon() for details. 472 + * To be used in cases where ggtt->lock is already taken. 473 + * See xe_ggtt_node_insert_balloon_locked() for details. 
506 474 */ 507 - void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) 475 + void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node) 508 476 { 509 - if (!node || !node->ggtt) 477 + if (!xe_ggtt_node_allocated(node)) 510 478 return; 511 479 512 - if (!drm_mm_node_allocated(&node->base)) 513 - goto free_node; 480 + lockdep_assert_held(&node->ggtt->lock); 514 481 515 482 xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); 516 483 517 - mutex_lock(&node->ggtt->lock); 518 484 drm_mm_remove_node(&node->base); 519 - mutex_unlock(&node->ggtt->lock); 485 + } 520 486 521 - free_node: 522 - xe_ggtt_node_fini(node); 487 + static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size) 488 + { 489 + struct xe_tile *tile = ggtt->tile; 490 + struct xe_device *xe = tile_to_xe(tile); 491 + u64 __maybe_unused wopcm = xe_wopcm_size(xe); 492 + 493 + xe_tile_assert(tile, start >= wopcm); 494 + xe_tile_assert(tile, start + size < ggtt->size - wopcm); 495 + } 496 + 497 + /** 498 + * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range. 499 + * @ggtt: the &xe_ggtt struct instance 500 + * @shift: change to the location of area provisioned for current VF 501 + * 502 + * This function moves all nodes from the GGTT VM, to a temp list. These nodes are expected 503 + * to represent allocations in range formerly assigned to current VF, before the range changed. 504 + * When the GGTT VM is completely clear of any nodes, they are re-added with shifted offsets. 505 + * 506 + * The function has no ability of failing - because it shifts existing nodes, without 507 + * any additional processing. If the nodes were successfully existing at the old address, 508 + * they will do the same at the new one. A fail inside this function would indicate that 509 + * the list of nodes was either already damaged, or that the shift brings the address range 510 + * outside of valid bounds. Both cases justify an assert rather than error code. 
511 + */ 512 + void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift) 513 + { 514 + struct xe_tile *tile __maybe_unused = ggtt->tile; 515 + struct drm_mm_node *node, *tmpn; 516 + LIST_HEAD(temp_list_head); 517 + 518 + lockdep_assert_held(&ggtt->lock); 519 + 520 + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) 521 + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) 522 + xe_ggtt_assert_fit(ggtt, node->start + shift, node->size); 523 + 524 + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) { 525 + drm_mm_remove_node(node); 526 + list_add(&node->node_list, &temp_list_head); 527 + } 528 + 529 + list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) { 530 + list_del(&node->node_list); 531 + node->start += shift; 532 + drm_mm_reserve_node(&ggtt->mm, node); 533 + xe_tile_assert(tile, drm_mm_node_allocated(node)); 534 + } 523 535 } 524 536 525 537 /** ··· 615 537 * xe_ggtt_node_init - Initialize %xe_ggtt_node struct 616 538 * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. 617 539 * 618 - * This function will allocated the struct %xe_ggtt_node and return it's pointer. 540 + * This function will allocate the struct %xe_ggtt_node and return its pointer. 619 541 * This struct will then be freed after the node removal upon xe_ggtt_node_remove() 620 - * or xe_ggtt_node_remove_balloon(). 542 + * or xe_ggtt_node_remove_balloon_locked(). 621 543 * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated 622 544 * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), 623 - * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. 545 + * xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved in GGTT. 624 546 * 625 547 * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 
626 548 **/ ··· 642 564 * @node: the &xe_ggtt_node to be freed 643 565 * 644 566 * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), 645 - * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, 567 + * or xe_ggtt_node_insert_balloon_locked(); and this @node is not going to be reused, then, 646 568 * this function needs to be called to free the %xe_ggtt_node struct 647 569 **/ 648 570 void xe_ggtt_node_fini(struct xe_ggtt_node *node) ··· 667 589 /** 668 590 * xe_ggtt_map_bo - Map the BO into GGTT 669 591 * @ggtt: the &xe_ggtt where node will be mapped 592 + * @node: the &xe_ggtt_node where this BO is mapped 670 593 * @bo: the &xe_bo to be mapped 594 + * @pat_index: Which pat_index to use. 671 595 */ 672 - void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 596 + void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, 597 + struct xe_bo *bo, u16 pat_index) 598 + { 599 + 600 + u64 start, pte, end; 601 + struct xe_res_cursor cur; 602 + 603 + if (XE_WARN_ON(!node)) 604 + return; 605 + 606 + start = node->base.start; 607 + end = start + bo->size; 608 + 609 + pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); 610 + if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { 611 + xe_assert(xe_bo_device(bo), bo->ttm.ttm); 612 + 613 + for (xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &cur); 614 + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) 615 + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, 616 + pte | xe_res_dma(&cur)); 617 + } else { 618 + /* Prepend GPU offset */ 619 + pte |= vram_region_gpu_offset(bo->ttm.resource); 620 + 621 + for (xe_res_first(bo->ttm.resource, 0, bo->size, &cur); 622 + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) 623 + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, 624 + pte + cur.start); 625 + } 626 + } 627 + 628 + /** 629 + * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT 630 + * @ggtt: the &xe_ggtt where node will be mapped 631 + * 
@bo: the &xe_bo to be mapped 632 + * 633 + * This is used to restore a GGTT mapping after suspend. 634 + */ 635 + void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) 673 636 { 674 637 u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; 675 638 u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; 676 - u64 start; 677 - u64 offset, pte; 678 639 679 - if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id])) 680 - return; 681 - 682 - start = bo->ggtt_node[ggtt->tile->id]->base.start; 683 - 684 - for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { 685 - pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); 686 - ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); 687 - } 640 + mutex_lock(&ggtt->lock); 641 + xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index); 642 + mutex_unlock(&ggtt->lock); 688 643 } 689 644 690 645 static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, ··· 756 645 xe_ggtt_node_fini(bo->ggtt_node[tile_id]); 757 646 bo->ggtt_node[tile_id] = NULL; 758 647 } else { 759 - xe_ggtt_map_bo(ggtt, bo); 648 + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; 649 + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; 650 + 651 + xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index); 760 652 } 761 653 mutex_unlock(&ggtt->lock); 762 654 ··· 954 840 mutex_unlock(&ggtt->lock); 955 841 956 842 return total; 843 + } 844 + 845 + /** 846 + * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO 847 + * @ggtt: &xe_ggtt 848 + * @bo: &xe_bo 849 + * @pat_index: The pat_index for the PTE. 850 + * 851 + * This function returns the pte_flags for a given BO, without address. 852 + * It's used for DPT to fill a GGTT mapped BO with a linear lookup table. 
853 + */ 854 + u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, 855 + struct xe_bo *bo, u16 pat_index) 856 + { 857 + return ggtt->pt_ops->pte_encode_flags(bo, pat_index); 858 + } 859 + 860 + /** 861 + * xe_ggtt_read_pte - Read a PTE from the GGTT 862 + * @ggtt: &xe_ggtt 863 + * @offset: the offset for which the mapping should be read. 864 + * 865 + * Used by testcases, and by display reading out an inherited bios FB. 866 + */ 867 + u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) 868 + { 869 + return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE)); 957 870 }

+20 -4

drivers/gpu/drm/xe/xe_ggtt.h

··· 9 9 #include "xe_ggtt_types.h" 10 10 11 11 struct drm_printer; 12 + struct xe_tile; 12 13 14 + struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile); 13 15 int xe_ggtt_init_early(struct xe_ggtt *ggtt); 16 + int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size); 14 17 int xe_ggtt_init(struct xe_ggtt *ggtt); 15 18 16 19 struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); 17 20 void xe_ggtt_node_fini(struct xe_ggtt_node *node); 18 - int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, 19 - u64 start, u64 size); 20 - void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); 21 + int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, 22 + u64 start, u64 size); 23 + void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node); 24 + void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift); 21 25 22 26 int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); 23 27 int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, 24 28 u32 size, u32 align, u32 mm_flags); 25 29 void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); 26 30 bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); 27 - void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 31 + void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, 32 + struct xe_bo *bo, u16 pat_index); 33 + void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); 28 34 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 29 35 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 30 36 u64 start, u64 end); ··· 43 37 #ifdef CONFIG_PCI_IOV 44 38 void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); 45 39 #endif 40 + 41 + #ifndef CONFIG_LOCKDEP 42 + static inline void xe_ggtt_might_lock(struct xe_ggtt *ggtt) 43 + { } 44 + #else 45 + void xe_ggtt_might_lock(struct xe_ggtt *ggtt); 46 + #endif 47 + 48 + u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, struct xe_bo *bo, u16 
pat_index); 49 + u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset); 46 50 47 51 #endif

+2 -2

drivers/gpu/drm/xe/xe_ggtt_types.h

··· 74 74 * Which can vary from platform to platform. 75 75 */ 76 76 struct xe_ggtt_pt_ops { 77 - /** @pte_encode_bo: Encode PTE address for a given BO */ 78 - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); 77 + /** @pte_encode_flags: Encode PTE flags for a given BO */ 78 + u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); 79 79 /** @ggtt_set_pte: Directly write into GGTT's PTE */ 80 80 void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); 81 81 };

+3 -3

drivers/gpu/drm/xe/xe_gt.c

··· 118 118 xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); 119 119 } 120 120 121 - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); 121 + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); 122 122 xe_force_wake_put(gt_to_fw(gt), fw_ref); 123 123 } 124 124 ··· 417 417 if (err) 418 418 return err; 419 419 420 + xe_mocs_init_early(gt); 421 + 420 422 return 0; 421 423 } 422 424 ··· 635 633 err = xe_gt_pagefault_init(gt); 636 634 if (err) 637 635 return err; 638 - 639 - xe_mocs_init_early(gt); 640 636 641 637 err = xe_gt_sysfs_init(gt); 642 638 if (err)

+76 -20

drivers/gpu/drm/xe/xe_gt_debugfs.c

··· 122 122 return ret; 123 123 } 124 124 125 - static int force_reset(struct xe_gt *gt, struct drm_printer *p) 126 - { 127 - xe_pm_runtime_get(gt_to_xe(gt)); 128 - xe_gt_reset_async(gt); 129 - xe_pm_runtime_put(gt_to_xe(gt)); 130 - 131 - return 0; 132 - } 133 - 134 - static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p) 135 - { 136 - xe_pm_runtime_get(gt_to_xe(gt)); 137 - xe_gt_reset(gt); 138 - xe_pm_runtime_put(gt_to_xe(gt)); 139 - 140 - return 0; 141 - } 142 - 143 125 static int sa_info(struct xe_gt *gt, struct drm_printer *p) 144 126 { 145 127 struct xe_tile *tile = gt_to_tile(gt); ··· 288 306 * - without access to the PF specific data 289 307 */ 290 308 static const struct drm_info_list vf_safe_debugfs_list[] = { 291 - {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, 292 - {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync}, 293 309 {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, 294 310 {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, 295 311 {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, ··· 312 332 {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, 313 333 }; 314 334 335 + static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos, 336 + void (*call)(struct xe_gt *), struct xe_gt *gt) 337 + { 338 + bool yes; 339 + int ret; 340 + 341 + if (*ppos) 342 + return -EINVAL; 343 + ret = kstrtobool_from_user(userbuf, count, &yes); 344 + if (ret < 0) 345 + return ret; 346 + if (yes) 347 + call(gt); 348 + return count; 349 + } 350 + 351 + static void force_reset(struct xe_gt *gt) 352 + { 353 + struct xe_device *xe = gt_to_xe(gt); 354 + 355 + xe_pm_runtime_get(xe); 356 + xe_gt_reset_async(gt); 357 + xe_pm_runtime_put(xe); 358 + } 359 + 360 + static ssize_t force_reset_write(struct file *file, 361 + const char __user *userbuf, 362 + size_t count, loff_t *ppos) 363 + { 364 + struct seq_file *s = 
file->private_data; 365 + struct xe_gt *gt = s->private; 366 + 367 + return write_to_gt_call(userbuf, count, ppos, force_reset, gt); 368 + } 369 + 370 + static int force_reset_show(struct seq_file *s, void *unused) 371 + { 372 + struct xe_gt *gt = s->private; 373 + 374 + force_reset(gt); /* to be deprecated! */ 375 + return 0; 376 + } 377 + DEFINE_SHOW_STORE_ATTRIBUTE(force_reset); 378 + 379 + static void force_reset_sync(struct xe_gt *gt) 380 + { 381 + struct xe_device *xe = gt_to_xe(gt); 382 + 383 + xe_pm_runtime_get(xe); 384 + xe_gt_reset(gt); 385 + xe_pm_runtime_put(xe); 386 + } 387 + 388 + static ssize_t force_reset_sync_write(struct file *file, 389 + const char __user *userbuf, 390 + size_t count, loff_t *ppos) 391 + { 392 + struct seq_file *s = file->private_data; 393 + struct xe_gt *gt = s->private; 394 + 395 + return write_to_gt_call(userbuf, count, ppos, force_reset_sync, gt); 396 + } 397 + 398 + static int force_reset_sync_show(struct seq_file *s, void *unused) 399 + { 400 + struct xe_gt *gt = s->private; 401 + 402 + force_reset_sync(gt); /* to be deprecated! */ 403 + return 0; 404 + } 405 + DEFINE_SHOW_STORE_ATTRIBUTE(force_reset_sync); 406 + 315 407 void xe_gt_debugfs_register(struct xe_gt *gt) 316 408 { 317 409 struct xe_device *xe = gt_to_xe(gt); ··· 406 354 * it by looking at its parent node private data. 407 355 */ 408 356 root->d_inode->i_private = gt; 357 + 358 + /* VF safe */ 359 + debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops); 360 + debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops); 409 361 410 362 drm_debugfs_create_files(vf_safe_debugfs_list, 411 363 ARRAY_SIZE(vf_safe_debugfs_list),

+40 -66

drivers/gpu/drm/xe/xe_gt_pagefault.c

··· 14 14 #include "abi/guc_actions_abi.h" 15 15 #include "xe_bo.h" 16 16 #include "xe_gt.h" 17 + #include "xe_gt_printk.h" 17 18 #include "xe_gt_stats.h" 18 19 #include "xe_gt_tlb_invalidation.h" 19 20 #include "xe_guc.h" ··· 69 68 70 69 static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) 71 70 { 72 - return BIT(tile->id) & vma->tile_present && 73 - !(BIT(tile->id) & vma->tile_invalidated); 74 - } 75 - 76 - static bool vma_matches(struct xe_vma *vma, u64 page_addr) 77 - { 78 - if (page_addr > xe_vma_end(vma) - 1 || 79 - page_addr + SZ_4K - 1 < xe_vma_start(vma)) 80 - return false; 81 - 82 - return true; 83 - } 84 - 85 - static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) 86 - { 87 - struct xe_vma *vma = NULL; 88 - 89 - if (vm->usm.last_fault_vma) { /* Fast lookup */ 90 - if (vma_matches(vm->usm.last_fault_vma, page_addr)) 91 - vma = vm->usm.last_fault_vma; 92 - } 93 - if (!vma) 94 - vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 95 - 96 - return vma; 71 + return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, 72 + vma->tile_invalidated); 97 73 } 98 74 99 75 static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, ··· 121 143 122 144 trace_xe_vma_pagefault(vma); 123 145 124 - /* Check if VMA is valid */ 146 + /* Check if VMA is valid, opportunistic check only */ 125 147 if (vma_is_valid(tile, vma) && !atomic) 126 148 return 0; 127 149 ··· 158 180 159 181 dma_fence_wait(fence, false); 160 182 dma_fence_put(fence); 161 - vma->tile_invalidated &= ~BIT(tile->id); 162 183 163 184 unlock_dma_resv: 164 185 drm_exec_fini(&exec); ··· 208 231 goto unlock_vm; 209 232 } 210 233 211 - vma = lookup_vma(vm, pf->page_addr); 234 + vma = xe_vm_find_vma_by_addr(vm, pf->page_addr); 212 235 if (!vma) { 213 236 err = -EINVAL; 214 237 goto unlock_vm; ··· 243 266 return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 244 267 } 245 268 246 - static void print_pagefault(struct xe_device *xe, struct pagefault *pf) 269 + static 
void print_pagefault(struct xe_gt *gt, struct pagefault *pf) 247 270 { 248 - drm_dbg(&xe->drm, "\n\tASID: %d\n" 249 - "\tVFID: %d\n" 250 - "\tPDATA: 0x%04x\n" 251 - "\tFaulted Address: 0x%08x%08x\n" 252 - "\tFaultType: %d\n" 253 - "\tAccessType: %d\n" 254 - "\tFaultLevel: %d\n" 255 - "\tEngineClass: %d %s\n" 256 - "\tEngineInstance: %d\n", 257 - pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), 258 - lower_32_bits(pf->page_addr), 259 - pf->fault_type, pf->access_type, pf->fault_level, 260 - pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), 261 - pf->engine_instance); 271 + xe_gt_dbg(gt, "\n\tASID: %d\n" 272 + "\tVFID: %d\n" 273 + "\tPDATA: 0x%04x\n" 274 + "\tFaulted Address: 0x%08x%08x\n" 275 + "\tFaultType: %d\n" 276 + "\tAccessType: %d\n" 277 + "\tFaultLevel: %d\n" 278 + "\tEngineClass: %d %s\n" 279 + "\tEngineInstance: %d\n", 280 + pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), 281 + lower_32_bits(pf->page_addr), 282 + pf->fault_type, pf->access_type, pf->fault_level, 283 + pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), 284 + pf->engine_instance); 262 285 } 263 286 264 287 #define PF_MSG_LEN_DW 4 ··· 310 333 int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) 311 334 { 312 335 struct xe_gt *gt = guc_to_gt(guc); 313 - struct xe_device *xe = gt_to_xe(gt); 314 336 struct pf_queue *pf_queue; 315 337 unsigned long flags; 316 338 u32 asid; ··· 334 358 pf_queue->num_dw; 335 359 queue_work(gt->usm.pf_wq, &pf_queue->worker); 336 360 } else { 337 - drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); 361 + xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n"); 338 362 } 339 363 spin_unlock_irqrestore(&pf_queue->lock, flags); 340 364 ··· 347 371 { 348 372 struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); 349 373 struct xe_gt *gt = pf_queue->gt; 350 - struct xe_device *xe = gt_to_xe(gt); 351 374 struct xe_guc_pagefault_reply reply = {}; 352 375 struct pagefault pf = 
{}; 353 376 unsigned long threshold; ··· 357 382 while (get_pagefault(pf_queue, &pf)) { 358 383 ret = handle_pagefault(gt, &pf); 359 384 if (unlikely(ret)) { 360 - print_pagefault(xe, &pf); 385 + print_pagefault(gt, &pf); 361 386 pf.fault_unsuccessful = 1; 362 - drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); 387 + xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret)); 363 388 } 364 389 365 390 reply.dw0 = FIELD_PREP(PFR_VALID, 1) | ··· 512 537 return (granularity_in_byte(val) / 32); 513 538 } 514 539 515 - static void print_acc(struct xe_device *xe, struct acc *acc) 540 + static void print_acc(struct xe_gt *gt, struct acc *acc) 516 541 { 517 - drm_warn(&xe->drm, "Access counter request:\n" 518 - "\tType: %s\n" 519 - "\tASID: %d\n" 520 - "\tVFID: %d\n" 521 - "\tEngine: %d:%d\n" 522 - "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" 523 - "\tSub_Granularity Vector: 0x%08x\n" 524 - "\tVA Range base: 0x%016llx\n", 525 - acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", 526 - acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, 527 - granularity_in_byte(acc->granularity) / SZ_1K, 528 - sub_granularity_in_byte(acc->granularity) / SZ_1K, 529 - acc->sub_granularity, acc->va_range_base); 542 + xe_gt_warn(gt, "Access counter request:\n" 543 + "\tType: %s\n" 544 + "\tASID: %d\n" 545 + "\tVFID: %d\n" 546 + "\tEngine: %d:%d\n" 547 + "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" 548 + "\tSub_Granularity Vector: 0x%08x\n" 549 + "\tVA Range base: 0x%016llx\n", 550 + acc->access_type ? 
"AC_NTFY_VAL" : "AC_TRIG_VAL", 551 + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, 552 + granularity_in_byte(acc->granularity) / SZ_1K, 553 + sub_granularity_in_byte(acc->granularity) / SZ_1K, 554 + acc->sub_granularity, acc->va_range_base); 530 555 } 531 556 532 557 static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) ··· 624 649 { 625 650 struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); 626 651 struct xe_gt *gt = acc_queue->gt; 627 - struct xe_device *xe = gt_to_xe(gt); 628 652 struct acc acc = {}; 629 653 unsigned long threshold; 630 654 int ret; ··· 633 659 while (get_acc(acc_queue, &acc)) { 634 660 ret = handle_acc(gt, &acc); 635 661 if (unlikely(ret)) { 636 - print_acc(xe, &acc); 637 - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); 662 + print_acc(gt, &acc); 663 + xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret)); 638 664 } 639 665 640 666 if (time_after(jiffies, threshold) && ··· 679 705 acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; 680 706 queue_work(gt->usm.acc_wq, &acc_queue->worker); 681 707 } else { 682 - drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); 708 + xe_gt_warn(gt, "ACC Queue full, dropping ACC\n"); 683 709 } 684 710 spin_unlock(&acc_queue->lock); 685 711

+4 -2

drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c

··· 1520 1520 { 1521 1521 int err; 1522 1522 1523 + xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); 1524 + 1523 1525 mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); 1524 1526 if (vfid) 1525 1527 err = pf_provision_vf_lmem(gt, vfid, size); ··· 1631 1629 xe_gt_assert(gt, num_vfs); 1632 1630 xe_gt_assert(gt, !xe_gt_is_media_type(gt)); 1633 1631 1634 - if (!IS_DGFX(gt_to_xe(gt))) 1632 + if (!xe_device_has_lmtt(gt_to_xe(gt))) 1635 1633 return 0; 1636 1634 1637 1635 mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); ··· 2165 2163 valid_all = valid_all && valid_ggtt; 2166 2164 valid_any = valid_any || (valid_ggtt && is_primary); 2167 2165 2168 - if (IS_DGFX(xe)) { 2166 + if (xe_device_has_lmtt(xe)) { 2169 2167 bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); 2170 2168 2171 2169 valid_any = valid_any || (valid_lmem && is_primary);

+2 -2

drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c

··· 308 308 if (!xe_gt_is_media_type(gt)) { 309 309 debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", 310 310 0644, parent, parent, &ggtt_fops); 311 - if (IS_DGFX(gt_to_xe(gt))) 311 + if (xe_device_has_lmtt(gt_to_xe(gt))) 312 312 debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare", 313 313 0644, parent, parent, &lmem_fops); 314 314 } ··· 558 558 drm_debugfs_create_files(pf_ggtt_info, 559 559 ARRAY_SIZE(pf_ggtt_info), 560 560 pfdentry, minor); 561 - if (IS_DGFX(gt_to_xe(gt))) 561 + if (xe_device_has_lmtt(gt_to_xe(gt))) 562 562 drm_debugfs_create_files(pf_lmem_info, 563 563 ARRAY_SIZE(pf_lmem_info), 564 564 pfdentry, minor);

+146 -151

drivers/gpu/drm/xe/xe_gt_sriov_vf.c

··· 82 82 } 83 83 84 84 static int guc_action_match_version(struct xe_guc *guc, 85 - u32 wanted_branch, u32 wanted_major, u32 wanted_minor, 86 - u32 *branch, u32 *major, u32 *minor, u32 *patch) 85 + struct xe_uc_fw_version *wanted, 86 + struct xe_uc_fw_version *found) 87 87 { 88 88 u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = { 89 89 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 90 90 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 91 91 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, 92 92 GUC_ACTION_VF2GUC_MATCH_VERSION), 93 - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) | 94 - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) | 95 - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor), 93 + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) | 94 + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) | 95 + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor), 96 96 }; 97 97 u32 response[GUC_MAX_MMIO_MSG_LEN]; 98 98 int ret; ··· 106 106 if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0]))) 107 107 return -EPROTO; 108 108 109 - *branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); 110 - *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); 111 - *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); 112 - *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); 109 + memset(found, 0, sizeof(struct xe_uc_fw_version)); 110 + found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); 111 + found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); 112 + found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); 113 + found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); 113 114 114 115 return 0; 115 116 } 116 117 117 - static void 
vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) 118 + static int guc_action_match_version_any(struct xe_guc *guc, 119 + struct xe_uc_fw_version *found) 120 + { 121 + struct xe_uc_fw_version wanted = { 122 + .branch = GUC_VERSION_BRANCH_ANY, 123 + .major = GUC_VERSION_MAJOR_ANY, 124 + .minor = GUC_VERSION_MINOR_ANY, 125 + .patch = 0 126 + }; 127 + 128 + return guc_action_match_version(guc, &wanted, found); 129 + } 130 + 131 + static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) 118 132 { 119 133 struct xe_device *xe = gt_to_xe(gt); 134 + 135 + memset(ver, 0, sizeof(struct xe_uc_fw_version)); 120 136 121 137 switch (xe->info.platform) { 122 138 case XE_TIGERLAKE ... XE_PVC: 123 139 /* 1.1 this is current baseline for Xe driver */ 124 - *branch = 0; 125 - *major = 1; 126 - *minor = 1; 140 + ver->branch = 0; 141 + ver->major = 1; 142 + ver->minor = 1; 127 143 break; 128 144 default: 129 145 /* 1.2 has support for the GMD_ID KLV */ 130 - *branch = 0; 131 - *major = 1; 132 - *minor = 2; 146 + ver->branch = 0; 147 + ver->major = 1; 148 + ver->minor = 2; 133 149 break; 134 150 } 135 151 } 136 152 137 - static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) 153 + static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) 138 154 { 139 155 /* for now it's the same as minimum */ 140 - return vf_minimum_guc_version(gt, branch, major, minor); 156 + return vf_minimum_guc_version(gt, ver); 141 157 } 142 158 143 159 static int vf_handshake_with_guc(struct xe_gt *gt) 144 160 { 145 - struct xe_gt_sriov_vf_guc_version *guc_version = &gt->sriov.vf.guc_version; 161 + struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version; 162 + struct xe_uc_fw_version wanted = {0}; 146 163 struct xe_guc *guc = &gt->uc.guc; 147 - u32 wanted_branch, wanted_major, wanted_minor; 148 - u32 branch, major, minor, patch; 164 + bool old = false; 149 165 int err; 150 166 151 167 
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 152 168 153 169 /* select wanted version - prefer previous (if any) */ 154 170 if (guc_version->major || guc_version->minor) { 155 - wanted_branch = guc_version->branch; 156 - wanted_major = guc_version->major; 157 - wanted_minor = guc_version->minor; 171 + wanted = *guc_version; 172 + old = true; 158 173 } else { 159 - vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); 160 - xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY); 174 + vf_wanted_guc_version(gt, &wanted); 175 + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); 176 + 177 + /* First time we handshake, so record the minimum wanted */ 178 + gt->sriov.vf.wanted_guc_version = wanted; 161 179 } 162 180 163 - err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor, 164 - &branch, &major, &minor, &patch); 181 + err = guc_action_match_version(guc, &wanted, guc_version); 165 182 if (unlikely(err)) 166 183 goto fail; 167 184 168 - /* we don't support interface version change */ 169 - if ((guc_version->major || guc_version->minor) && 170 - (guc_version->branch != branch || guc_version->major != major || 171 - guc_version->minor != minor)) { 172 - xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", 173 - branch, major, minor, patch); 174 - xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", 175 - guc_version->branch, guc_version->major, 176 - guc_version->minor, guc_version->patch); 177 - err = -EREMCHG; 178 - goto fail; 185 + if (old) { 186 + /* we don't support interface version change */ 187 + if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) { 188 + xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", 189 + guc_version->branch, guc_version->major, 190 + guc_version->minor, guc_version->patch); 191 + xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", 192 + wanted.branch, wanted.major, 193 + wanted.minor, wanted.patch); 194 
+ err = -EREMCHG; 195 + goto fail; 196 + } else { 197 + /* version is unchanged, no need to re-verify it */ 198 + return 0; 199 + } 179 200 } 180 201 181 202 /* illegal */ 182 - if (major > wanted_major) { 203 + if (guc_version->major > wanted.major) { 183 204 err = -EPROTO; 184 205 goto unsupported; 185 206 } 186 207 187 208 /* there's no fallback on major version. */ 188 - if (major != wanted_major) { 209 + if (guc_version->major != wanted.major) { 189 210 err = -ENOPKG; 190 211 goto unsupported; 191 212 } 192 213 193 214 /* check against minimum version supported by us */ 194 - vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); 195 - xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY); 196 - if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) { 215 + vf_minimum_guc_version(gt, &wanted); 216 + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); 217 + if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) { 197 218 err = -ENOKEY; 198 219 goto unsupported; 199 220 } 200 221 201 222 xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n", 202 - branch, major, minor, patch); 223 + guc_version->branch, guc_version->major, 224 + guc_version->minor, guc_version->patch); 203 225 204 - guc_version->branch = branch; 205 - guc_version->major = major; 206 - guc_version->minor = minor; 207 - guc_version->patch = patch; 208 226 return 0; 209 227 210 228 unsupported: 211 229 xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n", 212 - branch, major, minor, patch, ERR_PTR(err)); 230 + guc_version->branch, guc_version->major, 231 + guc_version->minor, guc_version->patch, 232 + ERR_PTR(err)); 213 233 fail: 214 234 xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n", 215 - wanted_major, wanted_minor, ERR_PTR(err)); 235 + wanted.major, wanted.minor, ERR_PTR(err)); 216 236 217 237 /* try again with *any* just to query which version is supported */ 218 - if (!guc_action_match_version(guc, 
GUC_VERSION_BRANCH_ANY, 219 - GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY, 220 - &branch, &major, &minor, &patch)) 238 + if (!guc_action_match_version_any(guc, &wanted)) 221 239 xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n", 222 - branch, major, minor, patch); 240 + wanted.branch, wanted.major, wanted.minor, wanted.patch); 223 241 return err; 224 242 } 225 243 ··· 266 248 return err; 267 249 268 250 return 0; 251 + } 252 + 253 + /** 254 + * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions 255 + * @gt: the &xe_gt 256 + * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version 257 + * @found: pointer to the xe_uc_fw_version to be filled with the found version 258 + * 259 + * This function is for VF use only and it can only be used after successful 260 + * version handshake with the GuC. 261 + */ 262 + void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, 263 + struct xe_uc_fw_version *wanted, 264 + struct xe_uc_fw_version *found) 265 + { 266 + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 267 + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); 268 + 269 + if (wanted) 270 + *wanted = gt->sriov.vf.wanted_guc_version; 271 + 272 + if (found) 273 + *found = gt->sriov.vf.guc_version; 269 274 } 270 275 271 276 static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) ··· 456 415 xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", 457 416 start, start + size - 1, size / SZ_1K); 458 417 418 + config->ggtt_shift = start - (s64)config->ggtt_base; 459 419 config->ggtt_base = start; 460 420 config->ggtt_size = size; 461 421 ··· 602 560 return gt->sriov.vf.self_config.lmem_size; 603 561 } 604 562 605 - static struct xe_ggtt_node * 606 - vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) 607 - { 608 - struct xe_ggtt_node *node; 609 - int err; 610 - 611 - node = xe_ggtt_node_init(ggtt); 612 - if (IS_ERR(node)) 613 - return node; 614 - 615 - err = xe_ggtt_node_insert_balloon(node, start, end); 
616 - if (err) { 617 - xe_ggtt_node_fini(node); 618 - return ERR_PTR(err); 619 - } 620 - 621 - return node; 622 - } 623 - 624 - static int vf_balloon_ggtt(struct xe_gt *gt) 625 - { 626 - struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; 627 - struct xe_tile *tile = gt_to_tile(gt); 628 - struct xe_ggtt *ggtt = tile->mem.ggtt; 629 - struct xe_device *xe = gt_to_xe(gt); 630 - u64 start, end; 631 - 632 - xe_gt_assert(gt, IS_SRIOV_VF(xe)); 633 - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); 634 - 635 - if (!config->ggtt_size) 636 - return -ENODATA; 637 - 638 - /* 639 - * VF can only use part of the GGTT as allocated by the PF: 640 - * 641 - * WOPCM GUC_GGTT_TOP 642 - * |<------------ Total GGTT size ------------------>| 643 - * 644 - * VF GGTT base -->|<- size ->| 645 - * 646 - * +--------------------+----------+-----------------+ 647 - * |////////////////////| block |\\\\\\\\\\\\\\\\\| 648 - * +--------------------+----------+-----------------+ 649 - * 650 - * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| 651 - */ 652 - 653 - start = xe_wopcm_size(xe); 654 - end = config->ggtt_base; 655 - if (end != start) { 656 - tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); 657 - if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) 658 - return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); 659 - } 660 - 661 - start = config->ggtt_base + config->ggtt_size; 662 - end = GUC_GGTT_TOP; 663 - if (end != start) { 664 - tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); 665 - if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { 666 - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); 667 - return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); 668 - } 669 - } 670 - 671 - return 0; 672 - } 673 - 674 - static void deballoon_ggtt(struct drm_device *drm, void *arg) 675 - { 676 - struct xe_tile *tile = arg; 677 - 678 - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); 679 - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); 680 - 
xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); 681 - } 682 - 683 563 /** 684 - * xe_gt_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. 564 + * xe_gt_sriov_vf_ggtt - VF GGTT configuration. 685 565 * @gt: the &xe_gt 686 566 * 687 567 * This function is for VF use only. 688 568 * 689 - * Return: 0 on success or a negative error code on failure. 569 + * Return: size of the GGTT assigned to VF. 690 570 */ 691 - int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt) 571 + u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt) 692 572 { 693 - struct xe_tile *tile = gt_to_tile(gt); 694 - struct xe_device *xe = tile_to_xe(tile); 695 - int err; 573 + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 574 + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); 575 + xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); 696 576 697 - if (xe_gt_is_media_type(gt)) 698 - return 0; 577 + return gt->sriov.vf.self_config.ggtt_size; 578 + } 699 579 700 - err = vf_balloon_ggtt(gt); 701 - if (err) 702 - return err; 580 + /** 581 + * xe_gt_sriov_vf_ggtt_base - VF GGTT base offset. 582 + * @gt: the &xe_gt 583 + * 584 + * This function is for VF use only. 585 + * 586 + * Return: base offset of the GGTT assigned to VF. 587 + */ 588 + u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt) 589 + { 590 + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 591 + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); 592 + xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); 703 593 704 - return drmm_add_action_or_reset(&xe->drm, deballoon_ggtt, tile); 594 + return gt->sriov.vf.self_config.ggtt_base; 595 + } 596 + 597 + /** 598 + * xe_gt_sriov_vf_ggtt_shift - Return shift in GGTT range due to VF migration 599 + * @gt: the &xe_gt struct instance 600 + * 601 + * This function is for VF use only. 
602 + * 603 + * Return: The shift value; could be negative 604 + */ 605 + s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt) 606 + { 607 + struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; 608 + 609 + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 610 + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); 611 + 612 + return config->ggtt_shift; 705 613 } 706 614 707 615 static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) ··· 1035 1043 string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); 1036 1044 drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf); 1037 1045 1046 + drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); 1047 + 1038 1048 if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { 1039 1049 string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); 1040 1050 drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); ··· 1073 1079 */ 1074 1080 void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) 1075 1081 { 1076 - struct xe_gt_sriov_vf_guc_version *guc_version = &gt->sriov.vf.guc_version; 1082 + struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version; 1083 + struct xe_uc_fw_version *wanted = &gt->sriov.vf.wanted_guc_version; 1077 1084 struct xe_gt_sriov_vf_relay_version *pf_version = &gt->sriov.vf.pf_version; 1078 - u32 branch, major, minor; 1085 + struct xe_uc_fw_version ver; 1079 1086 1080 1087 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 1081 1088 1082 1089 drm_printf(p, "GuC ABI:\n"); 1083 1090 1084 - vf_minimum_guc_version(gt, &branch, &major, &minor); 1085 - drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor); 1091 + vf_minimum_guc_version(gt, &ver); 1092 + drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor); 1086 1093 1087 - vf_wanted_guc_version(gt, &branch, &major, &minor); 1088 - drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor); 1094 + drm_printf(p, "\twanted:\t%u.%u.%u.*\n", 1095 
+ wanted->branch, wanted->major, wanted->minor); 1089 1096 1090 1097 drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n", 1091 1098 guc_version->branch, guc_version->major,

+8 -1

drivers/gpu/drm/xe/xe_gt_sriov_vf.h

··· 11 11 struct drm_printer; 12 12 struct xe_gt; 13 13 struct xe_reg; 14 + struct xe_uc_fw_version; 14 15 15 16 int xe_gt_sriov_vf_reset(struct xe_gt *gt); 16 17 int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); 18 + void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, 19 + struct xe_uc_fw_version *wanted, 20 + struct xe_uc_fw_version *found); 17 21 int xe_gt_sriov_vf_query_config(struct xe_gt *gt); 18 22 int xe_gt_sriov_vf_connect(struct xe_gt *gt); 19 23 int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); 20 - int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); 21 24 int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); 22 25 void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); 23 26 24 27 u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); 25 28 u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); 26 29 u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); 30 + u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt); 31 + u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt); 32 + s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt); 33 + 27 34 u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); 28 35 void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); 29 36

+6 -15

drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h

··· 7 7 #define _XE_GT_SRIOV_VF_TYPES_H_ 8 8 9 9 #include <linux/types.h> 10 - 11 - /** 12 - * struct xe_gt_sriov_vf_guc_version - GuC ABI version details. 13 - */ 14 - struct xe_gt_sriov_vf_guc_version { 15 - /** @branch: branch version. */ 16 - u8 branch; 17 - /** @major: major version. */ 18 - u8 major; 19 - /** @minor: minor version. */ 20 - u8 minor; 21 - /** @patch: patch version. */ 22 - u8 patch; 23 - }; 10 + #include "xe_uc_fw_types.h" 24 11 25 12 /** 26 13 * struct xe_gt_sriov_vf_relay_version - PF ABI version details. ··· 27 40 u64 ggtt_base; 28 41 /** @ggtt_size: assigned size of the GGTT region. */ 29 42 u64 ggtt_size; 43 + /** @ggtt_shift: difference in ggtt_base on last migration */ 44 + s64 ggtt_shift; 30 45 /** @lmem_size: assigned size of the LMEM. */ 31 46 u64 lmem_size; 32 47 /** @num_ctxs: assigned number of GuC submission context IDs. */ ··· 60 71 * struct xe_gt_sriov_vf - GT level VF virtualization data. 61 72 */ 62 73 struct xe_gt_sriov_vf { 74 + /** @wanted_guc_version: minimum wanted GuC ABI version. */ 75 + struct xe_uc_fw_version wanted_guc_version; 63 76 /** @guc_version: negotiated GuC ABI version. */ 64 - struct xe_gt_sriov_vf_guc_version guc_version; 77 + struct xe_uc_fw_version guc_version; 65 78 /** @self_config: resource configurations. */ 66 79 struct xe_gt_sriov_vf_selfconfig self_config; 67 80 /** @pf_version: negotiated VF/PF ABI version. */

+8 -24

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

··· 138 138 int pending_seqno; 139 139 140 140 /* 141 + * we can get here before the CTs are even initialized if we're wedging 142 + * very early, in which case there are not going to be any pending 143 + * fences so we can bail immediately. 144 + */ 145 + if (!xe_guc_ct_initialized(&gt->uc.guc.ct)) 146 + return; 147 + 148 + /* 141 149 * CT channel is already disabled at this point. No new TLB requests can 142 150 * appear. 143 151 */ ··· 446 438 return; 447 439 448 440 xe_gt_tlb_invalidation_fence_wait(&fence); 449 - } 450 - 451 - /** 452 - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA 453 - * @gt: GT structure 454 - * @fence: invalidation fence which will be signal on TLB invalidation 455 - * completion, can be NULL 456 - * @vma: VMA to invalidate 457 - * 458 - * Issue a range based TLB invalidation if supported, if not fallback to a full 459 - * TLB invalidation. Completion of TLB is asynchronous and caller can use 460 - * the invalidation fence to wait for completion. 461 - * 462 - * Return: Negative error code on error, 0 on success 463 - */ 464 - int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, 465 - struct xe_gt_tlb_invalidation_fence *fence, 466 - struct xe_vma *vma) 467 - { 468 - xe_gt_assert(gt, vma); 469 - 470 - return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), 471 - xe_vma_end(vma), 472 - xe_vma_vm(vma)->usm.asid); 473 441 } 474 442 475 443 /**

-3

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h

··· 19 19 20 20 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); 21 21 int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); 22 - int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, 23 - struct xe_gt_tlb_invalidation_fence *fence, 24 - struct xe_vma *vma); 25 22 void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); 26 23 int xe_gt_tlb_invalidation_range(struct xe_gt *gt, 27 24 struct xe_gt_tlb_invalidation_fence *fence,

+23 -21

drivers/gpu/drm/xe/xe_gt_topology.c

··· 12 12 #include "regs/xe_gt_regs.h" 13 13 #include "xe_assert.h" 14 14 #include "xe_gt.h" 15 + #include "xe_gt_printk.h" 15 16 #include "xe_mmio.h" 16 17 #include "xe_wa.h" 17 18 18 - static void 19 - load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) 19 + static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, 20 + const struct xe_reg regs[]) 20 21 { 21 - va_list argp; 22 22 u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; 23 23 int i; 24 24 25 - if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS)) 26 - numregs = XE_MAX_DSS_FUSE_REGS; 25 + xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val)); 27 26 28 - va_start(argp, numregs); 29 27 for (i = 0; i < numregs; i++) 30 - fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg)); 31 - va_end(argp); 28 + fuse_val[i] = xe_mmio_read32(&gt->mmio, regs[i]); 32 29 33 30 bitmap_from_arr32(mask, fuse_val, numregs * 32); 34 31 } ··· 215 218 void 216 219 xe_gt_topology_init(struct xe_gt *gt) 217 220 { 221 + static const struct xe_reg geometry_regs[] = { 222 + XELP_GT_GEOMETRY_DSS_ENABLE, 223 + XE2_GT_GEOMETRY_DSS_1, 224 + XE2_GT_GEOMETRY_DSS_2, 225 + }; 226 + static const struct xe_reg compute_regs[] = { 227 + XEHP_GT_COMPUTE_DSS_ENABLE, 228 + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, 229 + XE2_GT_COMPUTE_DSS_2, 230 + }; 231 + int num_geometry_regs, num_compute_regs; 218 232 struct xe_device *xe = gt_to_xe(gt); 219 233 struct drm_printer p; 220 - int num_geometry_regs, num_compute_regs; 221 234 222 235 get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); 223 236 ··· 235 228 * Register counts returned shouldn't exceed the number of registers 236 229 * passed as parameters below. 
237 230 */ 238 - drm_WARN_ON(&xe->drm, num_geometry_regs > 3); 239 - drm_WARN_ON(&xe->drm, num_compute_regs > 3); 231 + xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); 232 + xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); 240 233 241 234 load_dss_mask(gt, gt->fuse_topo.g_dss_mask, 242 - num_geometry_regs, 243 - XELP_GT_GEOMETRY_DSS_ENABLE, 244 - XE2_GT_GEOMETRY_DSS_1, 245 - XE2_GT_GEOMETRY_DSS_2); 246 - load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, 247 - XEHP_GT_COMPUTE_DSS_ENABLE, 248 - XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, 249 - XE2_GT_COMPUTE_DSS_2); 235 + num_geometry_regs, geometry_regs); 236 + load_dss_mask(gt, gt->fuse_topo.c_dss_mask, 237 + num_compute_regs, compute_regs); 238 + 250 239 load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type); 251 240 load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); 252 241 253 - p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); 254 - 242 + p = xe_gt_dbg_printer(gt); 255 243 xe_gt_topology_dump(gt, &p); 256 244 } 257 245

+11 -8

drivers/gpu/drm/xe/xe_guc.c

··· 710 710 if (err) 711 711 return err; 712 712 713 + err = xe_guc_buf_cache_init(&guc->buf); 714 + if (err) 715 + return err; 716 + 713 717 /* XXX xe_guc_db_mgr_init not needed for now */ 714 718 715 719 return 0; ··· 1102 1098 struct xe_gt *gt = guc_to_gt(guc); 1103 1099 int ret; 1104 1100 1105 - ret = xe_gt_sriov_vf_bootstrap(gt); 1106 - if (ret) 1107 - return ret; 1108 - 1109 - ret = xe_gt_sriov_vf_query_config(gt); 1110 - if (ret) 1111 - return ret; 1112 - 1113 1101 ret = xe_guc_hwconfig_init(guc); 1114 1102 if (ret) 1115 1103 return ret; ··· 1281 1285 struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? 1282 1286 MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); 1283 1287 const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; 1288 + bool lost = false; 1284 1289 int ret; 1285 1290 int i; 1286 1291 ··· 1315 1318 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), 1316 1319 50000, &reply, false); 1317 1320 if (ret) { 1321 + /* scratch registers might be cleared during FLR, try once more */ 1322 + if (!reply && !lost) { 1323 + xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]); 1324 + lost = true; 1325 + goto retry; 1326 + } 1318 1327 timeout: 1319 1328 xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", 1320 1329 request[0], reply);

+13 -14

drivers/gpu/drm/xe/xe_guc_ads.c

··· 20 20 #include "xe_gt_ccs_mode.h" 21 21 #include "xe_gt_printk.h" 22 22 #include "xe_guc.h" 23 + #include "xe_guc_buf.h" 23 24 #include "xe_guc_capture.h" 24 25 #include "xe_guc_ct.h" 25 26 #include "xe_hw_engine.h" ··· 995 994 return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 996 995 } 997 996 997 + static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_policies *policies) 998 + { 999 + CLASS(xe_guc_buf_from_data, buf)(&ads_to_guc(ads)->buf, policies, sizeof(*policies)); 1000 + 1001 + if (!xe_guc_buf_is_valid(buf)) 1002 + return -ENOBUFS; 1003 + 1004 + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); 1005 + } 1006 + 998 1007 /** 999 1008 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy 1000 1009 * @ads: Additional data structures object ··· 1016 1005 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) 1017 1006 { 1018 1007 struct xe_device *xe = ads_to_xe(ads); 1019 - struct xe_gt *gt = ads_to_gt(ads); 1020 - struct xe_tile *tile = gt_to_tile(gt); 1021 1008 struct guc_policies *policies; 1022 - struct xe_bo *bo; 1023 - int ret = 0; 1009 + int ret; 1024 1010 1025 1011 policies = kmalloc(sizeof(*policies), GFP_KERNEL); 1026 1012 if (!policies) ··· 1031 1023 else 1032 1024 policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; 1033 1025 1034 - bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), 1035 - XE_BO_FLAG_VRAM_IF_DGFX(tile) | 1036 - XE_BO_FLAG_GGTT); 1037 - if (IS_ERR(bo)) { 1038 - ret = PTR_ERR(bo); 1039 - goto out; 1040 - } 1041 - 1042 - ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); 1043 - out: 1026 + ret = guc_ads_update_policies(ads, policies); 1044 1027 kfree(policies); 1045 1028 return ret; 1046 1029 }

-4

drivers/gpu/drm/xe/xe_guc_buf.c

··· 37 37 struct xe_gt *gt = cache_to_gt(cache); 38 38 struct xe_sa_manager *sam; 39 39 40 - /* XXX: currently it's useful only for the PF actions */ 41 - if (!IS_SRIOV_PF(gt_to_xe(gt))) 42 - return 0; 43 - 44 40 sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); 45 41 if (IS_ERR(sam)) 46 42 return PTR_ERR(sam);

+308 -22

drivers/gpu/drm/xe/xe_guc_ct.c

··· 25 25 #include "xe_gt_printk.h" 26 26 #include "xe_gt_sriov_pf_control.h" 27 27 #include "xe_gt_sriov_pf_monitor.h" 28 + #include "xe_gt_sriov_printk.h" 28 29 #include "xe_gt_tlb_invalidation.h" 29 30 #include "xe_guc.h" 30 31 #include "xe_guc_log.h" ··· 84 83 bool fail; 85 84 bool done; 86 85 }; 86 + 87 + #define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) 87 88 88 89 static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) 89 90 { ··· 517 514 */ 518 515 void xe_guc_ct_stop(struct xe_guc_ct *ct) 519 516 { 517 + if (!xe_guc_ct_initialized(ct)) 518 + return; 519 + 520 520 xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); 521 521 stop_g2h_handler(ct); 522 522 } ··· 631 625 spin_unlock_irq(&ct->fast_lock); 632 626 } 633 627 628 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 629 + static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) 630 + { 631 + unsigned int slot = fence % ARRAY_SIZE(ct->fast_req); 632 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) 633 + unsigned long entries[SZ_32]; 634 + unsigned int n; 635 + 636 + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); 637 + 638 + /* May be called under spinlock, so avoid sleeping */ 639 + ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); 640 + #endif 641 + ct->fast_req[slot].fence = fence; 642 + ct->fast_req[slot].action = action; 643 + } 644 + #else 645 + static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) 646 + { 647 + } 648 + #endif 649 + 650 + /* 651 + * The CT protocol accepts a 16 bits fence. This field is fully owned by the 652 + * driver, the GuC will just copy it to the reply message. Since we need to 653 + * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, 654 + * we use one bit of the seqno as an indicator for that and a rolling counter 655 + * for the remaining 15 bits. 
656 + */ 657 + #define CT_SEQNO_MASK GENMASK(14, 0) 658 + #define CT_SEQNO_UNTRACKED BIT(15) 659 + static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) 660 + { 661 + u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; 662 + 663 + if (!is_g2h_fence) 664 + seqno |= CT_SEQNO_UNTRACKED; 665 + 666 + return seqno; 667 + } 668 + 634 669 #define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ 635 670 636 671 static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, ··· 748 701 FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | 749 702 GUC_HXG_EVENT_MSG_0_DATA0, action[0]); 750 703 } else { 704 + fast_req_track(ct, ct_fence_value, 705 + FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0])); 706 + 751 707 cmd[1] = 752 708 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) | 753 709 FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | ··· 783 733 return -EPIPE; 784 734 } 785 735 786 - /* 787 - * The CT protocol accepts a 16 bits fence. This field is fully owned by the 788 - * driver, the GuC will just copy it to the reply message. Since we need to 789 - * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, 790 - * we use one bit of the seqno as an indicator for that and a rolling counter 791 - * for the remaining 15 bits. 
792 - */ 793 - #define CT_SEQNO_MASK GENMASK(14, 0) 794 - #define CT_SEQNO_UNTRACKED BIT(15) 795 - static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) 796 - { 797 - u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; 798 - 799 - if (!is_g2h_fence) 800 - seqno |= CT_SEQNO_UNTRACKED; 801 - 802 - return seqno; 803 - } 804 - 805 736 static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, 806 737 u32 len, u32 g2h_len, u32 num_g2h, 807 738 struct g2h_fence *g2h_fence) ··· 791 760 u16 seqno; 792 761 int ret; 793 762 794 - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); 763 + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); 795 764 xe_gt_assert(gt, !g2h_len || !g2h_fence); 796 765 xe_gt_assert(gt, !num_g2h || !g2h_fence); 797 766 xe_gt_assert(gt, !g2h_len || num_g2h); ··· 1174 1143 return 0; 1175 1144 } 1176 1145 1146 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 1147 + static void fast_req_report(struct xe_guc_ct *ct, u16 fence) 1148 + { 1149 + u16 fence_min = U16_MAX, fence_max = 0; 1150 + struct xe_gt *gt = ct_to_gt(ct); 1151 + bool found = false; 1152 + unsigned int n; 1153 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) 1154 + char *buf; 1155 + #endif 1156 + 1157 + lockdep_assert_held(&ct->lock); 1158 + 1159 + for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { 1160 + if (ct->fast_req[n].fence < fence_min) 1161 + fence_min = ct->fast_req[n].fence; 1162 + if (ct->fast_req[n].fence > fence_max) 1163 + fence_max = ct->fast_req[n].fence; 1164 + 1165 + if (ct->fast_req[n].fence != fence) 1166 + continue; 1167 + found = true; 1168 + 1169 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) 1170 + buf = kmalloc(SZ_4K, GFP_NOWAIT); 1171 + if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0)) 1172 + xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s", 1173 + fence, ct->fast_req[n].action, buf); 1174 + else 1175 + xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n", 1176 + fence, ct->fast_req[n].action); 1177 + 
kfree(buf); 1178 + #else 1179 + xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n", 1180 + fence, ct->fast_req[n].action); 1181 + #endif 1182 + break; 1183 + } 1184 + 1185 + if (!found) 1186 + xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? [range = 0x%x -> 0x%x, next = 0x%X]\n", 1187 + fence, fence_min, fence_max, ct->fence_seqno); 1188 + } 1189 + #else 1190 + static void fast_req_report(struct xe_guc_ct *ct, u16 fence) 1191 + { 1192 + } 1193 + #endif 1194 + 1177 1195 static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) 1178 1196 { 1179 1197 struct xe_gt *gt = ct_to_gt(ct); ··· 1251 1171 else 1252 1172 xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", 1253 1173 type, fence); 1174 + 1175 + fast_req_report(ct, fence); 1176 + 1254 1177 CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE); 1255 1178 1256 1179 return -EPROTO; ··· 1427 1344 u32 action; 1428 1345 u32 *hxg; 1429 1346 1430 - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); 1347 + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); 1431 1348 lockdep_assert_held(&ct->fast_lock); 1432 1349 1433 1350 if (ct->state == XE_GUC_CT_STATE_DISABLED) ··· 1707 1624 receive_g2h(ct); 1708 1625 } 1709 1626 1627 + static void xe_fixup_u64_in_cmds(struct xe_device *xe, struct iosys_map *cmds, 1628 + u32 size, u32 idx, s64 shift) 1629 + { 1630 + u32 hi, lo; 1631 + u64 offset; 1632 + 1633 + lo = xe_map_rd_ring_u32(xe, cmds, idx, size); 1634 + hi = xe_map_rd_ring_u32(xe, cmds, idx + 1, size); 1635 + offset = make_u64(hi, lo); 1636 + offset += shift; 1637 + lo = lower_32_bits(offset); 1638 + hi = upper_32_bits(offset); 1639 + xe_map_wr_ring_u32(xe, cmds, idx, size, lo); 1640 + xe_map_wr_ring_u32(xe, cmds, idx + 1, size, hi); 1641 + } 1642 + 1643 + /* 1644 + * Shift any GGTT addresses within a single message left within CTB from 1645 + * before post-migration recovery. 
1646 + * @ct: pointer to CT struct of the target GuC 1647 + * @cmds: iomap buffer containing CT messages 1648 + * @head: start of the target message within the buffer 1649 + * @len: length of the target message 1650 + * @size: size of the commands buffer 1651 + * @shift: the address shift to be added to each GGTT reference 1652 + * Return: true if the message was fixed or needed no fixups, false on failure 1653 + */ 1654 + static bool ct_fixup_ggtt_in_message(struct xe_guc_ct *ct, 1655 + struct iosys_map *cmds, u32 head, 1656 + u32 len, u32 size, s64 shift) 1657 + { 1658 + struct xe_gt *gt = ct_to_gt(ct); 1659 + struct xe_device *xe = ct_to_xe(ct); 1660 + u32 msg[GUC_HXG_MSG_MIN_LEN]; 1661 + u32 action, i, n; 1662 + 1663 + xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN); 1664 + 1665 + msg[0] = xe_map_rd_ring_u32(xe, cmds, head, size); 1666 + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); 1667 + 1668 + xe_gt_sriov_dbg_verbose(gt, "fixing H2G %#x\n", action); 1669 + 1670 + switch (action) { 1671 + case XE_GUC_ACTION_REGISTER_CONTEXT: 1672 + if (len != XE_GUC_REGISTER_CONTEXT_MSG_LEN) 1673 + goto err_len; 1674 + xe_fixup_u64_in_cmds(xe, cmds, size, head + 1675 + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, 1676 + shift); 1677 + xe_fixup_u64_in_cmds(xe, cmds, size, head + 1678 + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, 1679 + shift); 1680 + xe_fixup_u64_in_cmds(xe, cmds, size, head + 1681 + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, shift); 1682 + break; 1683 + case XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC: 1684 + if (len < XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN) 1685 + goto err_len; 1686 + n = xe_map_rd_ring_u32(xe, cmds, head + 1687 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, size); 1688 + if (len != XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN + 2 * n) 1689 + goto err_len; 1690 + xe_fixup_u64_in_cmds(xe, cmds, size, head + 1691 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, 1692 + shift); 1693 + 
xe_fixup_u64_in_cmds(xe, cmds, size, head + 1694 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, 1695 + shift); 1696 + for (i = 0; i < n; i++) 1697 + xe_fixup_u64_in_cmds(xe, cmds, size, head + 1698 + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR 1699 + + 2 * i, shift); 1700 + break; 1701 + default: 1702 + break; 1703 + } 1704 + return true; 1705 + 1706 + err_len: 1707 + xe_gt_err(gt, "Skipped G2G %#x message fixups, unexpected length (%u)\n", action, len); 1708 + return false; 1709 + } 1710 + 1711 + /* 1712 + * Apply fixups to the next outgoing CT message within given CTB 1713 + * @ct: the &xe_guc_ct struct instance representing the target GuC 1714 + * @h2g: the &guc_ctb struct instance of the target buffer 1715 + * @shift: shift to be added to all GGTT addresses within the CTB 1716 + * @mhead: pointer to an integer storing message start position; the 1717 + * position is changed to next message before this function return 1718 + * @avail: size of the area available for parsing, that is length 1719 + * of all remaining messages stored within the CTB 1720 + * Return: size of the area available for parsing after one message 1721 + * has been parsed, that is length remaining from the updated mhead 1722 + */ 1723 + static int ct_fixup_ggtt_in_buffer(struct xe_guc_ct *ct, struct guc_ctb *h2g, 1724 + s64 shift, u32 *mhead, s32 avail) 1725 + { 1726 + struct xe_gt *gt = ct_to_gt(ct); 1727 + struct xe_device *xe = ct_to_xe(ct); 1728 + u32 msg[GUC_HXG_MSG_MIN_LEN]; 1729 + u32 size = h2g->info.size; 1730 + u32 head = *mhead; 1731 + u32 len; 1732 + 1733 + xe_gt_assert(gt, avail >= (s32)GUC_CTB_MSG_MIN_LEN); 1734 + 1735 + /* Read header */ 1736 + msg[0] = xe_map_rd_ring_u32(xe, &h2g->cmds, head, size); 1737 + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; 1738 + 1739 + if (unlikely(len > (u32)avail)) { 1740 + xe_gt_err(gt, "H2G channel broken on read, avail=%d, len=%d, fixups skipped\n", 1741 + avail, len); 1742 + return 0; 1743 
+ } 1744 + 1745 + head = (head + GUC_CTB_MSG_MIN_LEN) % size; 1746 + if (!ct_fixup_ggtt_in_message(ct, &h2g->cmds, head, msg_len_to_hxg_len(len), size, shift)) 1747 + return 0; 1748 + *mhead = (head + msg_len_to_hxg_len(len)) % size; 1749 + 1750 + return avail - len; 1751 + } 1752 + 1753 + /** 1754 + * xe_guc_ct_fixup_messages_with_ggtt - Fixup any pending H2G CTB messages 1755 + * @ct: pointer to CT struct of the target GuC 1756 + * @ggtt_shift: shift to be added to all GGTT addresses within the CTB 1757 + * 1758 + * Messages in GuC to Host CTB are owned by GuC and any fixups in them 1759 + * are made by GuC. But content of the Host to GuC CTB is owned by the 1760 + * KMD, so fixups to GGTT references in any pending messages need to be 1761 + * applied here. 1762 + * This function updates GGTT offsets in payloads of pending H2G CTB 1763 + * messages (messages which were not consumed by GuC before the VF got 1764 + * paused). 1765 + */ 1766 + void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift) 1767 + { 1768 + struct guc_ctb *h2g = &ct->ctbs.h2g; 1769 + struct xe_guc *guc = ct_to_guc(ct); 1770 + struct xe_gt *gt = guc_to_gt(guc); 1771 + u32 head, tail, size; 1772 + s32 avail; 1773 + 1774 + if (unlikely(h2g->info.broken)) 1775 + return; 1776 + 1777 + h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); 1778 + head = h2g->info.head; 1779 + tail = READ_ONCE(h2g->info.tail); 1780 + size = h2g->info.size; 1781 + 1782 + if (unlikely(head > size)) 1783 + goto corrupted; 1784 + 1785 + if (unlikely(tail >= size)) 1786 + goto corrupted; 1787 + 1788 + avail = tail - head; 1789 + 1790 + /* beware of buffer wrap case */ 1791 + if (unlikely(avail < 0)) 1792 + avail += size; 1793 + xe_gt_dbg(gt, "available %d (%u:%u:%u)\n", avail, head, tail, size); 1794 + xe_gt_assert(gt, avail >= 0); 1795 + 1796 + while (avail > 0) 1797 + avail = ct_fixup_ggtt_in_buffer(ct, h2g, ggtt_shift, &head, avail); 1798 + 1799 + return; 1800 + 1801 + corrupted: 1802 + 
xe_gt_err(gt, "Corrupted H2G descriptor head=%u tail=%u size=%u, fixups not applied\n", 1803 + head, tail, size); 1804 + h2g->info.broken = true; 1805 + } 1806 + 1710 1807 static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic, 1711 1808 bool want_ctb) 1712 1809 { ··· 2033 1770 } 2034 1771 2035 1772 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 1773 + 1774 + #ifdef CONFIG_FUNCTION_ERROR_INJECTION 1775 + /* 1776 + * This is a helper function which assists the driver in identifying if a fault 1777 + * injection test is currently active, allowing it to reduce unnecessary debug 1778 + * output. Typically, the function returns zero, but the fault injection 1779 + * framework can alter this to return an error. Since faults are injected 1780 + * through this function, it's important to ensure the compiler doesn't optimize 1781 + * it into an inline function. To avoid such optimization, the 'noinline' 1782 + * attribute is applied. Compiler optimizes the static function defined in the 1783 + * header file as an inline function. 1784 + */ 1785 + noinline int xe_is_injection_active(void) { return 0; } 1786 + ALLOW_ERROR_INJECTION(xe_is_injection_active, ERRNO); 1787 + #else 1788 + int xe_is_injection_active(void) { return 0; } 1789 + #endif 1790 + 2036 1791 static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code) 2037 1792 { 2038 1793 struct xe_guc_log_snapshot *snapshot_log; ··· 2061 1780 2062 1781 if (ctb) 2063 1782 ctb->info.broken = true; 1783 + /* 1784 + * Huge dump is getting generated when injecting error for guc CT/MMIO 1785 + * functions. So, let us suppress the dump when fault is injected. 
1786 + */ 1787 + if (xe_is_injection_active()) 1788 + return; 2064 1789 2065 1790 /* Ignore further errors after the first dump until a reset */ 2066 1791 if (ct->dead.reported) ··· 2116 1829 xe_gt_err(gt, "CTB is dead for no reason!?\n"); 2117 1830 return; 2118 1831 } 2119 - 2120 1832 2121 1833 /* Can't generate a genuine core dump at this point, so just do the good bits */ 2122 1834 drm_puts(&lp, "**** Xe Device Coredump ****\n");

+7

drivers/gpu/drm/xe/xe_guc_ct.h

··· 22 22 void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); 23 23 void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); 24 24 25 + void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift); 26 + 27 + static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) 28 + { 29 + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; 30 + } 31 + 25 32 static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) 26 33 { 27 34 return ct->state == XE_GUC_CT_STATE_ENABLED;

+15

drivers/gpu/drm/xe/xe_guc_ct_types.h

··· 9 9 #include <linux/interrupt.h> 10 10 #include <linux/iosys-map.h> 11 11 #include <linux/spinlock_types.h> 12 + #include <linux/stackdepot.h> 12 13 #include <linux/wait.h> 13 14 #include <linux/xarray.h> 14 15 ··· 105 104 /** snapshot_log: copy of GuC log at point of error */ 106 105 struct xe_guc_log_snapshot *snapshot_log; 107 106 }; 107 + 108 + /** struct xe_fast_req_fence - Used to track FAST_REQ messages by fence to match error responses */ 109 + struct xe_fast_req_fence { 110 + /** @fence: sequence number sent in H2G and return in G2H error */ 111 + u16 fence; 112 + /** @action: H2G action code */ 113 + u16 action; 114 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) 115 + /** @stack: call stack from when the H2G was sent */ 116 + depot_stack_handle_t stack; 117 + #endif 118 + }; 108 119 #endif 109 120 110 121 /** ··· 165 152 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 166 153 /** @dead: information for debugging dead CTs */ 167 154 struct xe_dead_ct dead; 155 + /** @fast_req: history of FAST_REQ messages for matching with G2H error responses */ 156 + struct xe_fast_req_fence fast_req[SZ_32]; 168 157 #endif 169 158 }; 170 159

+1 -1

drivers/gpu/drm/xe/xe_guc_engine_activity.c

··· 124 124 static bool is_engine_activity_supported(struct xe_guc *guc) 125 125 { 126 126 struct xe_uc_fw_version *version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; 127 - struct xe_uc_fw_version required = { 1, 14, 1 }; 127 + struct xe_uc_fw_version required = { .major = 1, .minor = 14, .patch = 1 }; 128 128 struct xe_gt *gt = guc_to_gt(guc); 129 129 130 130 if (IS_SRIOV_VF(gt_to_xe(gt))) {

+1 -1

drivers/gpu/drm/xe/xe_guc_log.h

··· 12 12 struct drm_printer; 13 13 struct xe_device; 14 14 15 - #if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER) 15 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) 16 16 #define CRASH_BUFFER_SIZE SZ_1M 17 17 #define DEBUG_BUFFER_SIZE SZ_8M 18 18 #define CAPTURE_BUFFER_SIZE SZ_2M

+11 -6

drivers/gpu/drm/xe/xe_guc_pc.c

··· 51 51 52 52 #define LNL_MERT_FREQ_CAP 800 53 53 #define BMG_MERT_FREQ_CAP 2133 54 + #define BMG_MIN_FREQ 1200 54 55 55 56 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ 56 57 #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ ··· 154 153 int ret; 155 154 156 155 ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 157 - if (ret) 156 + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) 158 157 xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", 159 158 ERR_PTR(ret)); 160 159 ··· 178 177 179 178 /* Blocking here to ensure the results are ready before reading them */ 180 179 ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); 181 - if (ret) 180 + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) 182 181 xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", 183 182 ERR_PTR(ret)); 184 183 ··· 201 200 return -EAGAIN; 202 201 203 202 ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 204 - if (ret) 203 + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) 205 204 xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n", 206 205 id, value, ERR_PTR(ret)); 207 206 ··· 223 222 return -EAGAIN; 224 223 225 224 ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 226 - if (ret) 225 + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) 227 226 xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe", 228 227 ERR_PTR(ret)); 229 228 ··· 240 239 int ret; 241 240 242 241 ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 243 - if (ret) 242 + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) 244 243 xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", 245 244 mode, ERR_PTR(ret)); 246 245 return ret; ··· 818 817 819 818 static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) 820 819 { 820 + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); 821 821 int ret; 822 822 823 823 
lockdep_assert_held(&pc->freq_lock); ··· 844 842 */ 845 843 if (pc_get_min_freq(pc) > pc->rp0_freq) 846 844 ret = pc_set_min_freq(pc, pc->rp0_freq); 845 + 846 + if (XE_WA(tile->primary_gt, 14022085890)) 847 + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); 847 848 848 849 out: 849 850 return ret; ··· 1073 1068 goto out; 1074 1069 } 1075 1070 1076 - memset(pc->bo->vmap.vaddr, 0, size); 1071 + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); 1077 1072 slpc_shared_data_write(pc, header.size, size); 1078 1073 1079 1074 earlier = ktime_get();

+21 -4

drivers/gpu/drm/xe/xe_guc_submit.c

··· 498 498 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 499 499 } 500 500 501 + /* explicitly checks some fields that we might fixup later */ 502 + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 503 + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); 504 + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 505 + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); 506 + xe_gt_assert(guc_to_gt(guc), q->width == 507 + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); 508 + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 509 + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); 501 510 xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); 502 511 #undef MAX_MLRC_REG_SIZE 503 512 ··· 530 521 info->hwlrca_lo, 531 522 info->hwlrca_hi, 532 523 }; 524 + 525 + /* explicitly checks some fields that we might fixup later */ 526 + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 527 + action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); 528 + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 529 + action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); 530 + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 531 + action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); 533 532 534 533 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 535 534 } ··· 987 970 */ 988 971 xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); 989 972 990 - if (ctx_timestamp < ctx_job_timestamp) 991 - diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; 992 - else 993 - diff = ctx_timestamp - ctx_job_timestamp; 973 + diff = ctx_timestamp - ctx_job_timestamp; 994 974 995 975 /* 996 976 * Ensure timeout is within 5% to account for an GuC scheduling latency ··· 1775 1761 int xe_guc_submit_reset_prepare(struct xe_guc *guc) 1776 1762 { 1777 1763 int ret; 1764 + 1765 + if (!guc->submission_state.initialized) 1766 + return 0; 1778 1767 1779 1768 /* 1780 1769 * Using an atomic here rather than submission_state.lock as this

+26 -11

drivers/gpu/drm/xe/xe_hw_engine.c

··· 17 17 #include "regs/xe_irq_regs.h" 18 18 #include "xe_assert.h" 19 19 #include "xe_bo.h" 20 + #include "xe_configfs.h" 20 21 #include "xe_device.h" 21 22 #include "xe_execlist.h" 22 23 #include "xe_force_wake.h" ··· 694 693 695 694 if (!(BIT(j) & vdbox_mask)) { 696 695 gt->info.engine_mask &= ~BIT(i); 697 - drm_info(&xe->drm, "vcs%u fused off\n", j); 696 + xe_gt_info(gt, "vcs%u fused off\n", j); 698 697 } 699 698 } 700 699 ··· 704 703 705 704 if (!(BIT(j) & vebox_mask)) { 706 705 gt->info.engine_mask &= ~BIT(i); 707 - drm_info(&xe->drm, "vecs%u fused off\n", j); 706 + xe_gt_info(gt, "vecs%u fused off\n", j); 708 707 } 709 708 } 710 709 } ··· 729 728 730 729 if (!(BIT(j / 2) & bcs_mask)) { 731 730 gt->info.engine_mask &= ~BIT(i); 732 - drm_info(&xe->drm, "bcs%u fused off\n", j); 731 + xe_gt_info(gt, "bcs%u fused off\n", j); 733 732 } 734 733 } 735 734 } 736 735 737 736 static void read_compute_fuses_from_dss(struct xe_gt *gt) 738 737 { 739 - struct xe_device *xe = gt_to_xe(gt); 740 - 741 738 /* 742 739 * CCS fusing based on DSS masks only applies to platforms that can 743 740 * have more than one CCS. 
··· 754 755 755 756 if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { 756 757 gt->info.engine_mask &= ~BIT(i); 757 - drm_info(&xe->drm, "ccs%u fused off\n", j); 758 + xe_gt_info(gt, "ccs%u fused off\n", j); 758 759 } 759 760 } 760 761 } 761 762 762 763 static void read_compute_fuses_from_reg(struct xe_gt *gt) 763 764 { 764 - struct xe_device *xe = gt_to_xe(gt); 765 765 u32 ccs_mask; 766 766 767 767 ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4); ··· 772 774 773 775 if ((ccs_mask & BIT(j)) == 0) { 774 776 gt->info.engine_mask &= ~BIT(i); 775 - drm_info(&xe->drm, "ccs%u fused off\n", j); 777 + xe_gt_info(gt, "ccs%u fused off\n", j); 776 778 } 777 779 } 778 780 } ··· 787 789 788 790 static void check_gsc_availability(struct xe_gt *gt) 789 791 { 790 - struct xe_device *xe = gt_to_xe(gt); 791 - 792 792 if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) 793 793 return; 794 794 ··· 802 806 xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0); 803 807 xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0); 804 808 805 - drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); 809 + xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n"); 810 + } 811 + } 812 + 813 + static void check_sw_disable(struct xe_gt *gt) 814 + { 815 + struct xe_device *xe = gt_to_xe(gt); 816 + u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev)); 817 + enum xe_hw_engine_id id; 818 + 819 + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { 820 + if (!(gt->info.engine_mask & BIT(id))) 821 + continue; 822 + 823 + if (!(sw_allowed & BIT(id))) { 824 + gt->info.engine_mask &= ~BIT(id); 825 + xe_gt_info(gt, "%s disabled via configfs\n", 826 + engine_infos[id].name); 827 + } 806 828 } 807 829 } 808 830 ··· 832 818 read_copy_fuses(gt); 833 819 read_compute_fuses(gt); 834 820 check_gsc_availability(gt); 821 + check_sw_disable(gt); 835 822 836 823 BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); 837 824 BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > 
XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

+176 -88

drivers/gpu/drm/xe/xe_hwmon.c

··· 20 20 #include "xe_pcode_api.h" 21 21 #include "xe_sriov.h" 22 22 #include "xe_pm.h" 23 + #include "xe_vsec.h" 24 + #include "regs/xe_pmt.h" 23 25 24 26 enum xe_hwmon_reg { 25 27 REG_TEMP, ··· 53 51 FAN_MAX, 54 52 }; 55 53 54 + /* Attribute index for powerX_xxx_interval sysfs entries */ 55 + enum sensor_attr_power { 56 + SENSOR_INDEX_PSYS_PL1, 57 + SENSOR_INDEX_PKG_PL1, 58 + SENSOR_INDEX_PSYS_PL2, 59 + SENSOR_INDEX_PKG_PL2, 60 + }; 61 + 56 62 /* 57 63 * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is 58 64 * not supported and below are SKU units to be used. ··· 82 72 * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute. 83 73 */ 84 74 #define PL1_HWMON_ATTR hwmon_power_max 75 + #define PL2_HWMON_ATTR hwmon_power_cap 85 76 86 - #define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "Invalid") 77 + #define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "PL2") 87 78 88 79 /* 89 80 * Timeout for power limit write mailbox command. ··· 135 124 bool boot_power_limit_read; 136 125 /** @pl1_on_boot: power limit PL1 on boot */ 137 126 u32 pl1_on_boot[CHANNEL_MAX]; 127 + /** @pl2_on_boot: power limit PL2 on boot */ 128 + u32 pl2_on_boot[CHANNEL_MAX]; 129 + 138 130 }; 139 131 140 132 static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel, ··· 165 151 /* return the value only if limit is enabled */ 166 152 if (attr == PL1_HWMON_ATTR) 167 153 *uval = (val0 & PWR_LIM_EN) ? val0 : 0; 154 + else if (attr == PL2_HWMON_ATTR) 155 + *uval = (val1 & PWR_LIM_EN) ? val1 : 0; 168 156 else if (attr == hwmon_power_label) 169 - *uval = (val0 & PWR_LIM_EN) ? 1 : 0; 157 + *uval = (val0 & PWR_LIM_EN) ? 1 : (val1 & PWR_LIM_EN) ? 
1 : 0; 170 158 else 171 159 *uval = 0; 172 160 173 161 return ret; 174 162 } 175 163 176 - static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, 177 - u32 uval) 164 + static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, 165 + u32 clr, u32 set) 178 166 { 179 167 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 180 168 u32 val0, val1; ··· 195 179 channel, val0, val1, ret); 196 180 197 181 if (attr == PL1_HWMON_ATTR) 198 - val0 = uval; 182 + val0 = (val0 & ~clr) | set; 183 + else if (attr == PL2_HWMON_ATTR) 184 + val1 = (val1 & ~clr) | set; 199 185 else 200 186 return -EIO; 201 187 ··· 254 236 return GT_PERF_STATUS; 255 237 break; 256 238 case REG_PKG_ENERGY_STATUS: 257 - if (xe->info.platform == XE_BATTLEMAGE) { 258 - if (channel == CHANNEL_PKG) 259 - return BMG_PACKAGE_ENERGY_STATUS; 260 - else 261 - return BMG_PLATFORM_ENERGY_STATUS; 262 - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { 239 + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { 263 240 return PVC_GT0_PLATFORM_ENERGY_STATUS; 264 241 } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { 265 242 return PCU_CR_PACKAGE_ENERGY_STATUS; ··· 286 273 */ 287 274 static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) 288 275 { 289 - u64 reg_val, min, max; 276 + u64 reg_val = 0, min, max; 290 277 struct xe_device *xe = hwmon->xe; 291 278 struct xe_reg rapl_limit, pkg_power_sku; 292 279 struct xe_mmio *mmio = xe_root_tile_mmio(xe); ··· 298 285 } else { 299 286 rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 300 287 pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); 301 - 302 - /* 303 - * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. 304 - * So not checking it again here. 
305 - */ 306 - if (!xe_reg_is_valid(pkg_power_sku)) { 307 - drm_warn(&xe->drm, "pkg_power_sku invalid\n"); 308 - *value = 0; 309 - goto unlock; 310 - } 311 288 reg_val = xe_mmio_read32(mmio, rapl_limit); 312 289 } 313 290 ··· 330 327 { 331 328 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 332 329 int ret = 0; 333 - u32 reg_val; 330 + u32 reg_val, max; 334 331 struct xe_reg rapl_limit; 335 332 336 333 mutex_lock(&hwmon->hwmon_lock); ··· 342 339 if (hwmon->xe->info.has_mbx_power_limits) { 343 340 drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", 344 341 PWR_ATTR_TO_STR(attr), channel); 345 - xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0); 342 + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); 346 343 xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val); 347 344 } else { 348 345 reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); ··· 358 355 359 356 /* Computation in 64-bits to avoid overflow. Round to nearest. */ 360 357 reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); 361 - reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); 362 358 363 359 /* 364 - * Clamp power limit to card-firmware default as maximum, as an additional protection to 360 + * Clamp power limit to GPU firmware default as maximum, as an additional protection to 365 361 * pcode clamp. 366 362 */ 367 363 if (hwmon->xe->info.has_mbx_power_limits) { 368 - if (reg_val > REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel])) { 369 - reg_val = REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel]); 370 - drm_dbg(&hwmon->xe->drm, "Clamping power limit to firmware default 0x%x\n", 364 + max = (attr == PL1_HWMON_ATTR) ? 
365 + hwmon->pl1_on_boot[channel] : hwmon->pl2_on_boot[channel]; 366 + max = REG_FIELD_PREP(PWR_LIM_VAL, max); 367 + if (reg_val > max) { 368 + reg_val = max; 369 + drm_dbg(&hwmon->xe->drm, 370 + "Clamping power limit to GPU firmware default 0x%x\n", 371 371 reg_val); 372 372 } 373 373 } 374 374 375 + reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); 376 + 375 377 if (hwmon->xe->info.has_mbx_power_limits) 376 - ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val); 378 + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); 377 379 else 378 - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL, 379 - reg_val); 380 + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); 380 381 unlock: 381 382 mutex_unlock(&hwmon->hwmon_lock); 382 383 return ret; ··· 435 428 { 436 429 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 437 430 struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; 438 - u64 reg_val; 431 + u32 reg_val; 432 + int ret = 0; 439 433 440 - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 441 - channel)); 434 + /* Energy is supported only for card and pkg */ 435 + if (channel > CHANNEL_PKG) { 436 + *energy = 0; 437 + return; 438 + } 442 439 443 - if (reg_val >= ei->reg_val_prev) 444 - ei->accum_energy += reg_val - ei->reg_val_prev; 445 - else 446 - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; 440 + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { 441 + u64 pmt_val; 447 442 443 + ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev), 444 + xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), 445 + &pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET, sizeof(pmt_val)); 446 + if (ret != sizeof(pmt_val)) { 447 + drm_warn(&hwmon->xe->drm, "energy read from pmt failed, ret %d\n", ret); 448 + *energy = 0; 449 + return; 450 + } 451 + 452 + if (channel == CHANNEL_PKG) 453 + reg_val = REG_FIELD_GET64(ENERGY_PKG, pmt_val); 454 + else 455 + reg_val = 
REG_FIELD_GET64(ENERGY_CARD, pmt_val); 456 + } else { 457 + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 458 + channel)); 459 + } 460 + 461 + ei->accum_energy += reg_val - ei->reg_val_prev; 448 462 ei->reg_val_prev = reg_val; 449 463 450 464 *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, ··· 480 452 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 481 453 u32 x, y, x_w = 2; /* 2 bits */ 482 454 u64 r, tau4, out; 483 - int channel = to_sensor_dev_attr(attr)->index; 484 - u32 power_attr = PL1_HWMON_ATTR; 455 + int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; 456 + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; 457 + 485 458 int ret = 0; 486 459 487 460 xe_pm_runtime_get(hwmon->xe); ··· 535 506 u32 x, y, rxy, x_w = 2; /* 2 bits */ 536 507 u64 tau4, r, max_win; 537 508 unsigned long val; 509 + int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; 510 + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? 
PL2_HWMON_ATTR : PL1_HWMON_ATTR; 538 511 int ret; 539 - int channel = to_sensor_dev_attr(attr)->index; 540 - u32 power_attr = PL1_HWMON_ATTR; 541 512 542 513 ret = kstrtoul(buf, 0, &val); 543 514 if (ret) ··· 564 535 tau4 = (u64)((1 << x_w) | x) << y; 565 536 max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); 566 537 567 - if (val > max_win) { 568 - drm_warn(&hwmon->xe->drm, "power_interval invalid val 0x%lx\n", val); 538 + if (val > max_win) 569 539 return -EINVAL; 570 - } 571 540 572 541 /* val in hw units */ 573 542 val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1; ··· 590 563 591 564 mutex_lock(&hwmon->hwmon_lock); 592 565 593 - if (hwmon->xe->info.has_mbx_power_limits) { 594 - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); 595 - r = (r & ~PWR_LIM_TIME) | rxy; 596 - xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r); 597 - } else { 566 + if (hwmon->xe->info.has_mbx_power_limits) 567 + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); 568 + else 598 569 r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), 599 570 PWR_LIM_TIME, rxy); 600 - } 601 571 602 572 mutex_unlock(&hwmon->hwmon_lock); 603 573 ··· 606 582 /* PSYS PL1 */ 607 583 static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, 608 584 xe_hwmon_power_max_interval_show, 609 - xe_hwmon_power_max_interval_store, CHANNEL_CARD); 610 - 585 + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL1); 586 + /* PKG PL1 */ 611 587 static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, 612 588 xe_hwmon_power_max_interval_show, 613 - xe_hwmon_power_max_interval_store, CHANNEL_PKG); 589 + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL1); 590 + /* PSYS PL2 */ 591 + static SENSOR_DEVICE_ATTR(power1_cap_interval, 0664, 592 + xe_hwmon_power_max_interval_show, 593 + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL2); 594 + /* PKG PL2 */ 595 + static 
SENSOR_DEVICE_ATTR(power2_cap_interval, 0664, 596 + xe_hwmon_power_max_interval_show, 597 + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL2); 614 598 615 599 static struct attribute *hwmon_attributes[] = { 616 600 &sensor_dev_attr_power1_max_interval.dev_attr.attr, 617 601 &sensor_dev_attr_power2_max_interval.dev_attr.attr, 602 + &sensor_dev_attr_power1_cap_interval.dev_attr.attr, 603 + &sensor_dev_attr_power2_cap_interval.dev_attr.attr, 618 604 NULL 619 605 }; 620 606 ··· 634 600 struct device *dev = kobj_to_dev(kobj); 635 601 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 636 602 int ret = 0; 637 - int channel = index ? CHANNEL_PKG : CHANNEL_CARD; 638 - u32 power_attr = PL1_HWMON_ATTR; 639 - u32 uval; 603 + int channel = (index % 2) ? CHANNEL_PKG : CHANNEL_CARD; 604 + u32 power_attr = (index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; 605 + u32 uval = 0; 606 + struct xe_reg rapl_limit; 607 + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 640 608 641 609 xe_pm_runtime_get(hwmon->xe); 642 610 643 611 if (hwmon->xe->info.has_mbx_power_limits) { 644 612 xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval); 645 - ret = (uval & PWR_LIM_EN) ? attr->mode : 0; 646 - } else { 647 - ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, 648 - channel)) ? attr->mode : 0; 613 + } else if (power_attr != PL2_HWMON_ATTR) { 614 + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 615 + if (xe_reg_is_valid(rapl_limit)) 616 + uval = xe_mmio_read32(mmio, rapl_limit); 649 617 } 618 + ret = (uval & PWR_LIM_EN) ? 
attr->mode : 0; 650 619 651 620 xe_pm_runtime_put(hwmon->xe); 652 621 ··· 669 632 static const struct hwmon_channel_info * const hwmon_info[] = { 670 633 HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, 671 634 HWMON_T_INPUT | HWMON_T_LABEL), 672 - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT, 673 - HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL), 635 + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT | 636 + HWMON_P_CAP, 637 + HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP), 674 638 HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), 675 639 HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), 676 640 HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), ··· 792 754 static umode_t 793 755 xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) 794 756 { 795 - u32 uval; 757 + u32 uval = 0; 758 + struct xe_reg reg; 759 + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 796 760 797 761 switch (attr) { 798 762 case hwmon_power_max: 763 + case hwmon_power_cap: 799 764 if (hwmon->xe->info.has_mbx_power_limits) { 800 765 xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); 801 - return (uval) ? 0664 : 0; 802 - } else { 803 - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, 804 - channel)) ? 
0664 : 0; 766 + } else if (attr != PL2_HWMON_ATTR) { 767 + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 768 + if (xe_reg_is_valid(reg)) 769 + uval = xe_mmio_read32(mmio, reg); 805 770 } 771 + if (uval & PWR_LIM_EN) { 772 + drm_info(&hwmon->xe->drm, "%s is supported on channel %d\n", 773 + PWR_ATTR_TO_STR(attr), channel); 774 + return 0664; 775 + } 776 + drm_dbg(&hwmon->xe->drm, "%s is unsupported on channel %d\n", 777 + PWR_ATTR_TO_STR(attr), channel); 778 + return 0; 806 779 case hwmon_power_rated_max: 807 - if (hwmon->xe->info.has_mbx_power_limits) 780 + if (hwmon->xe->info.has_mbx_power_limits) { 808 781 return 0; 809 - else 810 - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, 811 - channel)) ? 0444 : 0; 782 + } else { 783 + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); 784 + if (xe_reg_is_valid(reg)) 785 + uval = xe_mmio_read32(mmio, reg); 786 + return uval ? 0444 : 0; 787 + } 812 788 case hwmon_power_crit: 813 - case hwmon_power_label: 814 789 if (channel == CHANNEL_CARD) { 815 790 xe_hwmon_pcode_read_i1(hwmon, &uval); 816 - return (uval & POWER_SETUP_I1_WATTS) ? (attr == hwmon_power_label) ? 817 - 0444 : 0644 : 0; 791 + return (uval & POWER_SETUP_I1_WATTS) ? 0644 : 0; 818 792 } 819 793 break; 794 + case hwmon_power_label: 795 + if (hwmon->xe->info.has_mbx_power_limits) { 796 + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); 797 + } else { 798 + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); 799 + if (xe_reg_is_valid(reg)) 800 + uval = xe_mmio_read32(mmio, reg); 801 + 802 + if (!uval) { 803 + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 804 + if (xe_reg_is_valid(reg)) 805 + uval = xe_mmio_read32(mmio, reg); 806 + } 807 + } 808 + if ((!(uval & PWR_LIM_EN)) && channel == CHANNEL_CARD) { 809 + xe_hwmon_pcode_read_i1(hwmon, &uval); 810 + return (uval & POWER_SETUP_I1_WATTS) ? 0444 : 0; 811 + } 812 + return (uval) ? 
0444 : 0; 820 813 default: 821 814 return 0; 822 815 } ··· 859 790 { 860 791 switch (attr) { 861 792 case hwmon_power_max: 793 + case hwmon_power_cap: 862 794 xe_hwmon_power_max_read(hwmon, attr, channel, val); 863 795 return 0; 864 796 case hwmon_power_rated_max: ··· 876 806 xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) 877 807 { 878 808 switch (attr) { 809 + case hwmon_power_cap: 879 810 case hwmon_power_max: 880 811 return xe_hwmon_power_max_write(hwmon, attr, channel, val); 881 812 case hwmon_power_crit: ··· 959 888 static umode_t 960 889 xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) 961 890 { 891 + long energy = 0; 892 + 962 893 switch (attr) { 963 894 case hwmon_energy_input: 964 895 case hwmon_energy_label: 965 - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 966 - channel)) ? 0444 : 0; 896 + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { 897 + xe_hwmon_energy_get(hwmon, channel, &energy); 898 + return energy ? 0444 : 0; 899 + } else { 900 + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 901 + channel)) ? 0444 : 0; 902 + } 967 903 default: 968 904 return 0; 969 905 } ··· 1206 1128 struct xe_reg pkg_power_sku_unit; 1207 1129 1208 1130 if (hwmon->xe->info.has_mbx_power_limits) { 1209 - /* Check if card firmware support mailbox power limits commands. */ 1131 + /* Check if GPU firmware support mailbox power limits commands. 
*/ 1210 1132 if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD, 1211 1133 &hwmon->pl1_on_boot[CHANNEL_CARD]) | 1212 1134 xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG, 1213 - &hwmon->pl1_on_boot[CHANNEL_PKG])) { 1135 + &hwmon->pl1_on_boot[CHANNEL_PKG]) | 1136 + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_CARD, 1137 + &hwmon->pl2_on_boot[CHANNEL_CARD]) | 1138 + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_PKG, 1139 + &hwmon->pl2_on_boot[CHANNEL_PKG])) { 1214 1140 drm_warn(&hwmon->xe->drm, 1215 - "Failed to read power limits, check card firmware !\n"); 1141 + "Failed to read power limits, check GPU firmware !\n"); 1216 1142 } else { 1217 1143 drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); 1218 1144 /* Write default limits to read from pcode from now on. */ 1219 - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, 1220 - CHANNEL_CARD, 1221 - hwmon->pl1_on_boot[CHANNEL_CARD]); 1222 - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, 1223 - CHANNEL_PKG, 1224 - hwmon->pl1_on_boot[CHANNEL_PKG]); 1145 + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, 1146 + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, 1147 + hwmon->pl1_on_boot[CHANNEL_CARD]); 1148 + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, 1149 + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, 1150 + hwmon->pl1_on_boot[CHANNEL_PKG]); 1151 + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, 1152 + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, 1153 + hwmon->pl2_on_boot[CHANNEL_CARD]); 1154 + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, 1155 + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, 1156 + hwmon->pl2_on_boot[CHANNEL_PKG]); 1225 1157 hwmon->scl_shift_power = PWR_UNIT; 1226 1158 hwmon->scl_shift_energy = ENERGY_UNIT; 1227 1159 hwmon->scl_shift_time = TIME_UNIT; ··· 1315 1227 1316 1228 return 0; 1317 1229 } 1318 - 1230 + MODULE_IMPORT_NS("INTEL_PMT_TELEMETRY");

+71 -30

drivers/gpu/drm/xe/xe_lrc.c

··· 40 40 41 41 #define LRC_PPHWSP_SIZE SZ_4K 42 42 #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K 43 + #define LRC_WA_BB_SIZE SZ_4K 43 44 44 45 static struct xe_device * 45 46 lrc_to_xe(struct xe_lrc *lrc) ··· 655 654 #define LRC_SEQNO_PPHWSP_OFFSET 512 656 655 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) 657 656 #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) 657 + #define LRC_ENGINE_ID_PPHWSP_OFFSET 1024 658 658 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 659 - #define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 660 659 661 660 u32 xe_lrc_regs_offset(struct xe_lrc *lrc) 662 661 { ··· 911 910 { 912 911 xe_hw_fence_ctx_finish(&lrc->fence_ctx); 913 912 xe_bo_unpin_map_no_vm(lrc->bo); 914 - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); 915 913 } 916 914 917 915 /* 918 - * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active 919 - * context run ticks. 920 - * @lrc: Pointer to the lrc. 916 + * wa_bb_setup_utilization() - Write commands to wa bb to assist 917 + * in calculating active context run ticks. 921 918 * 922 919 * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the 923 920 * context, but only gets updated when the context switches out. In order to ··· 940 941 * store it in the PPHSWP. 
941 942 */ 942 943 #define CONTEXT_ACTIVE 1ULL 943 - static int xe_lrc_setup_utilization(struct xe_lrc *lrc) 944 + static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 945 + u32 *batch, size_t max_len) 944 946 { 945 - u32 *cmd, *buf = NULL; 947 + u32 *cmd = batch; 946 948 947 - if (lrc->bb_per_ctx_bo->vmap.is_iomem) { 948 - buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); 949 - if (!buf) 950 - return -ENOMEM; 951 - cmd = buf; 952 - } else { 953 - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; 954 - } 949 + if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) 950 + return -ENOSPC; 955 951 956 952 *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; 957 953 *cmd++ = ENGINE_ID(0).addr; ··· 965 971 *cmd++ = upper_32_bits(CONTEXT_ACTIVE); 966 972 } 967 973 974 + return cmd - batch; 975 + } 976 + 977 + struct wa_bb_setup { 978 + ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 979 + u32 *batch, size_t max_size); 980 + }; 981 + 982 + static size_t wa_bb_offset(struct xe_lrc *lrc) 983 + { 984 + return lrc->bo->size - LRC_WA_BB_SIZE; 985 + } 986 + 987 + static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) 988 + { 989 + const size_t max_size = LRC_WA_BB_SIZE; 990 + static const struct wa_bb_setup funcs[] = { 991 + { .setup = wa_bb_setup_utilization }, 992 + }; 993 + ssize_t remain; 994 + u32 *cmd, *buf = NULL; 995 + 996 + if (lrc->bo->vmap.is_iomem) { 997 + buf = kmalloc(max_size, GFP_KERNEL); 998 + if (!buf) 999 + return -ENOMEM; 1000 + cmd = buf; 1001 + } else { 1002 + cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); 1003 + } 1004 + 1005 + remain = max_size / sizeof(*cmd); 1006 + 1007 + for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) { 1008 + ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain); 1009 + 1010 + remain -= len; 1011 + 1012 + /* 1013 + * There should always be at least 1 additional dword for 1014 + * the end marker 1015 + */ 1016 + if (len < 0 || xe_gt_WARN_ON(lrc->gt, remain < 1)) 1017 + goto fail; 
1018 + 1019 + cmd += len; 1020 + } 1021 + 968 1022 *cmd++ = MI_BATCH_BUFFER_END; 969 1023 970 1024 if (buf) { 971 - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, 972 - buf, (cmd - buf) * sizeof(*cmd)); 1025 + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, 1026 + wa_bb_offset(lrc), buf, 1027 + (cmd - buf) * sizeof(*cmd)); 973 1028 kfree(buf); 974 1029 } 975 1030 976 - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, 977 - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); 1031 + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + 1032 + wa_bb_offset(lrc) + 1); 978 1033 979 1034 return 0; 1035 + 1036 + fail: 1037 + kfree(buf); 1038 + return -ENOSPC; 980 1039 } 981 1040 982 1041 #define PVC_CTX_ASID (0x2e + 1) ··· 1065 1018 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address 1066 1019 * via VM bind calls. 1067 1020 */ 1068 - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size, 1021 + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, 1022 + lrc_size + LRC_WA_BB_SIZE, 1069 1023 ttm_bo_type_kernel, 1070 1024 bo_flags); 1071 1025 if (IS_ERR(lrc->bo)) 1072 1026 return PTR_ERR(lrc->bo); 1073 - 1074 - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, 1075 - ttm_bo_type_kernel, 1076 - bo_flags); 1077 - if (IS_ERR(lrc->bb_per_ctx_bo)) { 1078 - err = PTR_ERR(lrc->bb_per_ctx_bo); 1079 - goto err_lrc_finish; 1080 - } 1081 1027 1082 1028 lrc->size = lrc_size; 1083 1029 lrc->ring.size = ring_size; ··· 1179 1139 map = __xe_lrc_start_seqno_map(lrc); 1180 1140 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 1181 1141 1182 - err = xe_lrc_setup_utilization(lrc); 1142 + err = setup_wa_bb(lrc, hwe); 1183 1143 if (err) 1184 1144 goto err_lrc_finish; 1185 1145 ··· 1859 1819 snapshot->seqno = xe_lrc_seqno(lrc); 1860 1820 snapshot->lrc_bo = xe_bo_get(lrc->bo); 1861 1821 snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); 1862 - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; 1822 + 
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - 1823 + LRC_WA_BB_SIZE; 1863 1824 snapshot->lrc_snapshot = NULL; 1864 1825 snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); 1865 1826 snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);

-3

drivers/gpu/drm/xe/xe_lrc_types.h

··· 53 53 54 54 /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ 55 55 u64 ctx_timestamp; 56 - 57 - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ 58 - struct xe_bo *bb_per_ctx_bo; 59 56 }; 60 57 61 58 struct xe_lrc_snapshot;

+18

drivers/gpu/drm/xe/xe_map.h

··· 78 78 iosys_map_wr(map__, offset__, type__, val__); \ 79 79 }) 80 80 81 + #define xe_map_rd_array(xe__, map__, index__, type__) \ 82 + xe_map_rd(xe__, map__, (index__) * sizeof(type__), type__) 83 + 84 + #define xe_map_wr_array(xe__, map__, index__, type__, val__) \ 85 + xe_map_wr(xe__, map__, (index__) * sizeof(type__), type__, val__) 86 + 87 + #define xe_map_rd_array_u32(xe__, map__, index__) \ 88 + xe_map_rd_array(xe__, map__, index__, u32) 89 + 90 + #define xe_map_wr_array_u32(xe__, map__, index__, val__) \ 91 + xe_map_wr_array(xe__, map__, index__, u32, val__) 92 + 93 + #define xe_map_rd_ring_u32(xe__, map__, index__, size__) \ 94 + xe_map_rd_array_u32(xe__, map__, (index__) % (size__)) 95 + 96 + #define xe_map_wr_ring_u32(xe__, map__, index__, size__, val__) \ 97 + xe_map_wr_array_u32(xe__, map__, (index__) % (size__), val__) 98 + 81 99 #define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \ 82 100 struct xe_device *__xe = xe__; \ 83 101 xe_device_assert_mem_access(__xe); \

+9 -2

drivers/gpu/drm/xe/xe_module.c

··· 18 18 #include "xe_observation.h" 19 19 #include "xe_sched_job.h" 20 20 21 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 22 + #define DEFAULT_GUC_LOG_LEVEL 3 23 + #else 24 + #define DEFAULT_GUC_LOG_LEVEL 1 25 + #endif 26 + 21 27 struct xe_modparam xe_modparam = { 22 28 .probe_display = true, 23 - .guc_log_level = 3, 29 + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, 24 30 .force_probe = CONFIG_DRM_XE_FORCE_PROBE, 25 31 .wedged_mode = 1, 26 32 .svm_notifier_size = 512, ··· 46 40 MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); 47 41 48 42 module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); 49 - MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); 43 + MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " 44 + "[default=" __stringify(DEFAULT_GUC_LOG_LEVEL) "])"); 50 45 51 46 module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400); 52 47 MODULE_PARM_DESC(guc_firmware_path,

+159 -53

drivers/gpu/drm/xe/xe_oa.c

··· 43 43 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) 44 44 #define XE_OA_UNIT_INVALID U32_MAX 45 45 46 + enum xe_oam_unit_type { 47 + XE_OAM_UNIT_SAG, 48 + XE_OAM_UNIT_SCMI_0, 49 + XE_OAM_UNIT_SCMI_1, 50 + }; 51 + 46 52 enum xe_oa_submit_deps { 47 53 XE_OA_SUBMIT_NO_DEPS, 48 54 XE_OA_SUBMIT_ADD_DEPS, ··· 83 77 84 78 struct xe_oa_open_param { 85 79 struct xe_file *xef; 86 - u32 oa_unit_id; 80 + struct xe_oa_unit *oa_unit; 87 81 bool sample; 88 82 u32 metric_set; 89 83 enum xe_oa_format_name oa_format; ··· 200 194 201 195 static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) 202 196 { 203 - return &stream->hwe->oa_unit->regs; 197 + return &stream->oa_unit->regs; 204 198 } 205 199 206 200 static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) ··· 460 454 461 455 static u32 __oactrl_used_bits(struct xe_oa_stream *stream) 462 456 { 463 - return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? 457 + return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? 464 458 OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; 465 459 } 466 460 ··· 481 475 __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; 482 476 483 477 if (GRAPHICS_VER(stream->oa->xe) >= 20 && 484 - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) 478 + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) 485 479 val |= OAG_OACONTROL_OA_PES_DISAG_EN; 486 480 487 481 xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); ··· 844 838 845 839 /* Reset PMON Enable to save power. 
*/ 846 840 xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); 841 + 842 + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || 843 + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && 844 + GRAPHICS_VER(stream->oa->xe) >= 30) 845 + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, OAM_LAT_MEASURE_ENABLE, 0); 847 846 } 848 847 849 848 static void xe_oa_stream_destroy(struct xe_oa_stream *stream) 850 849 { 851 - struct xe_oa_unit *u = stream->hwe->oa_unit; 850 + struct xe_oa_unit *u = stream->oa_unit; 852 851 struct xe_gt *gt = stream->hwe->gt; 853 852 854 853 if (WARN_ON(stream != u->exclusive_stream)) ··· 1116 1105 */ 1117 1106 sqcnt1 = SQCNT1_PMON_ENABLE | 1118 1107 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); 1119 - 1120 1108 xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); 1109 + 1110 + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || 1111 + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && 1112 + GRAPHICS_VER(stream->oa->xe) >= 30) 1113 + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, 0, OAM_LAT_MEASURE_ENABLE); 1121 1114 1122 1115 /* Configure OAR/OAC */ 1123 1116 if (stream->exec_q) { ··· 1154 1139 return -EINVAL; 1155 1140 } 1156 1141 1142 + static struct xe_oa_unit *xe_oa_lookup_oa_unit(struct xe_oa *oa, u32 oa_unit_id) 1143 + { 1144 + struct xe_gt *gt; 1145 + int gt_id, i; 1146 + 1147 + for_each_gt(gt, oa->xe, gt_id) { 1148 + for (i = 0; i < gt->oa.num_oa_units; i++) { 1149 + struct xe_oa_unit *u = &gt->oa.oa_unit[i]; 1150 + 1151 + if (u->oa_unit_id == oa_unit_id) 1152 + return u; 1153 + } 1154 + } 1155 + 1156 + return NULL; 1157 + } 1158 + 1157 1159 static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, 1158 1160 struct xe_oa_open_param *param) 1159 1161 { 1160 - if (value >= oa->oa_unit_ids) { 1162 + param->oa_unit = xe_oa_lookup_oa_unit(oa, value); 1163 + if (!param->oa_unit) { 1161 1164 drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); 1162 1165 return -EINVAL; 1163 1166 } 1164 - 
param->oa_unit_id = value; 1165 1167 return 0; 1166 1168 } 1167 1169 ··· 1709 1677 static int xe_oa_stream_init(struct xe_oa_stream *stream, 1710 1678 struct xe_oa_open_param *param) 1711 1679 { 1712 - struct xe_oa_unit *u = param->hwe->oa_unit; 1713 1680 struct xe_gt *gt = param->hwe->gt; 1714 1681 unsigned int fw_ref; 1715 1682 int ret; 1716 1683 1717 1684 stream->exec_q = param->exec_q; 1718 1685 stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; 1686 + stream->oa_unit = param->oa_unit; 1719 1687 stream->hwe = param->hwe; 1720 1688 stream->gt = stream->hwe->gt; 1721 1689 stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; ··· 1736 1704 * buffer whose size, circ_size, is a multiple of the report size 1737 1705 */ 1738 1706 if (GRAPHICS_VER(stream->oa->xe) >= 20 && 1739 - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) 1707 + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) 1740 1708 stream->oa_buffer.circ_size = 1741 1709 param->oa_buffer_size - 1742 1710 param->oa_buffer_size % stream->oa_buffer.format->size; ··· 1794 1762 drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", 1795 1763 stream->oa_config->uuid); 1796 1764 1797 - WRITE_ONCE(u->exclusive_stream, stream); 1765 + WRITE_ONCE(stream->oa_unit->exclusive_stream, stream); 1798 1766 1799 1767 hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC, 1800 1768 HRTIMER_MODE_REL); ··· 1830 1798 int ret; 1831 1799 1832 1800 /* We currently only allow exclusive access */ 1833 - if (param->hwe->oa_unit->exclusive_stream) { 1801 + if (param->oa_unit->exclusive_stream) { 1834 1802 drm_dbg(&oa->xe->drm, "OA unit already in use\n"); 1835 1803 ret = -EBUSY; 1836 1804 goto exit; ··· 1906 1874 return div_u64(nom + den - 1, den); 1907 1875 } 1908 1876 1909 - static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) 1877 + static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int 
type) 1910 1878 { 1911 - switch (hwe->oa_unit->type) { 1879 + switch (param->oa_unit->type) { 1912 1880 case DRM_XE_OA_UNIT_TYPE_OAG: 1913 1881 return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || 1914 1882 type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; 1915 1883 case DRM_XE_OA_UNIT_TYPE_OAM: 1884 + case DRM_XE_OA_UNIT_TYPE_OAM_SAG: 1916 1885 return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; 1917 1886 default: 1918 1887 return false; ··· 1932 1899 hwe->oa_unit->oa_unit_id : U16_MAX; 1933 1900 } 1934 1901 1902 + /* A hwe must be assigned to stream/oa_unit for batch submissions */ 1935 1903 static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) 1936 1904 { 1937 - struct xe_gt *gt; 1938 - int i, ret = 0; 1905 + struct xe_hw_engine *hwe; 1906 + enum xe_hw_engine_id id; 1907 + int ret = 0; 1939 1908 1909 + /* If not provided, OA unit defaults to OA unit 0 as per uapi */ 1910 + if (!param->oa_unit) 1911 + param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; 1912 + 1913 + /* When we have an exec_q, get hwe from the exec_q */ 1940 1914 if (param->exec_q) { 1941 - /* When we have an exec_q, get hwe from the exec_q */ 1942 1915 param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, 1943 1916 param->engine_instance, true); 1944 - } else { 1945 - struct xe_hw_engine *hwe; 1946 - enum xe_hw_engine_id id; 1917 + if (!param->hwe || param->hwe->oa_unit != param->oa_unit) 1918 + goto err; 1919 + goto out; 1920 + } 1947 1921 1948 - /* Else just get the first hwe attached to the oa unit */ 1949 - for_each_gt(gt, oa->xe, i) { 1950 - for_each_hw_engine(hwe, gt, id) { 1951 - if (xe_oa_unit_id(hwe) == param->oa_unit_id) { 1952 - param->hwe = hwe; 1953 - goto out; 1954 - } 1955 - } 1922 + /* Else just get the first hwe attached to the oa unit */ 1923 + for_each_hw_engine(hwe, param->oa_unit->gt, id) { 1924 + if (hwe->oa_unit == param->oa_unit) { 1925 + param->hwe = hwe; 1926 
+ goto out; 1956 1927 } 1957 1928 } 1958 - out: 1959 - if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { 1960 - drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", 1961 - param->exec_q ? param->exec_q->class : -1, 1962 - param->engine_instance, param->oa_unit_id); 1963 - ret = -EINVAL; 1964 - } 1965 1929 1930 + /* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */ 1931 + for_each_hw_engine(hwe, param->oa_unit->gt, id) { 1932 + if (!hwe->oa_unit) 1933 + continue; 1934 + 1935 + param->hwe = hwe; 1936 + goto out; 1937 + } 1938 + err: 1939 + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", 1940 + param->exec_q ? param->exec_q->class : -1, 1941 + param->engine_instance, param->oa_unit->oa_unit_id); 1942 + ret = -EINVAL; 1943 + out: 1966 1944 return ret; 1967 1945 } 1968 1946 ··· 2051 2007 2052 2008 f = &oa->oa_formats[param.oa_format]; 2053 2009 if (!param.oa_format || !f->size || 2054 - !engine_supports_oa_format(param.hwe, f->type)) { 2010 + !oa_unit_supports_oa_format(&param, f->type)) { 2055 2011 drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", 2056 2012 param.oa_format, f->type, f->size, param.hwe->class); 2057 2013 ret = -EINVAL; ··· 2199 2155 static const struct xe_mmio_range xe2_oa_mux_regs[] = { 2200 2156 { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ 2201 2157 { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ 2158 + { .start = 0xB01C, .end = 0xB01C }, /* LNCF_MISC_CONFIG_REGISTER0 */ 2202 2159 { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ 2203 2160 { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ 2204 2161 { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ ··· 2493 2448 2494 2449 static u32 num_oa_units_per_gt(struct xe_gt *gt) 2495 2450 { 2496 - return 1; 2451 + if (!xe_gt_is_media_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) 2452 + return 1; 2453 + else if 
(!IS_DGFX(gt_to_xe(gt))) 2454 + return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ 2455 + else 2456 + return XE_OAM_UNIT_SCMI_1 + 1; /* SAG + SCMI_0 + SCMI_1 */ 2497 2457 } 2498 2458 2499 2459 static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) 2500 2460 { 2501 - if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { 2502 - /* 2503 - * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices 2504 - * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA 2505 - */ 2506 - xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); 2461 + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) 2462 + return XE_OA_UNIT_INVALID; 2507 2463 2464 + xe_gt_WARN_ON(hwe->gt, !xe_gt_is_media_type(hwe->gt)); 2465 + 2466 + if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) 2508 2467 return 0; 2509 - } 2468 + /* 2469 + * XE_OAM_UNIT_SAG has only GSCCS attached to it, but only on some platforms. Also 2470 + * GSCCS cannot be used to submit batches to program the OAM unit. Therefore we don't 2471 + * assign an OA unit to GSCCS. This means that XE_OAM_UNIT_SAG is exposed as an OA 2472 + * unit without attached engines. Fused off engines can also result in oa_unit's with 2473 + * num_engines == 0. OA streams can be opened on all OA units. 
2474 + */ 2475 + else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0) 2476 + return XE_OA_UNIT_INVALID; 2477 + else if (!IS_DGFX(gt_to_xe(hwe->gt))) 2478 + return XE_OAM_UNIT_SCMI_0; 2479 + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE) 2480 + return (hwe->instance / 2 & 0x1) + 1; 2481 + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE) 2482 + return (hwe->instance & 0x1) + 1; 2510 2483 2511 2484 return XE_OA_UNIT_INVALID; 2512 2485 } ··· 2538 2475 2539 2476 case XE_ENGINE_CLASS_VIDEO_DECODE: 2540 2477 case XE_ENGINE_CLASS_VIDEO_ENHANCE: 2478 + case XE_ENGINE_CLASS_OTHER: 2541 2479 return __hwe_oam_unit(hwe); 2542 2480 2543 2481 default: ··· 2578 2514 2579 2515 static void __xe_oa_init_oa_units(struct xe_gt *gt) 2580 2516 { 2581 - const u32 mtl_oa_base[] = { 0x13000 }; 2517 + /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */ 2518 + const u32 oam_base_addr[] = { 2519 + [XE_OAM_UNIT_SAG] = 0x13000, 2520 + [XE_OAM_UNIT_SCMI_0] = 0x14000, 2521 + [XE_OAM_UNIT_SCMI_1] = 0x14800, 2522 + }; 2582 2523 int i, num_units = gt->oa.num_oa_units; 2583 2524 2584 2525 for (i = 0; i < num_units; i++) { 2585 2526 struct xe_oa_unit *u = &gt->oa.oa_unit[i]; 2586 2527 2587 - if (gt->info.type != XE_GT_TYPE_MEDIA) { 2528 + if (!xe_gt_is_media_type(gt)) { 2588 2529 u->regs = __oag_regs(); 2589 2530 u->type = DRM_XE_OA_UNIT_TYPE_OAG; 2590 - } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { 2591 - u->regs = __oam_regs(mtl_oa_base[i]); 2592 - u->type = DRM_XE_OA_UNIT_TYPE_OAM; 2531 + } else { 2532 + xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); 2533 + u->regs = __oam_regs(oam_base_addr[i]); 2534 + u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ? 2535 + DRM_XE_OA_UNIT_TYPE_OAM_SAG : DRM_XE_OA_UNIT_TYPE_OAM; 2593 2536 } 2537 + 2538 + u->gt = gt; 2594 2539 2595 2540 xe_mmio_write32(&gt->mmio, u->regs.oa_ctrl, 0); 2596 2541 ··· 2633 2560 } 2634 2561 } 2635 2562 2636 - /* 2637 - * Fused off engines can result in oa_unit's with num_engines == 0. 
These units 2638 - * will appear in OA unit query, but no OA streams can be opened on them. 2639 - */ 2640 2563 gt->oa.num_oa_units = num_oa_units; 2641 2564 gt->oa.oa_unit = u; 2642 2565 ··· 2643 2574 return 0; 2644 2575 } 2645 2576 2577 + static void xe_oa_print_gt_oa_units(struct xe_gt *gt) 2578 + { 2579 + enum xe_hw_engine_id hwe_id; 2580 + struct xe_hw_engine *hwe; 2581 + struct xe_oa_unit *u; 2582 + char buf[256]; 2583 + int i, n; 2584 + 2585 + for (i = 0; i < gt->oa.num_oa_units; i++) { 2586 + u = &gt->oa.oa_unit[i]; 2587 + buf[0] = '\0'; 2588 + n = 0; 2589 + 2590 + for_each_hw_engine(hwe, gt, hwe_id) 2591 + if (xe_oa_unit_id(hwe) == u->oa_unit_id) 2592 + n += scnprintf(buf + n, sizeof(buf) - n, "%s ", hwe->name); 2593 + 2594 + xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n", u->oa_unit_id, u->type, buf); 2595 + } 2596 + } 2597 + 2598 + static void xe_oa_print_oa_units(struct xe_oa *oa) 2599 + { 2600 + struct xe_gt *gt; 2601 + int gt_id; 2602 + 2603 + for_each_gt(gt, oa->xe, gt_id) 2604 + xe_oa_print_gt_oa_units(gt); 2605 + } 2606 + 2646 2607 static int xe_oa_init_oa_units(struct xe_oa *oa) 2647 2608 { 2648 2609 struct xe_gt *gt; 2649 2610 int i, ret; 2611 + 2612 + /* Needed for OAM implementation here */ 2613 + BUILD_BUG_ON(XE_OAM_UNIT_SAG != 0); 2614 + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_0 != 1); 2615 + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_1 != 2); 2650 2616 2651 2617 for_each_gt(gt, oa->xe, i) { 2652 2618 ret = xe_oa_init_gt(gt); 2653 2619 if (ret) 2654 2620 return ret; 2655 2621 } 2622 + 2623 + xe_oa_print_oa_units(oa); 2656 2624 2657 2625 return 0; 2658 2626 }

+6

drivers/gpu/drm/xe/xe_oa_types.h

··· 95 95 /** @oa_unit_id: identifier for the OA unit */ 96 96 u16 oa_unit_id; 97 97 98 + /** @gt: gt associated with the OA unit */ 99 + struct xe_gt *gt; 100 + 98 101 /** @type: Type of OA unit - OAM, OAG etc. */ 99 102 enum drm_xe_oa_unit_type type; 100 103 ··· 184 181 185 182 /** @gt: gt associated with the oa stream */ 186 183 struct xe_gt *gt; 184 + 185 + /** @oa_unit: oa unit for this stream */ 186 + struct xe_oa_unit *oa_unit; 187 187 188 188 /** @hwe: hardware engine associated with this oa stream */ 189 189 struct xe_hw_engine *hwe;

+21 -23

drivers/gpu/drm/xe/xe_pat.c

··· 103 103 * 104 104 * Note: There is an implicit assumption in the driver that compression and 105 105 * coh_1way+ are mutually exclusive. If this is ever not true then userptr 106 - * and imported dma-buf from external device will have uncleared ccs state. 106 + * and imported dma-buf from external device will have uncleared ccs state. See 107 + * also xe_bo_needs_ccs_pages(). 107 108 */ 108 109 #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ 109 110 { \ ··· 163 162 static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], 164 163 int n_entries) 165 164 { 165 + struct xe_device *xe = gt_to_xe(gt); 166 + 166 167 for (int i = 0; i < n_entries; i++) { 167 168 struct xe_reg reg = XE_REG(_PAT_INDEX(i)); 168 169 169 170 xe_mmio_write32(&gt->mmio, reg, table[i].value); 170 171 } 172 + 173 + if (xe->pat.pat_ats) 174 + xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value); 175 + if (xe->pat.pat_pta) 176 + xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe->pat.pat_pta->value); 171 177 } 172 178 173 179 static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[], 174 180 int n_entries) 175 181 { 182 + struct xe_device *xe = gt_to_xe(gt); 183 + 176 184 for (int i = 0; i < n_entries; i++) { 177 185 struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i)); 178 186 179 187 xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value); 180 188 } 189 + 190 + if (xe->pat.pat_ats) 191 + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value); 192 + if (xe->pat.pat_pta) 193 + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value); 181 194 } 182 195 183 196 static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) ··· 318 303 .dump = xelpg_dump, 319 304 }; 320 305 321 - static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], 322 - int n_entries) 323 - { 324 - program_pat_mcr(gt, table, n_entries); 325 - 
xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); 326 - 327 - if (IS_DGFX(gt_to_xe(gt))) 328 - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value); 329 - } 330 - 331 - static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], 332 - int n_entries) 333 - { 334 - program_pat(gt, table, n_entries); 335 - xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value); 336 - 337 - if (IS_DGFX(gt_to_xe(gt))) 338 - xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value); 339 - } 340 - 341 306 static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) 342 307 { 343 308 struct xe_device *xe = gt_to_xe(gt); ··· 370 375 } 371 376 372 377 static const struct xe_pat_ops xe2_pat_ops = { 373 - .program_graphics = xe2lpg_program_pat, 374 - .program_media = xe2lpm_program_pat, 378 + .program_graphics = program_pat_mcr, 379 + .program_media = program_pat, 375 380 .dump = xe2_dump, 376 381 }; 377 382 ··· 380 385 if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { 381 386 xe->pat.ops = &xe2_pat_ops; 382 387 xe->pat.table = xe2_pat_table; 388 + xe->pat.pat_ats = &xe2_pat_ats; 389 + if (IS_DGFX(xe)) 390 + xe->pat.pat_pta = &xe2_pat_pta; 383 391 384 392 /* Wa_16023588340. XXX: Should use XE_WA */ 385 393 if (GRAPHICS_VERx100(xe) == 2001)

+1

drivers/gpu/drm/xe/xe_pci.c

··· 180 180 { 1271, "Xe_LPG", &graphics_xelpg }, 181 181 { 1274, "Xe_LPG+", &graphics_xelpg }, 182 182 { 2001, "Xe2_HPG", &graphics_xe2 }, 183 + { 2002, "Xe2_HPG", &graphics_xe2 }, 183 184 { 2004, "Xe2_LPG", &graphics_xe2 }, 184 185 { 3000, "Xe3_LPG", &graphics_xe2 }, 185 186 { 3001, "Xe3_LPG", &graphics_xe2 },

+106 -29

drivers/gpu/drm/xe/xe_pt.c

··· 907 907 struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; 908 908 u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); 909 909 910 + if (xe_vma_bo(vma)) 911 + xe_bo_assert_held(xe_vma_bo(vma)); 912 + else if (xe_vma_is_userptr(vma)) 913 + lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock); 914 + 910 915 if (!(pt_mask & BIT(tile->id))) 911 916 return false; 912 917 ··· 1463 1458 struct xe_vm *vm = pt_update->vops->vm; 1464 1459 struct xe_vma_ops *vops = pt_update->vops; 1465 1460 struct xe_vma_op *op; 1461 + unsigned long i; 1466 1462 int err; 1467 1463 1468 1464 err = xe_pt_pre_commit(pt_update); ··· 1473 1467 xe_svm_notifier_lock(vm); 1474 1468 1475 1469 list_for_each_entry(op, &vops->list, link) { 1476 - struct xe_svm_range *range = op->map_range.range; 1470 + struct xe_svm_range *range = NULL; 1477 1471 1478 1472 if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) 1479 1473 continue; 1480 1474 1481 - xe_svm_range_debug(range, "PRE-COMMIT"); 1475 + if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 1476 + xe_assert(vm->xe, 1477 + xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); 1478 + xa_for_each(&op->prefetch_range.range, i, range) { 1479 + xe_svm_range_debug(range, "PRE-COMMIT"); 1482 1480 1483 - xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); 1484 - xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); 1481 + if (!xe_svm_range_pages_valid(range)) { 1482 + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); 1483 + xe_svm_notifier_unlock(vm); 1484 + return -ENODATA; 1485 + } 1486 + } 1487 + } else { 1488 + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); 1489 + xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); 1490 + range = op->map_range.range; 1485 1491 1486 - if (!xe_svm_range_pages_valid(range)) { 1487 - xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); 1488 - xe_svm_notifier_unlock(vm); 1489 - return -EAGAIN; 1492 + xe_svm_range_debug(range, "PRE-COMMIT"); 1493 + 1494 + if 
(!xe_svm_range_pages_valid(range)) { 1495 + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); 1496 + xe_svm_notifier_unlock(vm); 1497 + return -EAGAIN; 1498 + } 1490 1499 } 1491 1500 } 1492 1501 ··· 1995 1974 return 0; 1996 1975 } 1997 1976 1977 + static bool 1978 + xe_pt_op_check_range_skip_invalidation(struct xe_vm_pgtable_update_op *pt_op, 1979 + struct xe_svm_range *range) 1980 + { 1981 + struct xe_vm_pgtable_update *update = pt_op->entries; 1982 + 1983 + XE_WARN_ON(!pt_op->num_entries); 1984 + 1985 + /* 1986 + * We can't skip the invalidation if we are removing PTEs that span more 1987 + * than the range, do some checks to ensure we are removing PTEs that 1988 + * are invalid. 1989 + */ 1990 + 1991 + if (pt_op->num_entries > 1) 1992 + return false; 1993 + 1994 + if (update->pt->level == 0) 1995 + return true; 1996 + 1997 + if (update->pt->level == 1) 1998 + return xe_svm_range_size(range) >= SZ_2M; 1999 + 2000 + return false; 2001 + } 2002 + 1998 2003 static int unbind_range_prepare(struct xe_vm *vm, 1999 2004 struct xe_tile *tile, 2000 2005 struct xe_vm_pgtable_update_ops *pt_update_ops, ··· 2049 2002 range->base.itree.last + 1); 2050 2003 ++pt_update_ops->current_op; 2051 2004 pt_update_ops->needs_svm_lock = true; 2052 - pt_update_ops->needs_invalidation = true; 2005 + pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) || 2006 + xe_vm_has_valid_gpu_mapping(tile, range->tile_present, 2007 + range->tile_invalidated) || 2008 + !xe_pt_op_check_range_skip_invalidation(pt_op, range); 2053 2009 2054 2010 xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries, 2055 2011 pt_op->num_entries); ··· 2115 2065 { 2116 2066 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2117 2067 2118 - if (xe_vma_is_cpu_addr_mirror(vma)) 2119 - break; 2068 + if (xe_vma_is_cpu_addr_mirror(vma)) { 2069 + struct xe_svm_range *range; 2070 + unsigned long i; 2120 2071 2121 - err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); 2122 - pt_update_ops->wait_vm_kernel = 
true; 2072 + xa_for_each(&op->prefetch_range.range, i, range) { 2073 + err = bind_range_prepare(vm, tile, pt_update_ops, 2074 + vma, range); 2075 + if (err) 2076 + return err; 2077 + } 2078 + } else { 2079 + err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); 2080 + pt_update_ops->wait_vm_kernel = true; 2081 + } 2123 2082 break; 2124 2083 } 2125 2084 case DRM_GPUVA_OP_DRIVER: ··· 2225 2166 DMA_RESV_USAGE_KERNEL : 2226 2167 DMA_RESV_USAGE_BOOKKEEP); 2227 2168 } 2228 - vma->tile_present |= BIT(tile->id); 2229 - vma->tile_staged &= ~BIT(tile->id); 2169 + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 2170 + WRITE_ONCE(vma->tile_present, vma->tile_present | BIT(tile->id)); 2230 2171 if (invalidate_on_bind) 2231 - vma->tile_invalidated |= BIT(tile->id); 2172 + WRITE_ONCE(vma->tile_invalidated, 2173 + vma->tile_invalidated | BIT(tile->id)); 2174 + else 2175 + WRITE_ONCE(vma->tile_invalidated, 2176 + vma->tile_invalidated & ~BIT(tile->id)); 2177 + vma->tile_staged &= ~BIT(tile->id); 2232 2178 if (xe_vma_is_userptr(vma)) { 2233 2179 lockdep_assert_held_read(&vm->userptr.notifier_lock); 2234 2180 to_userptr_vma(vma)->userptr.initial_bind = true; ··· 2278 2214 spin_unlock(&vm->userptr.invalidated_lock); 2279 2215 } 2280 2216 } 2217 + } 2218 + 2219 + static void range_present_and_invalidated_tile(struct xe_vm *vm, 2220 + struct xe_svm_range *range, 2221 + u8 tile_id) 2222 + { 2223 + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 2224 + 2225 + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); 2226 + 2227 + WRITE_ONCE(range->tile_present, range->tile_present | BIT(tile_id)); 2228 + WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~BIT(tile_id)); 2281 2229 } 2282 2230 2283 2231 static void op_commit(struct xe_vm *vm, ··· 2339 2263 { 2340 2264 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2341 2265 2342 - if (!xe_vma_is_cpu_addr_mirror(vma)) 2266 + if (xe_vma_is_cpu_addr_mirror(vma)) { 2267 + 
struct xe_svm_range *range = NULL; 2268 + unsigned long i; 2269 + 2270 + xa_for_each(&op->prefetch_range.range, i, range) 2271 + range_present_and_invalidated_tile(vm, range, tile->id); 2272 + } else { 2343 2273 bind_op_commit(vm, tile, pt_update_ops, vma, fence, 2344 2274 fence2, false); 2275 + } 2345 2276 break; 2346 2277 } 2347 2278 case DRM_GPUVA_OP_DRIVER: 2348 2279 { 2349 - /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ 2350 - 2351 - if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { 2352 - WRITE_ONCE(op->map_range.range->tile_present, 2353 - op->map_range.range->tile_present | 2354 - BIT(tile->id)); 2355 - WRITE_ONCE(op->map_range.range->tile_invalidated, 2356 - op->map_range.range->tile_invalidated & 2357 - ~BIT(tile->id)); 2358 - } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { 2280 + /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 2281 + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) 2282 + range_present_and_invalidated_tile(vm, op->map_range.range, tile->id); 2283 + else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) 2359 2284 WRITE_ONCE(op->unmap_range.range->tile_present, 2360 2285 op->unmap_range.range->tile_present & 2361 2286 ~BIT(tile->id)); 2362 - } 2287 + 2363 2288 break; 2364 2289 } 2365 2290 default: ··· 2553 2476 kfree(mfence); 2554 2477 kfree(ifence); 2555 2478 kill_vm_tile1: 2556 - if (err != -EAGAIN && tile->id) 2479 + if (err != -EAGAIN && err != -ENODATA && tile->id) 2557 2480 xe_vm_kill(vops->vm, false); 2558 2481 2559 2482 return ERR_PTR(err);

+89 -64

drivers/gpu/drm/xe/xe_pxp.c

··· 504 504 return 0; 505 505 } 506 506 507 - static void __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) 508 - { 509 - spin_lock_irq(&pxp->queues.lock); 510 - list_add_tail(&q->pxp.link, &pxp->queues.list); 511 - spin_unlock_irq(&pxp->queues.lock); 512 - } 513 - 514 - /** 515 - * xe_pxp_exec_queue_add - add a queue to the PXP list 516 - * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) 517 - * @q: the queue to add to the list 518 - * 519 - * If PXP is enabled and the prerequisites are done, start the PXP ARB 520 - * session (if not already running) and add the queue to the PXP list. Note 521 - * that the queue must have previously been marked as using PXP with 522 - * xe_pxp_exec_queue_set_type. 523 - * 524 - * Returns 0 if the PXP ARB session is running and the queue is in the list, 525 - * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, 526 - * other errno value if something goes wrong during the session start. 527 - */ 528 - int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) 507 + static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) 529 508 { 530 509 int ret = 0; 510 + 511 + /* 512 + * A queue can be added to the list only if the PXP is in active status, 513 + * otherwise the termination might not handle it correctly. 
514 + */ 515 + mutex_lock(&pxp->mutex); 516 + 517 + if (pxp->status == XE_PXP_ACTIVE) { 518 + spin_lock_irq(&pxp->queues.lock); 519 + list_add_tail(&q->pxp.link, &pxp->queues.list); 520 + spin_unlock_irq(&pxp->queues.lock); 521 + } else if (pxp->status == XE_PXP_ERROR || pxp->status == XE_PXP_SUSPENDED) { 522 + ret = -EIO; 523 + } else { 524 + ret = -EBUSY; /* try again later */ 525 + } 526 + 527 + mutex_unlock(&pxp->mutex); 528 + 529 + return ret; 530 + } 531 + 532 + static int pxp_start(struct xe_pxp *pxp, u8 type) 533 + { 534 + int ret = 0; 535 + bool restart = false; 531 536 532 537 if (!xe_pxp_is_enabled(pxp)) 533 538 return -ENODEV; 534 539 535 540 /* we only support HWDRM sessions right now */ 536 - xe_assert(pxp->xe, q->pxp.type == DRM_XE_PXP_TYPE_HWDRM); 537 - 538 - /* 539 - * Runtime suspend kills PXP, so we take a reference to prevent it from 540 - * happening while we have active queues that use PXP 541 - */ 542 - xe_pm_runtime_get(pxp->xe); 541 + xe_assert(pxp->xe, type == DRM_XE_PXP_TYPE_HWDRM); 543 542 544 543 /* get_readiness_status() returns 0 for in-progress and 1 for done */ 545 544 ret = xe_pxp_get_readiness_status(pxp); 546 - if (ret <= 0) { 547 - if (!ret) 548 - ret = -EBUSY; 549 - goto out; 550 - } 545 + if (ret <= 0) 546 + return ret ?: -EBUSY; 547 + 551 548 ret = 0; 552 549 553 550 wait_for_idle: 554 551 /* 555 552 * if there is an action in progress, wait for it. We need to wait 556 553 * outside the lock because the completion is done from within the lock. 557 - * Note that the two action should never be pending at the same time. 554 + * Note that the two actions should never be pending at the same time. 
558 555 */ 559 556 if (!wait_for_completion_timeout(&pxp->termination, 560 - msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) { 561 - ret = -ETIMEDOUT; 562 - goto out; 563 - } 557 + msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) 558 + return -ETIMEDOUT; 564 559 565 560 if (!wait_for_completion_timeout(&pxp->activation, 566 - msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) { 567 - ret = -ETIMEDOUT; 568 - goto out; 569 - } 561 + msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) 562 + return -ETIMEDOUT; 570 563 571 564 mutex_lock(&pxp->mutex); 572 565 ··· 567 574 switch (pxp->status) { 568 575 case XE_PXP_ERROR: 569 576 ret = -EIO; 570 - break; 577 + goto out_unlock; 571 578 case XE_PXP_ACTIVE: 572 - __exec_queue_add(pxp, q); 573 - mutex_unlock(&pxp->mutex); 574 - goto out; 579 + goto out_unlock; 575 580 case XE_PXP_READY_TO_START: 576 581 pxp->status = XE_PXP_START_IN_PROGRESS; 577 582 reinit_completion(&pxp->activation); ··· 577 586 case XE_PXP_START_IN_PROGRESS: 578 587 /* If a start is in progress then the completion must not be done */ 579 588 XE_WARN_ON(completion_done(&pxp->activation)); 580 - mutex_unlock(&pxp->mutex); 581 - goto wait_for_idle; 589 + restart = true; 590 + goto out_unlock; 582 591 case XE_PXP_NEEDS_TERMINATION: 583 592 mark_termination_in_progress(pxp); 584 593 break; ··· 586 595 case XE_PXP_NEEDS_ADDITIONAL_TERMINATION: 587 596 /* If a termination is in progress then the completion must not be done */ 588 597 XE_WARN_ON(completion_done(&pxp->termination)); 589 - mutex_unlock(&pxp->mutex); 590 - goto wait_for_idle; 598 + restart = true; 599 + goto out_unlock; 591 600 case XE_PXP_SUSPENDED: 592 601 default: 593 602 drm_err(&pxp->xe->drm, "unexpected state during PXP start: %u\n", pxp->status); 594 603 ret = -EIO; 595 - break; 604 + goto out_unlock; 596 605 } 597 606 598 607 mutex_unlock(&pxp->mutex); 599 - 600 - if (ret) 601 - goto out; 602 608 603 609 if (!completion_done(&pxp->termination)) { 604 610 ret = pxp_terminate_hw(pxp); ··· 603 615 
drm_err(&pxp->xe->drm, "PXP termination failed before start\n"); 604 616 mutex_lock(&pxp->mutex); 605 617 pxp->status = XE_PXP_ERROR; 606 - mutex_unlock(&pxp->mutex); 607 618 608 - goto out; 619 + goto out_unlock; 609 620 } 610 621 611 622 goto wait_for_idle; ··· 626 639 if (pxp->status != XE_PXP_START_IN_PROGRESS) { 627 640 drm_err(&pxp->xe->drm, "unexpected state after PXP start: %u\n", pxp->status); 628 641 pxp->status = XE_PXP_NEEDS_TERMINATION; 629 - mutex_unlock(&pxp->mutex); 630 - goto wait_for_idle; 642 + restart = true; 643 + goto out_unlock; 631 644 } 632 645 633 646 /* If everything went ok, update the status and add the queue to the list */ 634 - if (!ret) { 647 + if (!ret) 635 648 pxp->status = XE_PXP_ACTIVE; 636 - __exec_queue_add(pxp, q); 637 - } else { 649 + else 638 650 pxp->status = XE_PXP_ERROR; 639 - } 640 651 652 + out_unlock: 641 653 mutex_unlock(&pxp->mutex); 642 654 643 - out: 655 + if (restart) 656 + goto wait_for_idle; 657 + 658 + return ret; 659 + } 660 + 661 + /** 662 + * xe_pxp_exec_queue_add - add a queue to the PXP list 663 + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) 664 + * @q: the queue to add to the list 665 + * 666 + * If PXP is enabled and the prerequisites are done, start the PXP default 667 + * session (if not already running) and add the queue to the PXP list. 668 + * 669 + * Returns 0 if the PXP session is running and the queue is in the list, 670 + * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, 671 + * other errno value if something goes wrong during the session start. 
672 + */ 673 + int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) 674 + { 675 + int ret; 676 + 677 + if (!xe_pxp_is_enabled(pxp)) 678 + return -ENODEV; 679 + 680 + /* 681 + * Runtime suspend kills PXP, so we take a reference to prevent it from 682 + * happening while we have active queues that use PXP 683 + */ 684 + xe_pm_runtime_get(pxp->xe); 685 + 686 + start: 687 + ret = pxp_start(pxp, q->pxp.type); 688 + 689 + if (!ret) { 690 + ret = __exec_queue_add(pxp, q); 691 + if (ret == -EBUSY) 692 + goto start; 693 + } 694 + 644 695 /* 645 696 * in the successful case the PM ref is released from 646 697 * xe_pxp_exec_queue_remove

+2 -2

drivers/gpu/drm/xe/xe_query.c

··· 683 683 du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); 684 684 du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | 685 685 DRM_XE_OA_CAPS_OA_BUFFER_SIZE | 686 - DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; 687 - 686 + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS | 687 + DRM_XE_OA_CAPS_OAM; 688 688 j = 0; 689 689 for_each_hw_engine(hwe, gt, hwe_id) { 690 690 if (!xe_hw_engine_is_reserved(hwe) &&

+18 -19

drivers/gpu/drm/xe/xe_shrinker.c

··· 5 5 6 6 #include <linux/shrinker.h> 7 7 8 + #include <drm/drm_managed.h> 8 9 #include <drm/ttm/ttm_backup.h> 9 10 #include <drm/ttm/ttm_bo.h> 10 11 #include <drm/ttm/ttm_tt.h> ··· 214 213 xe_pm_runtime_put(shrinker->xe); 215 214 } 216 215 216 + static void xe_shrinker_fini(struct drm_device *drm, void *arg) 217 + { 218 + struct xe_shrinker *shrinker = arg; 219 + 220 + xe_assert(shrinker->xe, !shrinker->shrinkable_pages); 221 + xe_assert(shrinker->xe, !shrinker->purgeable_pages); 222 + shrinker_free(shrinker->shrink); 223 + flush_work(&shrinker->pm_worker); 224 + kfree(shrinker); 225 + } 226 + 217 227 /** 218 228 * xe_shrinker_create() - Create an xe per-device shrinker 219 229 * @xe: Pointer to the xe device. 220 230 * 221 - * Returns: A pointer to the created shrinker on success, 222 - * Negative error code on failure. 231 + * Return: %0 on success. Negative error code on failure. 223 232 */ 224 - struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) 233 + int xe_shrinker_create(struct xe_device *xe) 225 234 { 226 235 struct xe_shrinker *shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL); 227 236 228 237 if (!shrinker) 229 - return ERR_PTR(-ENOMEM); 238 + return -ENOMEM; 230 239 231 240 shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); 232 241 if (!shrinker->shrink) { 233 242 kfree(shrinker); 234 - return ERR_PTR(-ENOMEM); 243 + return -ENOMEM; 235 244 } 236 245 237 246 INIT_WORK(&shrinker->pm_worker, xe_shrinker_pm); ··· 251 240 shrinker->shrink->scan_objects = xe_shrinker_scan; 252 241 shrinker->shrink->private_data = shrinker; 253 242 shrinker_register(shrinker->shrink); 243 + xe->mem.shrinker = shrinker; 254 244 255 - return shrinker; 256 - } 257 - 258 - /** 259 - * xe_shrinker_destroy() - Destroy an xe per-device shrinker 260 - * @shrinker: Pointer to the shrinker to destroy. 
261 - */ 262 - void xe_shrinker_destroy(struct xe_shrinker *shrinker) 263 - { 264 - xe_assert(shrinker->xe, !shrinker->shrinkable_pages); 265 - xe_assert(shrinker->xe, !shrinker->purgeable_pages); 266 - shrinker_free(shrinker->shrink); 267 - flush_work(&shrinker->pm_worker); 268 - kfree(shrinker); 245 + return drmm_add_action_or_reset(&xe->drm, xe_shrinker_fini, shrinker); 269 246 }

+1 -3

drivers/gpu/drm/xe/xe_shrinker.h

··· 11 11 12 12 void xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgeable); 13 13 14 - struct xe_shrinker *xe_shrinker_create(struct xe_device *xe); 15 - 16 - void xe_shrinker_destroy(struct xe_shrinker *shrinker); 14 + int xe_shrinker_create(struct xe_device *xe); 17 15 18 16 #endif

+58

drivers/gpu/drm/xe/xe_sriov_vf.c

··· 7 7 8 8 #include "xe_assert.h" 9 9 #include "xe_device.h" 10 + #include "xe_gt.h" 10 11 #include "xe_gt_sriov_printk.h" 11 12 #include "xe_gt_sriov_vf.h" 13 + #include "xe_guc_ct.h" 12 14 #include "xe_pm.h" 13 15 #include "xe_sriov.h" 14 16 #include "xe_sriov_printk.h" 15 17 #include "xe_sriov_vf.h" 18 + #include "xe_tile_sriov_vf.h" 16 19 17 20 /** 18 21 * DOC: VF restore procedure in PF KMD and VF KMD ··· 124 121 * | | | 125 122 */ 126 123 124 + static bool vf_migration_supported(struct xe_device *xe) 125 + { 126 + /* 127 + * TODO: Add conditions to allow specific platforms, when they're 128 + * supported at production quality. 129 + */ 130 + return IS_ENABLED(CONFIG_DRM_XE_DEBUG); 131 + } 132 + 127 133 static void migration_worker_func(struct work_struct *w); 128 134 129 135 /** ··· 142 130 void xe_sriov_vf_init_early(struct xe_device *xe) 143 131 { 144 132 INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); 133 + 134 + if (!vf_migration_supported(xe)) 135 + xe_sriov_info(xe, "migration not supported by this module version\n"); 145 136 } 146 137 147 138 /** ··· 172 157 return ret; 173 158 } 174 159 160 + static void vf_post_migration_fixup_ctb(struct xe_device *xe) 161 + { 162 + struct xe_gt *gt; 163 + unsigned int id; 164 + 165 + xe_assert(xe, IS_SRIOV_VF(xe)); 166 + 167 + for_each_gt(gt, xe, id) { 168 + s32 shift = xe_gt_sriov_vf_ggtt_shift(gt); 169 + 170 + xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift); 171 + } 172 + } 173 + 175 174 /* 176 175 * vf_post_migration_imminent - Check if post-restore recovery is coming. 
177 176 * @xe: the &xe_device struct instance ··· 197 168 { 198 169 return xe->sriov.vf.migration.gt_flags != 0 || 199 170 work_pending(&xe->sriov.vf.migration.worker); 171 + } 172 + 173 + static bool vf_post_migration_fixup_ggtt_nodes(struct xe_device *xe) 174 + { 175 + bool need_fixups = false; 176 + struct xe_tile *tile; 177 + unsigned int id; 178 + 179 + for_each_tile(tile, xe, id) { 180 + struct xe_gt *gt = tile->primary_gt; 181 + s64 shift; 182 + 183 + shift = xe_gt_sriov_vf_ggtt_shift(gt); 184 + if (shift) { 185 + need_fixups = true; 186 + xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift); 187 + } 188 + } 189 + return need_fixups; 200 190 } 201 191 202 192 /* ··· 239 191 240 192 static void vf_post_migration_recovery(struct xe_device *xe) 241 193 { 194 + bool need_fixups; 242 195 int err; 243 196 244 197 drm_dbg(&xe->drm, "migration recovery in progress\n"); ··· 249 200 goto defer; 250 201 if (unlikely(err)) 251 202 goto fail; 203 + if (!vf_migration_supported(xe)) { 204 + xe_sriov_err(xe, "migration not supported by this module version\n"); 205 + err = -ENOTRECOVERABLE; 206 + goto fail; 207 + } 252 208 209 + need_fixups = vf_post_migration_fixup_ggtt_nodes(xe); 253 210 /* FIXME: add the recovery steps */ 211 + if (need_fixups) 212 + vf_post_migration_fixup_ctb(xe); 213 + 254 214 vf_post_migration_notify_resfix_done(xe); 255 215 xe_pm_runtime_put(xe); 256 216 drm_notice(&xe->drm, "migration recovery ended\n");

+183 -125

drivers/gpu/drm/xe/xe_svm.c

··· 45 45 return gpusvm_to_vm(r->gpusvm); 46 46 } 47 47 48 - static unsigned long xe_svm_range_start(struct xe_svm_range *range) 49 - { 50 - return drm_gpusvm_range_start(&range->base); 51 - } 52 - 53 - static unsigned long xe_svm_range_end(struct xe_svm_range *range) 54 - { 55 - return drm_gpusvm_range_end(&range->base); 56 - } 57 - 58 - static unsigned long xe_svm_range_size(struct xe_svm_range *range) 59 - { 60 - return drm_gpusvm_range_size(&range->base); 61 - } 62 - 63 48 #define range_debug(r__, operaton__) \ 64 49 vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \ 65 50 "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \ ··· 86 101 { 87 102 xe_vm_put(range_to_vm(range)); 88 103 kfree(range); 89 - } 90 - 91 - static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) 92 - { 93 - return container_of(r, struct xe_svm_range, base); 94 104 } 95 105 96 106 static void ··· 141 161 for_each_tile(tile, xe, id) 142 162 if (xe_pt_zap_ptes_range(tile, vm, range)) { 143 163 tile_mask |= BIT(id); 144 - range->tile_invalidated |= BIT(id); 164 + /* 165 + * WRITE_ONCE pairs with READ_ONCE in 166 + * xe_vm_has_valid_gpu_mapping() 167 + */ 168 + WRITE_ONCE(range->tile_invalidated, 169 + range->tile_invalidated | BIT(id)); 145 170 } 146 171 147 172 return tile_mask; ··· 172 187 { 173 188 struct xe_vm *vm = gpusvm_to_vm(gpusvm); 174 189 struct xe_device *xe = vm->xe; 175 - struct xe_tile *tile; 176 190 struct drm_gpusvm_range *r, *first; 177 - struct xe_gt_tlb_invalidation_fence 178 - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 179 191 u64 adj_start = mmu_range->start, adj_end = mmu_range->end; 180 192 u8 tile_mask = 0; 181 - u8 id; 182 - u32 fence_id = 0; 183 193 long err; 184 194 185 195 xe_svm_assert_in_notifier(vm); ··· 220 240 221 241 xe_device_wmb(xe); 222 242 223 - for_each_tile(tile, xe, id) { 224 - if (tile_mask & BIT(id)) { 225 - int err; 226 - 227 - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 228 - &fence[fence_id], true); 229 - 230 - err = 
xe_gt_tlb_invalidation_range(tile->primary_gt, 231 - &fence[fence_id], 232 - adj_start, 233 - adj_end, 234 - vm->usm.asid); 235 - if (WARN_ON_ONCE(err < 0)) 236 - goto wait; 237 - ++fence_id; 238 - 239 - if (!tile->media_gt) 240 - continue; 241 - 242 - xe_gt_tlb_invalidation_fence_init(tile->media_gt, 243 - &fence[fence_id], true); 244 - 245 - err = xe_gt_tlb_invalidation_range(tile->media_gt, 246 - &fence[fence_id], 247 - adj_start, 248 - adj_end, 249 - vm->usm.asid); 250 - if (WARN_ON_ONCE(err < 0)) 251 - goto wait; 252 - ++fence_id; 253 - } 254 - } 255 - 256 - wait: 257 - for (id = 0; id < fence_id; ++id) 258 - xe_gt_tlb_invalidation_fence_wait(&fence[id]); 243 + err = xe_vm_range_tilemask_tlb_invalidation(vm, adj_start, adj_end, tile_mask); 244 + WARN_ON_ONCE(err); 259 245 260 246 range_notifier_event_end: 261 247 r = first; ··· 608 662 struct xe_tile *tile, 609 663 bool devmem_only) 610 664 { 611 - /* 612 - * Advisory only check whether the range currently has a valid mapping, 613 - * READ_ONCE pairs with WRITE_ONCE in xe_pt.c 614 - */ 615 - return ((READ_ONCE(range->tile_present) & 616 - ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && 617 - (!devmem_only || xe_svm_range_in_vram(range)); 665 + return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present, 666 + range->tile_invalidated) && 667 + (!devmem_only || xe_svm_range_in_vram(range))); 668 + } 669 + 670 + /** xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM 671 + * @vm: xe_vm pointer 672 + * @range: Pointer to the SVM range structure 673 + * 674 + * The xe_svm_range_migrate_to_smem() checks range has pages in VRAM 675 + * and migrates them to SMEM 676 + */ 677 + void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) 678 + { 679 + if (xe_svm_range_in_vram(range)) 680 + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); 681 + } 682 + 683 + /** 684 + * xe_svm_range_validate() - Check if the SVM range is valid 685 + * @vm: xe_vm pointer 686 + * @range: 
Pointer to the SVM range structure 687 + * @tile_mask: Mask representing the tiles to be checked 688 + * @devmem_preferred : if true range needs to be in devmem 689 + * 690 + * The xe_svm_range_validate() function checks if a range is 691 + * valid and located in the desired memory region. 692 + * 693 + * Return: true if the range is valid, false otherwise 694 + */ 695 + bool xe_svm_range_validate(struct xe_vm *vm, 696 + struct xe_svm_range *range, 697 + u8 tile_mask, bool devmem_preferred) 698 + { 699 + bool ret; 700 + 701 + xe_svm_notifier_lock(vm); 702 + 703 + ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && 704 + (devmem_preferred == range->base.flags.has_devmem_pages); 705 + 706 + xe_svm_notifier_unlock(vm); 707 + 708 + return ret; 709 + } 710 + 711 + /** 712 + * xe_svm_find_vma_start - Find start of CPU VMA 713 + * @vm: xe_vm pointer 714 + * @start: start address 715 + * @end: end address 716 + * @vma: Pointer to struct xe_vma 717 + * 718 + * 719 + * This function searches for a cpu vma, within the specified 720 + * range [start, end] in the given VM. It adjusts the range based on the 721 + * xe_vma start and end addresses. If no cpu VMA is found, it returns ULONG_MAX. 722 + * 723 + * Return: The starting address of the VMA within the range, 724 + * or ULONG_MAX if no VMA is found 725 + */ 726 + u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma) 727 + { 728 + return drm_gpusvm_find_vma_start(&vm->svm.gpusvm, 729 + max(start, xe_vma_start(vma)), 730 + min(end, xe_vma_end(vma))); 618 731 } 619 732 620 733 #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ··· 682 677 return &tile->mem.vram; 683 678 } 684 679 685 - static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, 686 - struct xe_svm_range *range, 687 - const struct drm_gpusvm_ctx *ctx) 680 + /** 681 + * xe_svm_alloc_vram()- Allocate device memory pages for range, 682 + * migrating existing data. 683 + * @vm: The VM. 
684 + * @tile: tile to allocate vram from 685 + * @range: SVM range 686 + * @ctx: DRM GPU SVM context 687 + * 688 + * Return: 0 on success, error code on failure. 689 + */ 690 + int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, 691 + struct xe_svm_range *range, 692 + const struct drm_gpusvm_ctx *ctx) 688 693 { 689 694 struct mm_struct *mm = vm->svm.gpusvm.mm; 690 695 struct xe_vram_region *vr = tile_to_vr(tile); ··· 748 733 749 734 return err; 750 735 } 751 - #else 752 - static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, 753 - struct xe_svm_range *range, 754 - const struct drm_gpusvm_ctx *ctx) 755 - { 756 - return -EOPNOTSUPP; 757 - } 758 736 #endif 759 737 760 738 static bool supports_4K_migration(struct xe_device *xe) ··· 758 750 return true; 759 751 } 760 752 761 - static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, 762 - struct xe_vma *vma) 753 + /** 754 + * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not 755 + * @range: SVM range for which migration needs to be decided 756 + * @vma: vma which has range 757 + * @preferred_region_is_vram: preferred region for range is vram 758 + * 759 + * Return: True for range needing migration and migration is supported else false 760 + */ 761 + bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, 762 + bool preferred_region_is_vram) 763 763 { 764 764 struct xe_vm *vm = range_to_vm(&range->base); 765 765 u64 range_size = xe_svm_range_size(range); 766 766 767 - if (!range->base.flags.migrate_devmem) 767 + if (!range->base.flags.migrate_devmem || !preferred_region_is_vram) 768 768 return false; 769 769 770 - if (xe_svm_range_in_vram(range)) { 771 - drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); 770 + xe_assert(vm->xe, IS_DGFX(vm->xe)); 771 + 772 + if (preferred_region_is_vram && xe_svm_range_in_vram(range)) { 773 + drm_info(&vm->xe->drm, "Range is already in VRAM\n"); 772 774 return false; 773 775 } 774 776 
775 - if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { 777 + if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) { 776 778 drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); 777 779 return false; 778 780 } ··· 816 798 .devmem_only = atomic && IS_DGFX(vm->xe) && 817 799 IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), 818 800 .timeslice_ms = atomic && IS_DGFX(vm->xe) && 819 - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, 801 + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 802 + vm->xe->atomic_svm_timeslice_ms : 0, 820 803 }; 821 804 struct xe_svm_range *range; 822 - struct drm_gpusvm_range *r; 823 - struct drm_exec exec; 824 805 struct dma_fence *fence; 825 - int migrate_try_count = ctx.devmem_only ? 3 : 1; 826 806 struct xe_tile *tile = gt_to_tile(gt); 807 + int migrate_try_count = ctx.devmem_only ? 3 : 1; 827 808 ktime_t end = 0; 828 809 int err; 829 810 ··· 837 820 if (err) 838 821 return err; 839 822 840 - r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr, 841 - xe_vma_start(vma), xe_vma_end(vma), 842 - &ctx); 843 - if (IS_ERR(r)) 844 - return PTR_ERR(r); 823 + range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); 845 824 846 - if (ctx.devmem_only && !r->flags.migrate_devmem) 825 + if (IS_ERR(range)) 826 + return PTR_ERR(range); 827 + 828 + if (ctx.devmem_only && !range->base.flags.migrate_devmem) 847 829 return -EACCES; 848 830 849 - range = to_xe_range(r); 850 831 if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) 851 832 return 0; 852 833 853 834 range_debug(range, "PAGE FAULT"); 854 835 855 836 if (--migrate_try_count >= 0 && 856 - xe_svm_range_needs_migrate_to_vram(range, vma)) { 837 + xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) { 857 838 err = xe_svm_alloc_vram(vm, tile, range, &ctx); 858 839 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 859 840 if (err) { ··· 870 855 } 871 856 872 857 range_debug(range, "GET PAGES"); 873 - 
err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); 858 + err = xe_svm_range_get_pages(vm, range, &ctx); 874 859 /* Corner where CPU mappings have changed */ 875 860 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { 876 861 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 877 862 if (migrate_try_count > 0 || !ctx.devmem_only) { 878 - if (err == -EOPNOTSUPP) { 879 - range_debug(range, "PAGE FAULT - EVICT PAGES"); 880 - drm_gpusvm_range_evict(&vm->svm.gpusvm, 881 - &range->base); 882 - } 883 863 drm_dbg(&vm->xe->drm, 884 864 "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", 885 865 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); ··· 894 884 range_debug(range, "PAGE FAULT - BIND"); 895 885 896 886 retry_bind: 897 - drm_exec_init(&exec, 0, 0); 898 - drm_exec_until_all_locked(&exec) { 899 - err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj); 900 - drm_exec_retry_on_contention(&exec); 901 - if (err) { 902 - drm_exec_fini(&exec); 903 - goto err_out; 887 + xe_vm_lock(vm, false); 888 + fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); 889 + if (IS_ERR(fence)) { 890 + xe_vm_unlock(vm); 891 + err = PTR_ERR(fence); 892 + if (err == -EAGAIN) { 893 + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 894 + range_debug(range, "PAGE FAULT - RETRY BIND"); 895 + goto retry; 904 896 } 905 - 906 - fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); 907 - if (IS_ERR(fence)) { 908 - drm_exec_fini(&exec); 909 - err = PTR_ERR(fence); 910 - if (err == -EAGAIN) { 911 - ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 912 - range_debug(range, "PAGE FAULT - RETRY BIND"); 913 - goto retry; 914 - } 915 - if (xe_vm_validate_should_retry(&exec, err, &end)) 916 - goto retry_bind; 917 - goto err_out; 918 - } 897 + if (xe_vm_validate_should_retry(NULL, err, &end)) 898 + goto retry_bind; 899 + goto err_out; 919 900 } 920 - drm_exec_fini(&exec); 901 + xe_vm_unlock(vm); 921 902 922 903 
dma_fence_wait(fence, false); 923 904 dma_fence_put(fence); ··· 945 944 int xe_svm_bo_evict(struct xe_bo *bo) 946 945 { 947 946 return drm_gpusvm_evict_to_ram(&bo->devmem_allocation); 947 + } 948 + 949 + /** 950 + * xe_svm_range_find_or_insert- Find or insert GPU SVM range 951 + * @vm: xe_vm pointer 952 + * @addr: address for which range needs to be found/inserted 953 + * @vma: Pointer to struct xe_vma which mirrors CPU 954 + * @ctx: GPU SVM context 955 + * 956 + * This function finds or inserts a newly allocated a SVM range based on the 957 + * address. 958 + * 959 + * Return: Pointer to the SVM range on success, ERR_PTR() on failure. 960 + */ 961 + struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, 962 + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) 963 + { 964 + struct drm_gpusvm_range *r; 965 + 966 + r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), 967 + xe_vma_start(vma), xe_vma_end(vma), ctx); 968 + if (IS_ERR(r)) 969 + return ERR_PTR(PTR_ERR(r)); 970 + 971 + return to_xe_range(r); 972 + } 973 + 974 + /** 975 + * xe_svm_range_get_pages() - Get pages for a SVM range 976 + * @vm: Pointer to the struct xe_vm 977 + * @range: Pointer to the xe SVM range structure 978 + * @ctx: GPU SVM context 979 + * 980 + * This function gets pages for a SVM range and ensures they are mapped for 981 + * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range. 982 + * 983 + * Return: 0 on success, negative error code on failure. 
984 + */ 985 + int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, 986 + struct drm_gpusvm_ctx *ctx) 987 + { 988 + int err = 0; 989 + 990 + err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx); 991 + if (err == -EOPNOTSUPP) { 992 + range_debug(range, "PAGE FAULT - EVICT PAGES"); 993 + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); 994 + } 995 + 996 + return err; 948 997 } 949 998 950 999 #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ··· 1075 1024 return 0; 1076 1025 } 1077 1026 #else 1027 + int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, 1028 + struct xe_svm_range *range, 1029 + const struct drm_gpusvm_ctx *ctx) 1030 + { 1031 + return -EOPNOTSUPP; 1032 + } 1033 + 1078 1034 int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) 1079 1035 { 1080 1036 return 0;

+138

drivers/gpu/drm/xe/xe_svm.h

··· 70 70 71 71 void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); 72 72 73 + int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, 74 + struct xe_svm_range *range, 75 + const struct drm_gpusvm_ctx *ctx); 76 + 77 + struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, 78 + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx); 79 + 80 + int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, 81 + struct drm_gpusvm_ctx *ctx); 82 + 83 + bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, 84 + bool preferred_region_is_vram); 85 + 86 + void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range); 87 + 88 + bool xe_svm_range_validate(struct xe_vm *vm, 89 + struct xe_svm_range *range, 90 + u8 tile_mask, bool devmem_preferred); 91 + 92 + u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma); 93 + 73 94 /** 74 95 * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping 75 96 * @range: SVM range ··· 101 80 { 102 81 lockdep_assert_held(&range->base.gpusvm->notifier_lock); 103 82 return range->base.flags.has_dma_mapping; 83 + } 84 + 85 + /** 86 + * to_xe_range - Convert a drm_gpusvm_range pointer to a xe_svm_range 87 + * @r: Pointer to the drm_gpusvm_range structure 88 + * 89 + * This function takes a pointer to a drm_gpusvm_range structure and 90 + * converts it to a pointer to the containing xe_svm_range structure. 91 + * 92 + * Return: Pointer to the xe_svm_range structure 93 + */ 94 + static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) 95 + { 96 + return container_of(r, struct xe_svm_range, base); 97 + } 98 + 99 + /** 100 + * xe_svm_range_start() - SVM range start address 101 + * @range: SVM range 102 + * 103 + * Return: start address of range. 
104 + */ 105 + static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) 106 + { 107 + return drm_gpusvm_range_start(&range->base); 108 + } 109 + 110 + /** 111 + * xe_svm_range_end() - SVM range end address 112 + * @range: SVM range 113 + * 114 + * Return: end address of range. 115 + */ 116 + static inline unsigned long xe_svm_range_end(struct xe_svm_range *range) 117 + { 118 + return drm_gpusvm_range_end(&range->base); 119 + } 120 + 121 + /** 122 + * xe_svm_range_size() - SVM range size 123 + * @range: SVM range 124 + * 125 + * Return: Size of range. 126 + */ 127 + static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) 128 + { 129 + return drm_gpusvm_range_size(&range->base); 104 130 } 105 131 106 132 #define xe_svm_assert_in_notifier(vm__) \ ··· 165 97 #include <linux/interval_tree.h> 166 98 167 99 struct drm_pagemap_device_addr; 100 + struct drm_gpusvm_ctx; 101 + struct drm_gpusvm_range; 168 102 struct xe_bo; 169 103 struct xe_gt; 170 104 struct xe_vm; ··· 235 165 static inline 236 166 void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) 237 167 { 168 + } 169 + 170 + static inline 171 + int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, 172 + struct xe_svm_range *range, 173 + const struct drm_gpusvm_ctx *ctx) 174 + { 175 + return -EOPNOTSUPP; 176 + } 177 + 178 + static inline 179 + struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, 180 + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) 181 + { 182 + return ERR_PTR(-EINVAL); 183 + } 184 + 185 + static inline 186 + int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, 187 + struct drm_gpusvm_ctx *ctx) 188 + { 189 + return -EINVAL; 190 + } 191 + 192 + static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) 193 + { 194 + return NULL; 195 + } 196 + 197 + static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) 198 + { 199 + return 0; 200 + } 201 + 202 + static inline 
unsigned long xe_svm_range_end(struct xe_svm_range *range) 203 + { 204 + return 0; 205 + } 206 + 207 + static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) 208 + { 209 + return 0; 210 + } 211 + 212 + static inline 213 + bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, 214 + u32 region) 215 + { 216 + return false; 217 + } 218 + 219 + static inline 220 + void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) 221 + { 222 + } 223 + 224 + static inline 225 + bool xe_svm_range_validate(struct xe_vm *vm, 226 + struct xe_svm_range *range, 227 + u8 tile_mask, bool devmem_preferred) 228 + { 229 + return false; 230 + } 231 + 232 + static inline 233 + u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma) 234 + { 235 + return ULONG_MAX; 238 236 } 239 237 240 238 #define xe_svm_assert_in_notifier(...) do {} while (0)

+1 -5

drivers/gpu/drm/xe/xe_tile.c

··· 87 87 */ 88 88 static int xe_tile_alloc(struct xe_tile *tile) 89 89 { 90 - struct drm_device *drm = &tile_to_xe(tile)->drm; 91 - 92 - tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt), 93 - GFP_KERNEL); 90 + tile->mem.ggtt = xe_ggtt_alloc(tile); 94 91 if (!tile->mem.ggtt) 95 92 return -ENOMEM; 96 - tile->mem.ggtt->tile = tile; 97 93 98 94 return 0; 99 95 }

+254

drivers/gpu/drm/xe/xe_tile_sriov_vf.c

··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_managed.h> 7 + 8 + #include "regs/xe_gtt_defs.h" 9 + 10 + #include "xe_assert.h" 11 + #include "xe_ggtt.h" 12 + #include "xe_gt_sriov_vf.h" 13 + #include "xe_sriov.h" 14 + #include "xe_sriov_printk.h" 15 + #include "xe_tile_sriov_vf.h" 16 + #include "xe_wopcm.h" 17 + 18 + static int vf_init_ggtt_balloons(struct xe_tile *tile) 19 + { 20 + struct xe_ggtt *ggtt = tile->mem.ggtt; 21 + 22 + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); 23 + 24 + tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt); 25 + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) 26 + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); 27 + 28 + tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt); 29 + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { 30 + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); 31 + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); 32 + } 33 + 34 + return 0; 35 + } 36 + 37 + /** 38 + * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range. 39 + * @tile: the &xe_tile struct instance 40 + * 41 + * Return: 0 on success or a negative error code on failure. 
42 + */ 43 + int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) 44 + { 45 + u64 ggtt_base = xe_gt_sriov_vf_ggtt_base(tile->primary_gt); 46 + u64 ggtt_size = xe_gt_sriov_vf_ggtt(tile->primary_gt); 47 + struct xe_device *xe = tile_to_xe(tile); 48 + u64 wopcm = xe_wopcm_size(xe); 49 + u64 start, end; 50 + int err; 51 + 52 + xe_tile_assert(tile, IS_SRIOV_VF(xe)); 53 + xe_tile_assert(tile, ggtt_size); 54 + lockdep_assert_held(&tile->mem.ggtt->lock); 55 + 56 + /* 57 + * VF can only use part of the GGTT as allocated by the PF: 58 + * 59 + * WOPCM GUC_GGTT_TOP 60 + * |<------------ Total GGTT size ------------------>| 61 + * 62 + * VF GGTT base -->|<- size ->| 63 + * 64 + * +--------------------+----------+-----------------+ 65 + * |////////////////////| block |\\\\\\\\\\\\\\\\\| 66 + * +--------------------+----------+-----------------+ 67 + * 68 + * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| 69 + */ 70 + 71 + if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP || 72 + ggtt_size > GUC_GGTT_TOP - ggtt_base) { 73 + xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n", 74 + tile->id, ggtt_base, ggtt_base + ggtt_size - 1); 75 + return -ERANGE; 76 + } 77 + 78 + start = wopcm; 79 + end = ggtt_base; 80 + if (end != start) { 81 + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0], 82 + start, end); 83 + if (err) 84 + return err; 85 + } 86 + 87 + start = ggtt_base + ggtt_size; 88 + end = GUC_GGTT_TOP; 89 + if (end != start) { 90 + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1], 91 + start, end); 92 + if (err) { 93 + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); 94 + return err; 95 + } 96 + } 97 + 98 + return 0; 99 + } 100 + 101 + static int vf_balloon_ggtt(struct xe_tile *tile) 102 + { 103 + struct xe_ggtt *ggtt = tile->mem.ggtt; 104 + int err; 105 + 106 + mutex_lock(&ggtt->lock); 107 + err = xe_tile_sriov_vf_balloon_ggtt_locked(tile); 108 + mutex_unlock(&ggtt->lock); 109 + 110 + 
return err; 111 + } 112 + 113 + /** 114 + * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes. 115 + * @tile: the &xe_tile struct instance 116 + */ 117 + void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile) 118 + { 119 + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); 120 + 121 + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]); 122 + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); 123 + } 124 + 125 + static void vf_deballoon_ggtt(struct xe_tile *tile) 126 + { 127 + mutex_lock(&tile->mem.ggtt->lock); 128 + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); 129 + mutex_unlock(&tile->mem.ggtt->lock); 130 + } 131 + 132 + static void vf_fini_ggtt_balloons(struct xe_tile *tile) 133 + { 134 + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); 135 + 136 + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]); 137 + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); 138 + } 139 + 140 + static void cleanup_ggtt(struct drm_device *drm, void *arg) 141 + { 142 + struct xe_tile *tile = arg; 143 + 144 + vf_deballoon_ggtt(tile); 145 + vf_fini_ggtt_balloons(tile); 146 + } 147 + 148 + /** 149 + * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. 150 + * @tile: the &xe_tile 151 + * 152 + * This function is for VF use only. 153 + * 154 + * Return: 0 on success or a negative error code on failure. 
155 + */ 156 + int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) 157 + { 158 + struct xe_device *xe = tile_to_xe(tile); 159 + int err; 160 + 161 + err = vf_init_ggtt_balloons(tile); 162 + if (err) 163 + return err; 164 + 165 + err = vf_balloon_ggtt(tile); 166 + if (err) { 167 + vf_fini_ggtt_balloons(tile); 168 + return err; 169 + } 170 + 171 + return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile); 172 + } 173 + 174 + /** 175 + * DOC: GGTT nodes shifting during VF post-migration recovery 176 + * 177 + * The first fixup applied to the VF KMD structures as part of post-migration 178 + * recovery is shifting nodes within &xe_ggtt instance. The nodes are moved 179 + * from range previously assigned to this VF, into newly provisioned area. 180 + * The changes include balloons, which are resized accordingly. 181 + * 182 + * The balloon nodes are there to eliminate unavailable ranges from use: one 183 + * reserves the GGTT area below the range for current VF, and another one 184 + * reserves area above. 
185 + * 186 + * Below is a GGTT layout of example VF, with a certain address range assigned to 187 + * said VF, and inaccessible areas above and below: 188 + * 189 + * 0 4GiB 190 + * |<--------------------------- Total GGTT size ----------------------------->| 191 + * WOPCM GUC_TOP 192 + * |<-------------- Area mappable by xe_ggtt instance ---------------->| 193 + * 194 + * +---+---------------------------------+----------+----------------------+---+ 195 + * |\\\|/////////////////////////////////| VF mem |//////////////////////|\\\| 196 + * +---+---------------------------------+----------+----------------------+---+ 197 + * 198 + * Hardware enforced access rules before migration: 199 + * 200 + * |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->| 201 + * 202 + * GGTT nodes used for tracking allocations: 203 + * 204 + * |<---------- balloon ------------>|<- nodes->|<----- balloon ------>| 205 + * 206 + * After the migration, GGTT area assigned to the VF might have shifted, either 207 + * to lower or to higher address. But we expect the total size and extra areas to 208 + * be identical, as migration can only happen between matching platforms. 209 + * Below is an example of GGTT layout of the VF after migration. Content of the 210 + * GGTT for VF has been moved to a new area, and we receive its address from GuC: 211 + * 212 + * +---+----------------------+----------+---------------------------------+---+ 213 + * |\\\|//////////////////////| VF mem |/////////////////////////////////|\\\| 214 + * +---+----------------------+----------+---------------------------------+---+ 215 + * 216 + * Hardware enforced access rules after migration: 217 + * 218 + * |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->| 219 + * 220 + * So the VF has a new slice of GGTT assigned, and during migration process, the 221 + * memory content was copied to that new area. 
But the &xe_ggtt nodes are still 222 + * tracking allocations using the old addresses. The nodes within VF owned area 223 + * have to be shifted, and balloon nodes need to be resized to properly mask out 224 + * areas not owned by the VF. 225 + * 226 + * Fixed &xe_ggtt nodes used for tracking allocations: 227 + * 228 + * |<------ balloon ------>|<- nodes->|<----------- balloon ----------->| 229 + * 230 + * Due to use of GPU profiles, we do not expect the old and new GGTT ares to 231 + * overlap; but our node shifting will fix addresses properly regardless. 232 + */ 233 + 234 + /** 235 + * xe_tile_sriov_vf_fixup_ggtt_nodes - Shift GGTT allocations to match assigned range. 236 + * @tile: the &xe_tile struct instance 237 + * @shift: the shift value 238 + * 239 + * Since Global GTT is not virtualized, each VF has an assigned range 240 + * within the global space. This range might have changed during migration, 241 + * which requires all memory addresses pointing to GGTT to be shifted. 242 + */ 243 + void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift) 244 + { 245 + struct xe_ggtt *ggtt = tile->mem.ggtt; 246 + 247 + mutex_lock(&ggtt->lock); 248 + 249 + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); 250 + xe_ggtt_shift_nodes_locked(ggtt, shift); 251 + xe_tile_sriov_vf_balloon_ggtt_locked(tile); 252 + 253 + mutex_unlock(&ggtt->lock); 254 + }

+18

drivers/gpu/drm/xe/xe_tile_sriov_vf.h

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_TILE_SRIOV_VF_H_ 7 + #define _XE_TILE_SRIOV_VF_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct xe_tile; 12 + 13 + int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); 14 + int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile); 15 + void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); 16 + void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift); 17 + 18 + #endif

+5

drivers/gpu/drm/xe/xe_tuning.c

··· 98 98 ENGINE_CLASS(RENDER)), 99 99 XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) 100 100 }, 101 + { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), 102 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, XE_RTP_END_VERSION_UNDEFINED), 103 + FUNC(xe_rtp_match_first_render_or_compute)), 104 + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) 105 + }, 101 106 }; 102 107 103 108 static const struct xe_rtp_entry_sr lrc_tunings[] = {

+39 -17

drivers/gpu/drm/xe/xe_uc_fw.c

··· 16 16 #include "xe_gsc.h" 17 17 #include "xe_gt.h" 18 18 #include "xe_gt_printk.h" 19 + #include "xe_gt_sriov_vf.h" 19 20 #include "xe_guc.h" 20 21 #include "xe_map.h" 21 22 #include "xe_mmio.h" ··· 663 662 ver_->major, ver_->minor, ver_->patch); \ 664 663 } while (0) 665 664 665 + static void uc_fw_vf_override(struct xe_uc_fw *uc_fw) 666 + { 667 + struct xe_uc_fw_version *compat = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; 668 + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted; 669 + 670 + /* Only GuC/HuC are supported */ 671 + if (uc_fw->type != XE_UC_FW_TYPE_GUC && uc_fw->type != XE_UC_FW_TYPE_HUC) 672 + uc_fw->path = NULL; 673 + 674 + /* VF will support only firmwares that driver can autoselect */ 675 + xe_uc_fw_change_status(uc_fw, uc_fw->path ? 676 + XE_UC_FIRMWARE_PRELOADED : 677 + XE_UC_FIRMWARE_NOT_SUPPORTED); 678 + 679 + if (!xe_uc_fw_is_supported(uc_fw)) 680 + return; 681 + 682 + /* PF is doing the loading, so we don't need a path on the VF */ 683 + uc_fw->path = "Loaded by PF"; 684 + 685 + /* The GuC versions are set up during the VF bootstrap */ 686 + if (uc_fw->type == XE_UC_FW_TYPE_GUC) { 687 + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; 688 + xe_gt_sriov_vf_guc_versions(uc_fw_to_gt(uc_fw), wanted, compat); 689 + } 690 + } 691 + 666 692 static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p) 667 693 { 668 694 struct xe_device *xe = uc_fw_to_xe(uc_fw); 695 + struct xe_gt *gt = uc_fw_to_gt(uc_fw); 696 + struct drm_printer p = xe_gt_info_printer(gt); 669 697 struct device *dev = xe->drm.dev; 670 - struct drm_printer p = drm_info_printer(dev); 671 698 const struct firmware *fw = NULL; 672 699 int err; 673 700 ··· 704 675 * before we're looked at the HW caps to see if we have uc support 705 676 */ 706 677 BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); 707 - xe_assert(xe, !uc_fw->status); 708 - xe_assert(xe, !uc_fw->path); 678 + xe_gt_assert(gt, !uc_fw->status); 679 + xe_gt_assert(gt, 
!uc_fw->path); 709 680 710 681 uc_fw_auto_select(xe, uc_fw); 711 682 712 683 if (IS_SRIOV_VF(xe)) { 713 - /* Only GuC/HuC are supported */ 714 - if (uc_fw->type != XE_UC_FW_TYPE_GUC && 715 - uc_fw->type != XE_UC_FW_TYPE_HUC) 716 - uc_fw->path = NULL; 717 - /* VF will support only firmwares that driver can autoselect */ 718 - xe_uc_fw_change_status(uc_fw, uc_fw->path ? 719 - XE_UC_FIRMWARE_PRELOADED : 720 - XE_UC_FIRMWARE_NOT_SUPPORTED); 684 + uc_fw_vf_override(uc_fw); 721 685 return 0; 722 686 } 723 687 ··· 722 700 723 701 if (!xe_uc_fw_is_supported(uc_fw)) { 724 702 if (uc_fw->type == XE_UC_FW_TYPE_GUC) { 725 - drm_err(&xe->drm, "No GuC firmware defined for platform\n"); 703 + xe_gt_err(gt, "No GuC firmware defined for platform\n"); 726 704 return -ENOENT; 727 705 } 728 706 return 0; ··· 731 709 /* an empty path means the firmware is disabled */ 732 710 if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { 733 711 xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); 734 - drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); 712 + xe_gt_dbg(gt, "%s disabled\n", xe_uc_fw_type_repr(uc_fw->type)); 735 713 return 0; 736 714 } 737 715 ··· 764 742 XE_UC_FIRMWARE_MISSING : 765 743 XE_UC_FIRMWARE_ERROR); 766 744 767 - drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", 768 - xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); 769 - drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", 770 - xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); 745 + xe_gt_notice(gt, "%s firmware %s: fetch failed with error %pe\n", 746 + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, ERR_PTR(err)); 747 + xe_gt_info(gt, "%s firmware(s) can be downloaded from %s\n", 748 + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); 771 749 772 750 release_firmware(fw); /* OK even if fw is NULL */ 773 751

+2

drivers/gpu/drm/xe/xe_uc_fw_types.h

··· 65 65 * struct xe_uc_fw_version - Version for XE micro controller firmware 66 66 */ 67 67 struct xe_uc_fw_version { 68 + /** @branch: branch version of the FW (not always available) */ 69 + u16 branch; 68 70 /** @major: major version of the FW */ 69 71 u16 major; 70 72 /** @minor: minor version of the FW */

+325 -56

drivers/gpu/drm/xe/xe_vm.c

··· 732 732 DMA_RESV_USAGE_BOOKKEEP, 733 733 false, MAX_SCHEDULE_TIMEOUT); 734 734 735 + down_read(&vm->userptr.notifier_lock); 735 736 err = xe_vm_invalidate_vma(&uvma->vma); 737 + up_read(&vm->userptr.notifier_lock); 736 738 xe_vm_unlock(vm); 737 739 if (err) 738 740 break; ··· 800 798 } 801 799 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); 802 800 801 + static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) 802 + { 803 + struct xe_vma *vma; 804 + 805 + vma = gpuva_to_vma(op->base.prefetch.va); 806 + 807 + if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) 808 + xa_destroy(&op->prefetch_range.range); 809 + } 810 + 811 + static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) 812 + { 813 + struct xe_vma_op *op; 814 + 815 + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 816 + return; 817 + 818 + list_for_each_entry(op, &vops->list, link) 819 + xe_vma_svm_prefetch_op_fini(op); 820 + } 821 + 803 822 static void xe_vma_ops_fini(struct xe_vma_ops *vops) 804 823 { 805 824 int i; 825 + 826 + xe_vma_svm_prefetch_ops_fini(vops); 806 827 807 828 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 808 829 kfree(vops->pt_update_ops[i].ops); 809 830 } 810 831 811 - static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) 832 + static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) 812 833 { 813 834 int i; 814 835 836 + if (!inc_val) 837 + return; 838 + 815 839 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) 816 840 if (BIT(i) & tile_mask) 817 - ++vops->pt_update_ops[i].num_ops; 841 + vops->pt_update_ops[i].num_ops += inc_val; 818 842 } 819 843 820 844 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, ··· 870 842 871 843 xe_vm_populate_rebind(op, vma, tile_mask); 872 844 list_add_tail(&op->link, &vops->list); 873 - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 845 + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 874 846 875 847 return 0; 876 
848 } ··· 1005 977 1006 978 xe_vm_populate_range_rebind(op, vma, range, tile_mask); 1007 979 list_add_tail(&op->link, &vops->list); 1008 - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); 980 + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); 1009 981 1010 982 return 0; 1011 983 } ··· 1090 1062 1091 1063 xe_vm_populate_range_unbind(op, range); 1092 1064 list_add_tail(&op->link, &vops->list); 1093 - xe_vma_ops_incr_pt_update_ops(vops, range->tile_present); 1065 + xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1); 1094 1066 1095 1067 return 0; 1096 1068 } ··· 2169 2141 return err; 2170 2142 } 2171 2143 2144 + static bool vma_matches(struct xe_vma *vma, u64 page_addr) 2145 + { 2146 + if (page_addr > xe_vma_end(vma) - 1 || 2147 + page_addr + SZ_4K - 1 < xe_vma_start(vma)) 2148 + return false; 2149 + 2150 + return true; 2151 + } 2152 + 2153 + /** 2154 + * xe_vm_find_vma_by_addr() - Find a VMA by its address 2155 + * 2156 + * @vm: the xe_vm the vma belongs to 2157 + * @page_addr: address to look up 2158 + */ 2159 + struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) 2160 + { 2161 + struct xe_vma *vma = NULL; 2162 + 2163 + if (vm->usm.last_fault_vma) { /* Fast lookup */ 2164 + if (vma_matches(vm->usm.last_fault_vma, page_addr)) 2165 + vma = vm->usm.last_fault_vma; 2166 + } 2167 + if (!vma) 2168 + vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 2169 + 2170 + return vma; 2171 + } 2172 + 2172 2173 static const u32 region_to_mem_type[] = { 2173 2174 XE_PL_TT, 2174 2175 XE_PL_VRAM0, ··· 2278 2221 return true; 2279 2222 } 2280 2223 2224 + static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) 2225 + { 2226 + struct drm_gpuva_op *__op; 2227 + 2228 + drm_gpuva_for_each_op(__op, ops) { 2229 + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 2230 + 2231 + xe_vma_svm_prefetch_op_fini(op); 2232 + } 2233 + } 2234 + 2281 2235 /* 2282 2236 * Create operations list from IOCTL arguments, setup operations fields so parse 2283 2237 * and 
commit steps are decoupled from IOCTL arguments. This step can fail. 2284 2238 */ 2285 2239 static struct drm_gpuva_ops * 2286 - vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, 2287 - u64 bo_offset_or_userptr, u64 addr, u64 range, 2240 + vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, 2241 + struct xe_bo *bo, u64 bo_offset_or_userptr, 2242 + u64 addr, u64 range, 2288 2243 u32 operation, u32 flags, 2289 2244 u32 prefetch_region, u16 pat_index) 2290 2245 { ··· 2304 2235 struct drm_gpuva_ops *ops; 2305 2236 struct drm_gpuva_op *__op; 2306 2237 struct drm_gpuvm_bo *vm_bo; 2238 + u64 range_end = addr + range; 2307 2239 int err; 2308 2240 2309 2241 lockdep_assert_held_write(&vm->lock); ··· 2366 2296 op->map.invalidate_on_bind = 2367 2297 __xe_vm_needs_clear_scratch_pages(vm, flags); 2368 2298 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { 2369 - op->prefetch.region = prefetch_region; 2370 - } 2299 + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2300 + struct xe_svm_range *svm_range; 2301 + struct drm_gpusvm_ctx ctx = {}; 2302 + struct xe_tile *tile; 2303 + u8 id, tile_mask = 0; 2304 + u32 i; 2371 2305 2306 + if (!xe_vma_is_cpu_addr_mirror(vma)) { 2307 + op->prefetch.region = prefetch_region; 2308 + break; 2309 + } 2310 + 2311 + ctx.read_only = xe_vma_read_only(vma); 2312 + ctx.devmem_possible = IS_DGFX(vm->xe) && 2313 + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); 2314 + 2315 + for_each_tile(tile, vm->xe, id) 2316 + tile_mask |= 0x1 << id; 2317 + 2318 + xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); 2319 + op->prefetch_range.region = prefetch_region; 2320 + op->prefetch_range.ranges_count = 0; 2321 + alloc_next_range: 2322 + svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); 2323 + 2324 + if (PTR_ERR(svm_range) == -ENOENT) { 2325 + u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); 2326 + 2327 + addr = ret == ULONG_MAX ? 
0 : ret; 2328 + if (addr) 2329 + goto alloc_next_range; 2330 + else 2331 + goto print_op_label; 2332 + } 2333 + 2334 + if (IS_ERR(svm_range)) { 2335 + err = PTR_ERR(svm_range); 2336 + goto unwind_prefetch_ops; 2337 + } 2338 + 2339 + if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { 2340 + xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); 2341 + goto check_next_range; 2342 + } 2343 + 2344 + err = xa_alloc(&op->prefetch_range.range, 2345 + &i, svm_range, xa_limit_32b, 2346 + GFP_KERNEL); 2347 + 2348 + if (err) 2349 + goto unwind_prefetch_ops; 2350 + 2351 + op->prefetch_range.ranges_count++; 2352 + vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; 2353 + xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); 2354 + check_next_range: 2355 + if (range_end > xe_svm_range_end(svm_range) && 2356 + xe_svm_range_end(svm_range) < xe_vma_end(vma)) { 2357 + addr = xe_svm_range_end(svm_range); 2358 + goto alloc_next_range; 2359 + } 2360 + } 2361 + print_op_label: 2372 2362 print_op(vm->xe, __op); 2373 2363 } 2374 2364 2375 2365 return ops; 2366 + 2367 + unwind_prefetch_ops: 2368 + xe_svm_prefetch_gpuva_ops_fini(ops); 2369 + drm_gpuva_ops_free(&vm->gpuvm, ops); 2370 + return ERR_PTR(err); 2376 2371 } 2372 + 2377 2373 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); 2378 2374 2379 2375 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, ··· 2634 2498 !op->map.is_cpu_addr_mirror) || 2635 2499 op->map.invalidate_on_bind) 2636 2500 xe_vma_ops_incr_pt_update_ops(vops, 2637 - op->tile_mask); 2501 + op->tile_mask, 1); 2638 2502 break; 2639 2503 } 2640 2504 case DRM_GPUVA_OP_REMAP: ··· 2643 2507 gpuva_to_vma(op->base.remap.unmap->va); 2644 2508 bool skip = xe_vma_is_cpu_addr_mirror(old); 2645 2509 u64 start = xe_vma_start(old), end = xe_vma_end(old); 2510 + int num_remap_ops = 0; 2646 2511 2647 2512 if (op->base.remap.prev) 2648 2513 start = op->base.remap.prev->va.addr + ··· 2696 2559 (ULL)op->remap.start, 2697 2560 
(ULL)op->remap.range); 2698 2561 } else { 2699 - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2562 + num_remap_ops++; 2700 2563 } 2701 2564 } 2702 2565 ··· 2725 2588 (ULL)op->remap.start, 2726 2589 (ULL)op->remap.range); 2727 2590 } else { 2728 - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2591 + num_remap_ops++; 2729 2592 } 2730 2593 } 2731 2594 if (!skip) 2732 - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2595 + num_remap_ops++; 2596 + 2597 + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); 2733 2598 break; 2734 2599 } 2735 2600 case DRM_GPUVA_OP_UNMAP: ··· 2743 2604 return -EBUSY; 2744 2605 2745 2606 if (!xe_vma_is_cpu_addr_mirror(vma)) 2746 - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2607 + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2747 2608 break; 2748 2609 case DRM_GPUVA_OP_PREFETCH: 2749 2610 vma = gpuva_to_vma(op->base.prefetch.va); ··· 2754 2615 return err; 2755 2616 } 2756 2617 2757 - if (!xe_vma_is_cpu_addr_mirror(vma)) 2758 - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); 2618 + if (xe_vma_is_cpu_addr_mirror(vma)) 2619 + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 2620 + op->prefetch_range.ranges_count); 2621 + else 2622 + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); 2623 + 2759 2624 break; 2760 2625 default: 2761 2626 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); ··· 2885 2742 return 0; 2886 2743 } 2887 2744 2745 + static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) 2746 + { 2747 + bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); 2748 + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2749 + int err = 0; 2750 + 2751 + struct xe_svm_range *svm_range; 2752 + struct drm_gpusvm_ctx ctx = {}; 2753 + struct xe_tile *tile; 2754 + unsigned long i; 2755 + u32 region; 2756 + 2757 + if (!xe_vma_is_cpu_addr_mirror(vma)) 2758 + return 0; 2759 + 2760 + region = op->prefetch_range.region; 2761 + 2762 + ctx.read_only = 
xe_vma_read_only(vma); 2763 + ctx.devmem_possible = devmem_possible; 2764 + ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 2765 + 2766 + /* TODO: Threading the migration */ 2767 + xa_for_each(&op->prefetch_range.range, i, svm_range) { 2768 + if (!region) 2769 + xe_svm_range_migrate_to_smem(vm, svm_range); 2770 + 2771 + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { 2772 + tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; 2773 + err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx); 2774 + if (err) { 2775 + drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", 2776 + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2777 + return -ENODATA; 2778 + } 2779 + xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); 2780 + } 2781 + 2782 + err = xe_svm_range_get_pages(vm, svm_range, &ctx); 2783 + if (err) { 2784 + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", 2785 + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 2786 + if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) 2787 + err = -ENODATA; 2788 + return err; 2789 + } 2790 + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); 2791 + } 2792 + 2793 + return err; 2794 + } 2795 + 2888 2796 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, 2889 2797 struct xe_vma_op *op) 2890 2798 { ··· 2973 2779 case DRM_GPUVA_OP_PREFETCH: 2974 2780 { 2975 2781 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); 2976 - u32 region = op->prefetch.region; 2782 + u32 region; 2783 + 2784 + if (xe_vma_is_cpu_addr_mirror(vma)) 2785 + region = op->prefetch_range.region; 2786 + else 2787 + region = op->prefetch.region; 2977 2788 2978 2789 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); 2979 2790 ··· 2995 2796 } 2996 2797 2997 2798 return err; 2799 + } 2800 + 2801 + static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) 2802 + { 2803 + 
struct xe_vma_op *op; 2804 + int err; 2805 + 2806 + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) 2807 + return 0; 2808 + 2809 + list_for_each_entry(op, &vops->list, link) { 2810 + if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 2811 + err = prefetch_ranges(vm, op); 2812 + if (err) 2813 + return err; 2814 + } 2815 + } 2816 + 2817 + return 0; 2998 2818 } 2999 2819 3000 2820 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, ··· 3457 3239 vops->q = q; 3458 3240 vops->syncs = syncs; 3459 3241 vops->num_syncs = num_syncs; 3242 + vops->flags = 0; 3460 3243 } 3461 3244 3462 3245 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, ··· 3665 3446 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; 3666 3447 u16 pat_index = bind_ops[i].pat_index; 3667 3448 3668 - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, 3449 + ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, 3669 3450 addr, range, op, flags, 3670 3451 prefetch_region, pat_index); 3671 3452 if (IS_ERR(ops[i])) { ··· 3695 3476 } 3696 3477 3697 3478 err = xe_vma_ops_alloc(&vops, args->num_binds > 1); 3479 + if (err) 3480 + goto unwind_ops; 3481 + 3482 + err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); 3698 3483 if (err) 3699 3484 goto unwind_ops; 3700 3485 ··· 3771 3548 3772 3549 xe_vma_ops_init(&vops, vm, q, NULL, 0); 3773 3550 3774 - ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size, 3551 + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size, 3775 3552 DRM_XE_VM_BIND_OP_MAP, 0, 0, 3776 3553 vm->xe->pat.idx[cache_lvl]); 3777 3554 if (IS_ERR(ops)) { ··· 3843 3620 } 3844 3621 3845 3622 /** 3623 + * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an 3624 + * address range 3625 + * @vm: The VM 3626 + * @start: start address 3627 + * @end: end address 3628 + * @tile_mask: mask for which gt's issue tlb invalidation 3629 + * 3630 + * Issue a range based TLB invalidation for gt's in 
tilemask 3631 + * 3632 + * Returns 0 for success, negative error code otherwise. 3633 + */ 3634 + int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, 3635 + u64 end, u8 tile_mask) 3636 + { 3637 + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3638 + struct xe_tile *tile; 3639 + u32 fence_id = 0; 3640 + u8 id; 3641 + int err; 3642 + 3643 + if (!tile_mask) 3644 + return 0; 3645 + 3646 + for_each_tile(tile, vm->xe, id) { 3647 + if (tile_mask & BIT(id)) { 3648 + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3649 + &fence[fence_id], true); 3650 + 3651 + err = xe_gt_tlb_invalidation_range(tile->primary_gt, 3652 + &fence[fence_id], 3653 + start, 3654 + end, 3655 + vm->usm.asid); 3656 + if (err) 3657 + goto wait; 3658 + ++fence_id; 3659 + 3660 + if (!tile->media_gt) 3661 + continue; 3662 + 3663 + xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3664 + &fence[fence_id], true); 3665 + 3666 + err = xe_gt_tlb_invalidation_range(tile->media_gt, 3667 + &fence[fence_id], 3668 + start, 3669 + end, 3670 + vm->usm.asid); 3671 + if (err) 3672 + goto wait; 3673 + ++fence_id; 3674 + } 3675 + } 3676 + 3677 + wait: 3678 + for (id = 0; id < fence_id; ++id) 3679 + xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3680 + 3681 + return err; 3682 + } 3683 + 3684 + /** 3846 3685 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3847 3686 * @vma: VMA to invalidate 3848 3687 * ··· 3917 3632 int xe_vm_invalidate_vma(struct xe_vma *vma) 3918 3633 { 3919 3634 struct xe_device *xe = xe_vma_vm(vma)->xe; 3635 + struct xe_vm *vm = xe_vma_vm(vma); 3920 3636 struct xe_tile *tile; 3921 - struct xe_gt_tlb_invalidation_fence 3922 - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3923 - u8 id; 3924 - u32 fence_id = 0; 3637 + u8 tile_mask = 0; 3925 3638 int ret = 0; 3639 + u8 id; 3926 3640 3927 3641 xe_assert(xe, !xe_vma_is_null(vma)); 3928 3642 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); 3929 3643 
trace_xe_vma_invalidate(vma); 3930 3644 3931 - vm_dbg(&xe_vma_vm(vma)->xe->drm, 3645 + vm_dbg(&vm->xe->drm, 3932 3646 "INVALIDATE: addr=0x%016llx, range=0x%016llx", 3933 3647 xe_vma_start(vma), xe_vma_size(vma)); 3934 3648 3935 - /* Check that we don't race with page-table updates */ 3649 + /* 3650 + * Check that we don't race with page-table updates, tile_invalidated 3651 + * update is safe 3652 + */ 3936 3653 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3937 3654 if (xe_vma_is_userptr(vma)) { 3655 + lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || 3656 + (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && 3657 + lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3658 + 3938 3659 WARN_ON_ONCE(!mmu_interval_check_retry 3939 3660 (&to_userptr_vma(vma)->userptr.notifier, 3940 3661 to_userptr_vma(vma)->userptr.notifier_seq)); 3941 - WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), 3662 + WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3942 3663 DMA_RESV_USAGE_BOOKKEEP)); 3943 3664 3944 3665 } else { ··· 3952 3661 } 3953 3662 } 3954 3663 3955 - for_each_tile(tile, xe, id) { 3956 - if (xe_pt_zap_ptes(tile, vma)) { 3957 - xe_device_wmb(xe); 3958 - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3959 - &fence[fence_id], 3960 - true); 3664 + for_each_tile(tile, xe, id) 3665 + if (xe_pt_zap_ptes(tile, vma)) 3666 + tile_mask |= BIT(id); 3961 3667 3962 - ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, 3963 - &fence[fence_id], vma); 3964 - if (ret) 3965 - goto wait; 3966 - ++fence_id; 3668 + xe_device_wmb(xe); 3967 3669 3968 - if (!tile->media_gt) 3969 - continue; 3670 + ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma), 3671 + xe_vma_end(vma), tile_mask); 3970 3672 3971 - xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3972 - &fence[fence_id], 3973 - true); 3974 - 3975 - ret = xe_gt_tlb_invalidation_vma(tile->media_gt, 3976 - &fence[fence_id], vma); 3977 - if (ret) 3978 - goto wait; 3979 - ++fence_id; 
3980 - } 3981 - } 3982 - 3983 - wait: 3984 - for (id = 0; id < fence_id; ++id) 3985 - xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3986 - 3987 - vma->tile_invalidated = vma->tile_mask; 3673 + /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ 3674 + WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); 3988 3675 3989 3676 return ret; 3990 3677 }

+24

drivers/gpu/drm/xe/xe_vm.h

··· 169 169 !xe_vma_is_cpu_addr_mirror(vma); 170 170 } 171 171 172 + struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr); 173 + 172 174 /** 173 175 * to_userptr_vma() - Return a pointer to an embedding userptr vma 174 176 * @vma: Pointer to the embedded struct xe_vma ··· 227 225 u8 tile_mask); 228 226 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, 229 227 struct xe_svm_range *range); 228 + 229 + int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, 230 + u64 end, u8 tile_mask); 230 231 231 232 int xe_vm_invalidate_vma(struct xe_vma *vma); 232 233 ··· 374 369 } 375 370 return false; 376 371 } 372 + 373 + /** 374 + * xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has 375 + * a valid GPU mapping 376 + * @tile: The tile which the GPU mapping belongs to 377 + * @tile_present: Tile present mask 378 + * @tile_invalidated: Tile invalidated mask 379 + * 380 + * The READ_ONCEs pair with WRITE_ONCEs in either the TLB invalidation paths 381 + * (xe_vm.c, xe_svm.c) or the binding paths (xe_pt.c). These are not reliable 382 + * without the notifier lock in userptr or SVM cases, and not reliable without 383 + * the BO dma-resv lock in the BO case. As such, they should only be used in 384 + * opportunistic cases (e.g., skipping a page fault fix or not skipping a TLB 385 + * invalidation) where it is harmless. 386 + * 387 + * Return: True if there are valid GPU pages, False otherwise 388 + */ 389 + #define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \ 390 + ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id)) 377 391 378 392 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 379 393 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);

+24 -2

drivers/gpu/drm/xe/xe_vm_types.h

··· 100 100 struct work_struct destroy_work; 101 101 }; 102 102 103 - /** @tile_invalidated: VMA has been invalidated */ 103 + /** 104 + * @tile_invalidated: Tile mask of bindings that are invalidated for this VMA. 105 + * Protected by BO's resv and for userptrs, vm->userptr.notifier_lock in 106 + * write mode for writing or vm->userptr.notifier_lock in read mode and 107 + * the vm->resv. For stable reading, BO's resv or userptr 108 + * vm->userptr.notifier_lock in read mode is required. Can be 109 + * opportunistically read with READ_ONCE outside of locks. 110 + */ 104 111 u8 tile_invalidated; 105 112 106 113 /** @tile_mask: Tile mask of where to create binding for this VMA */ 107 114 u8 tile_mask; 108 115 109 116 /** 110 - * @tile_present: GT mask of binding are present for this VMA. 117 + * @tile_present: Tile mask of bindings that are present for this VMA. 111 118 * protected by vm->lock, vm->resv and for userptrs, 112 119 * vm->userptr.notifier_lock for writing. Needs either for reading, 113 120 * but if reading is done under the vm->lock only, it needs to be held ··· 389 382 struct xe_svm_range *range; 390 383 }; 391 384 385 + /** struct xe_vma_op_prefetch_range - VMA prefetch range operation */ 386 + struct xe_vma_op_prefetch_range { 387 + /** @range: xarray for SVM ranges data */ 388 + struct xarray range; 389 + /** @ranges_count: number of svm ranges to map */ 390 + u32 ranges_count; 391 + /** @region: memory region to prefetch to */ 392 + u32 region; 393 + }; 394 + 392 395 /** enum xe_vma_op_flags - flags for VMA operation */ 393 396 enum xe_vma_op_flags { 394 397 /** @XE_VMA_OP_COMMITTED: VMA operation committed */ ··· 441 424 struct xe_vma_op_map_range map_range; 442 425 /** @unmap_range: VMA unmap range operation specific data */ 443 426 struct xe_vma_op_unmap_range unmap_range; 427 + /** @prefetch_range: VMA prefetch range operation specific data */ 428 + struct xe_vma_op_prefetch_range prefetch_range; 444 429 }; 445 430 }; 446 431 ··· 460 441 u32 num_syncs; 461
442 /** @pt_update_ops: page table update operations */ 462 443 struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; 444 + /** @flags: signify the properties within xe_vma_ops */ 445 + #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) 446 + u32 flags; 463 447 #ifdef TEST_VM_OPS_ERROR 464 448 /** @inject_error: inject error to test error handling */ 465 449 bool inject_error;

+2 -2

drivers/gpu/drm/xe/xe_vsec.c

··· 149 149 return 0; 150 150 } 151 151 152 - static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, 153 - u32 count) 152 + int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, 153 + u32 count) 154 154 { 155 155 struct xe_device *xe = pdev_to_xe_device(pdev); 156 156 void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET;

+4

drivers/gpu/drm/xe/xe_vsec.h

··· 4 4 #ifndef _XE_VSEC_H_ 5 5 #define _XE_VSEC_H_ 6 6 7 + #include <linux/types.h> 8 + 9 + struct pci_dev; 7 10 struct xe_device; 8 11 9 12 void xe_vsec_init(struct xe_device *xe); 13 + int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, u32 count); 10 14 11 15 #endif

+27 -19

drivers/gpu/drm/xe/xe_wa.c

··· 503 503 XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), 504 504 XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) 505 505 }, 506 - { XE_RTP_NAME("16018737384"), 507 - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), 508 - XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) 509 - }, 510 506 /* 511 507 * These two workarounds are the same, just applying to different 512 508 * engines. Although Wa_18032095049 (for the RCS) isn't required on ··· 529 533 /* Xe2_HPG */ 530 534 531 535 { XE_RTP_NAME("16018712365"), 532 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 536 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), 537 + FUNC(xe_rtp_match_first_render_or_compute)), 533 538 XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) 534 539 }, 535 540 { XE_RTP_NAME("16018737384"), 536 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 541 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), 542 + FUNC(xe_rtp_match_first_render_or_compute)), 537 543 XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) 538 544 }, 539 545 { XE_RTP_NAME("14019988906"), 540 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 546 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), 547 + FUNC(xe_rtp_match_first_render_or_compute)), 541 548 XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) 542 549 }, 543 550 { XE_RTP_NAME("14019877138"), 544 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 551 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), 552 + FUNC(xe_rtp_match_first_render_or_compute)), 545 553 XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) 546 554 }, 547 555 { XE_RTP_NAME("14020338487"), 548 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 556 + 
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), 557 + FUNC(xe_rtp_match_first_render_or_compute)), 549 558 XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) 550 559 }, 551 560 { XE_RTP_NAME("18032247524"), 552 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 561 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), 562 + FUNC(xe_rtp_match_first_render_or_compute)), 553 563 XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) 554 564 }, 555 565 { XE_RTP_NAME("14018471104"), 556 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 566 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), 567 + FUNC(xe_rtp_match_first_render_or_compute)), 557 568 XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) 558 569 }, 559 570 /* ··· 569 566 * apply this to all engines for simplicity. 570 567 */ 571 568 { XE_RTP_NAME("16021639441"), 572 - XE_RTP_RULES(GRAPHICS_VERSION(2001)), 569 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002)), 573 570 XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), 574 571 GHWSP_CSB_REPORT_DIS | 575 572 PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, ··· 581 578 XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) 582 579 }, 583 580 { XE_RTP_NAME("14021402888"), 584 - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 581 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), 585 582 XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) 586 583 }, 587 - { XE_RTP_NAME("14021821874"), 588 - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), 584 + { XE_RTP_NAME("14021821874, 14022954250"), 585 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), 586 + FUNC(xe_rtp_match_first_render_or_compute)), 589 587 XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) 590 588 }, 591 589 ··· 778 774 XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) 779 775 }, 780 776 { 
XE_RTP_NAME("18033852989"), 781 - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), 777 + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), 782 778 XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) 783 779 }, 784 780 { XE_RTP_NAME("14021567978"), ··· 811 807 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) 812 808 }, 813 809 { XE_RTP_NAME("14019386621"), 814 - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 810 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), 815 811 XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) 816 812 }, 817 813 { XE_RTP_NAME("14020756599"), ··· 828 824 DIS_AUTOSTRIP)) 829 825 }, 830 826 { XE_RTP_NAME("15016589081"), 831 - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 827 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), 832 828 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) 833 829 }, 834 830 { XE_RTP_NAME("22021007897"), 835 - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 831 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), 836 832 XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) 833 + }, 834 + { XE_RTP_NAME("18033852989"), 835 + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 836 + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) 837 837 }, 838 838 839 839 /* Xe3_LPG */

+8 -3

drivers/gpu/drm/xe/xe_wa_oob.rules

··· 21 21 GRAPHICS_VERSION_RANGE(1270, 1274) 22 22 MEDIA_VERSION(1300) 23 23 PLATFORM(DG2) 24 - 14018094691 GRAPHICS_VERSION(2004) 24 + 14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) 25 + GRAPHICS_VERSION(2004) 25 26 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 26 27 18024947630 GRAPHICS_VERSION(2001) 27 28 GRAPHICS_VERSION(2004) ··· 31 30 GRAPHICS_VERSION(2004) 32 31 13011645652 GRAPHICS_VERSION(2004) 33 32 GRAPHICS_VERSION(3001) 34 - 14022293748 GRAPHICS_VERSION(2001) 33 + 14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) 35 34 GRAPHICS_VERSION(2004) 36 35 GRAPHICS_VERSION_RANGE(3000, 3001) 37 - 22019794406 GRAPHICS_VERSION(2001) 36 + 22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) 38 37 GRAPHICS_VERSION(2004) 39 38 GRAPHICS_VERSION_RANGE(3000, 3001) 40 39 22019338487 MEDIA_VERSION(2000) ··· 60 59 MEDIA_VERSION_RANGE(1301, 3000) 61 60 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) 62 61 MEDIA_VERSION_RANGE(1300, 3000) 62 + 63 + # SoC workaround - currently applies to all platforms with the following 64 + # primary GT GMDID 65 + 14022085890 GRAPHICS_VERSION(2001)

+5

include/drm/drm_gpusvm.h

··· 327 327 328 328 void drm_gpusvm_free(struct drm_gpusvm *gpusvm); 329 329 330 + unsigned long 331 + drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, 332 + unsigned long start, 333 + unsigned long end); 334 + 330 335 struct drm_gpusvm_range * 331 336 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, 332 337 unsigned long fault_addr,

+5 -2

include/drm/intel/pciids.h

··· 852 852 MACRO__(0xE210, ## __VA_ARGS__), \ 853 853 MACRO__(0xE211, ## __VA_ARGS__), \ 854 854 MACRO__(0xE212, ## __VA_ARGS__), \ 855 - MACRO__(0xE215, ## __VA_ARGS__), \ 856 - MACRO__(0xE216, ## __VA_ARGS__) 855 + MACRO__(0xE216, ## __VA_ARGS__), \ 856 + MACRO__(0xE220, ## __VA_ARGS__), \ 857 + MACRO__(0xE221, ## __VA_ARGS__), \ 858 + MACRO__(0xE222, ## __VA_ARGS__), \ 859 + MACRO__(0xE223, ## __VA_ARGS__) 857 860 858 861 /* PTL */ 859 862 #define INTEL_PTL_IDS(MACRO__, ...) \

+4

include/uapi/drm/xe_drm.h

··· 1617 1617 1618 1618 /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ 1619 1619 DRM_XE_OA_UNIT_TYPE_OAM, 1620 + 1621 + /** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */ 1622 + DRM_XE_OA_UNIT_TYPE_OAM_SAG, 1620 1623 }; 1621 1624 1622 1625 /** ··· 1641 1638 #define DRM_XE_OA_CAPS_SYNCS (1 << 1) 1642 1639 #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) 1643 1640 #define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) 1641 + #define DRM_XE_OA_CAPS_OAM (1 << 4) 1644 1642 1645 1643 /** @oa_timestamp_freq: OA timestamp freq */ 1646 1644 __u64 oa_timestamp_freq;

Configure Feed

Configure Feed