Merge tag 'drm-xe-next-2024-08-28' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

+15

Documentation/gpu/xe/xe_mm.rst

··· 7 7 .. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h 8 8 :doc: Buffer Objects (BO) 9 9 10 + GGTT 11 + ==== 12 + 13 + .. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c 14 + :doc: Global Graphics Translation Table (GGTT) 15 + 16 + GGTT Internal API 17 + ----------------- 18 + 19 + .. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt_types.h 20 + :internal: 21 + 22 + .. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c 23 + :internal: 24 + 10 25 Pagetable building 11 26 ================== 12 27

+8 -5

drivers/gpu/drm/drm_print.c

··· 100 100 copy = iterator->remain; 101 101 102 102 /* Copy out the bit of the string that we need */ 103 - memcpy(iterator->data, 104 - str + (iterator->start - iterator->offset), copy); 103 + if (iterator->data) 104 + memcpy(iterator->data, 105 + str + (iterator->start - iterator->offset), copy); 105 106 106 107 iterator->offset = iterator->start + copy; 107 108 iterator->remain -= copy; ··· 111 110 112 111 len = min_t(ssize_t, strlen(str), iterator->remain); 113 112 114 - memcpy(iterator->data + pos, str, len); 113 + if (iterator->data) 114 + memcpy(iterator->data + pos, str, len); 115 115 116 116 iterator->offset += len; 117 117 iterator->remain -= len; ··· 142 140 if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { 143 141 ssize_t pos = iterator->offset - iterator->start; 144 142 145 - snprintf(((char *) iterator->data) + pos, 146 - iterator->remain, "%pV", vaf); 143 + if (iterator->data) 144 + snprintf(((char *) iterator->data) + pos, 145 + iterator->remain, "%pV", vaf); 147 146 148 147 iterator->offset += len; 149 148 iterator->remain -= len;

+4

drivers/gpu/drm/i915/display/intel_dpt.c

··· 317 317 i915_vm_put(&dpt->vm); 318 318 } 319 319 320 + u64 intel_dpt_offset(struct i915_vma *dpt_vma) 321 + { 322 + return dpt_vma->node.start; 323 + }

+3

drivers/gpu/drm/i915/display/intel_dpt.h

··· 6 6 #ifndef __INTEL_DPT_H__ 7 7 #define __INTEL_DPT_H__ 8 8 9 + #include <linux/types.h> 10 + 9 11 struct drm_i915_private; 10 12 11 13 struct i915_address_space; ··· 22 20 void intel_dpt_resume(struct drm_i915_private *i915); 23 21 struct i915_address_space * 24 22 intel_dpt_create(struct intel_framebuffer *fb); 23 + u64 intel_dpt_offset(struct i915_vma *dpt_vma); 25 24 26 25 #endif /* __INTEL_DPT_H__ */

+2 -1

drivers/gpu/drm/i915/display/skl_universal_plane.c

··· 14 14 #include "intel_de.h" 15 15 #include "intel_display_irq.h" 16 16 #include "intel_display_types.h" 17 + #include "intel_dpt.h" 17 18 #include "intel_fb.h" 18 19 #include "intel_fbc.h" 19 20 #include "intel_frontbuffer.h" ··· 1163 1162 * within the DPT is always 0. 1164 1163 */ 1165 1164 drm_WARN_ON(&i915->drm, plane_state->dpt_vma && 1166 - plane_state->dpt_vma->node.start); 1165 + intel_dpt_offset(plane_state->dpt_vma)); 1167 1166 drm_WARN_ON(&i915->drm, offset & 0x1fffff); 1168 1167 return offset >> 9; 1169 1168 } else {

+11 -7

drivers/gpu/drm/xe/Makefile

··· 28 28 xe-y += xe_bb.o \ 29 29 xe_bo.o \ 30 30 xe_bo_evict.o \ 31 - xe_debugfs.o \ 32 31 xe_devcoredump.o \ 33 32 xe_device.o \ 34 33 xe_device_sysfs.o \ ··· 45 46 xe_gt.o \ 46 47 xe_gt_ccs_mode.o \ 47 48 xe_gt_clock.o \ 48 - xe_gt_debugfs.o \ 49 49 xe_gt_freq.o \ 50 50 xe_gt_idle.o \ 51 51 xe_gt_mcr.o \ ··· 57 59 xe_guc_ads.o \ 58 60 xe_guc_ct.o \ 59 61 xe_guc_db_mgr.o \ 60 - xe_guc_debugfs.o \ 61 62 xe_guc_hwconfig.o \ 62 63 xe_guc_id_mgr.o \ 63 64 xe_guc_klv_helpers.o \ ··· 66 69 xe_heci_gsc.o \ 67 70 xe_hw_engine.o \ 68 71 xe_hw_engine_class_sysfs.o \ 72 + xe_hw_engine_group.o \ 69 73 xe_hw_fence.o \ 70 74 xe_huc.o \ 71 - xe_huc_debugfs.o \ 72 75 xe_irq.o \ 73 76 xe_lrc.o \ 74 77 xe_migrate.o \ ··· 104 107 xe_ttm_vram_mgr.o \ 105 108 xe_tuning.o \ 106 109 xe_uc.o \ 107 - xe_uc_debugfs.o \ 108 110 xe_uc_fw.o \ 109 111 xe_vm.o \ 110 112 xe_vram.o \ ··· 120 124 # graphics virtualization (SR-IOV) support 121 125 xe-y += \ 122 126 xe_gt_sriov_vf.o \ 123 - xe_gt_sriov_vf_debugfs.o \ 124 127 xe_guc_relay.o \ 125 128 xe_memirq.o \ 126 129 xe_sriov.o ··· 128 133 xe_gt_sriov_pf.o \ 129 134 xe_gt_sriov_pf_config.o \ 130 135 xe_gt_sriov_pf_control.o \ 131 - xe_gt_sriov_pf_debugfs.o \ 132 136 xe_gt_sriov_pf_monitor.o \ 133 137 xe_gt_sriov_pf_policy.o \ 134 138 xe_gt_sriov_pf_service.o \ ··· 275 281 endif 276 282 277 283 ifeq ($(CONFIG_DEBUG_FS),y) 284 + xe-y += xe_debugfs.o \ 285 + xe_gt_debugfs.o \ 286 + xe_gt_sriov_vf_debugfs.o \ 287 + xe_gt_stats.o \ 288 + xe_guc_debugfs.o \ 289 + xe_huc_debugfs.o \ 290 + xe_uc_debugfs.o 291 + 292 + xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o 293 + 278 294 xe-$(CONFIG_DRM_XE_DISPLAY) += \ 279 295 i915-display/intel_display_debugfs.o \ 280 296 i915-display/intel_display_debugfs_params.o \

+1

drivers/gpu/drm/xe/abi/guc_klvs_abi.h

··· 351 351 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, 352 352 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, 353 353 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, 354 + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, 354 355 }; 355 356 356 357 #endif

+4 -3

drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h

··· 7 7 #define I915_VMA_H 8 8 9 9 #include <uapi/drm/i915_drm.h> 10 - #include <drm/drm_mm.h> 10 + 11 + #include "xe_ggtt_types.h" 11 12 12 13 /* We don't want these from i915_drm.h in case of Xe */ 13 14 #undef I915_TILING_X ··· 20 19 21 20 struct i915_vma { 22 21 struct xe_bo *bo, *dpt; 23 - struct drm_mm_node node; 22 + struct xe_ggtt_node *node; 24 23 }; 25 24 26 25 #define i915_ggtt_clear_scanout(bo) do { } while (0) ··· 29 28 30 29 static inline u32 i915_ggtt_offset(const struct i915_vma *vma) 31 30 { 32 - return vma->node.start; 31 + return vma->node->base.start; 33 32 } 34 33 35 34 #endif

+62 -32

drivers/gpu/drm/xe/display/xe_display.c

··· 46 46 */ 47 47 bool xe_display_driver_probe_defer(struct pci_dev *pdev) 48 48 { 49 - if (!xe_modparam.enable_display) 49 + if (!xe_modparam.probe_display) 50 50 return 0; 51 51 52 52 return intel_display_driver_probe_defer(pdev); ··· 62 62 */ 63 63 void xe_display_driver_set_hooks(struct drm_driver *driver) 64 64 { 65 - if (!xe_modparam.enable_display) 65 + if (!xe_modparam.probe_display) 66 66 return; 67 67 68 68 driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; ··· 104 104 { 105 105 struct xe_device *xe = to_xe_device(dev); 106 106 107 - if (!xe->info.enable_display) 107 + if (!xe->info.probe_display) 108 108 return; 109 109 110 110 intel_power_domains_cleanup(xe); ··· 112 112 113 113 int xe_display_init_nommio(struct xe_device *xe) 114 114 { 115 - if (!xe->info.enable_display) 115 + if (!xe->info.probe_display) 116 116 return 0; 117 117 118 118 /* Fake uncore lock */ ··· 129 129 struct xe_device *xe = arg; 130 130 struct intel_display *display = &xe->display; 131 131 132 - if (!xe->info.enable_display) 132 + if (!xe->info.probe_display) 133 133 return; 134 134 135 135 intel_display_driver_remove_noirq(xe); ··· 141 141 struct intel_display *display = &xe->display; 142 142 int err; 143 143 144 - if (!xe->info.enable_display) 144 + if (!xe->info.probe_display) 145 145 return 0; 146 146 147 147 intel_display_driver_early_probe(xe); ··· 172 172 { 173 173 struct xe_device *xe = arg; 174 174 175 - if (!xe->info.enable_display) 175 + if (!xe->info.probe_display) 176 176 return; 177 177 178 178 intel_display_driver_remove_nogem(xe); ··· 182 182 { 183 183 int err; 184 184 185 - if (!xe->info.enable_display) 185 + if (!xe->info.probe_display) 186 186 return 0; 187 187 188 188 err = intel_display_driver_probe_nogem(xe); ··· 194 194 195 195 int xe_display_init(struct xe_device *xe) 196 196 { 197 - if (!xe->info.enable_display) 197 + if (!xe->info.probe_display) 198 198 return 0; 199 199 200 200 return intel_display_driver_probe(xe); ··· 202 202 203 203 void 
xe_display_fini(struct xe_device *xe) 204 204 { 205 - if (!xe->info.enable_display) 205 + if (!xe->info.probe_display) 206 206 return; 207 207 208 208 intel_hpd_poll_fini(xe); ··· 213 213 214 214 void xe_display_register(struct xe_device *xe) 215 215 { 216 - if (!xe->info.enable_display) 216 + if (!xe->info.probe_display) 217 217 return; 218 218 219 219 intel_display_driver_register(xe); ··· 223 223 224 224 void xe_display_unregister(struct xe_device *xe) 225 225 { 226 - if (!xe->info.enable_display) 226 + if (!xe->info.probe_display) 227 227 return; 228 228 229 229 intel_unregister_dsm_handler(); ··· 233 233 234 234 void xe_display_driver_remove(struct xe_device *xe) 235 235 { 236 - if (!xe->info.enable_display) 236 + if (!xe->info.probe_display) 237 237 return; 238 238 239 239 intel_display_driver_remove(xe); ··· 243 243 244 244 void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) 245 245 { 246 - if (!xe->info.enable_display) 246 + if (!xe->info.probe_display) 247 247 return; 248 248 249 249 if (master_ctl & DISPLAY_IRQ) ··· 254 254 { 255 255 struct intel_display *display = &xe->display; 256 256 257 - if (!xe->info.enable_display) 257 + if (!xe->info.probe_display) 258 258 return; 259 259 260 260 if (gu_misc_iir & GU_MISC_GSE) ··· 263 263 264 264 void xe_display_irq_reset(struct xe_device *xe) 265 265 { 266 - if (!xe->info.enable_display) 266 + if (!xe->info.probe_display) 267 267 return; 268 268 269 269 gen11_display_irq_reset(xe); ··· 271 271 272 272 void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) 273 273 { 274 - if (!xe->info.enable_display) 274 + if (!xe->info.probe_display) 275 275 return; 276 276 277 277 if (gt->info.id == XE_GT0) ··· 308 308 } 309 309 } 310 310 311 + /* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. 
*/ 312 + void xe_display_pm_runtime_suspend(struct xe_device *xe) 313 + { 314 + if (!xe->info.probe_display) 315 + return; 316 + 317 + if (xe->d3cold.allowed) 318 + xe_display_pm_suspend(xe, true); 319 + 320 + intel_hpd_poll_enable(xe); 321 + } 322 + 311 323 void xe_display_pm_suspend(struct xe_device *xe, bool runtime) 312 324 { 313 325 struct intel_display *display = &xe->display; 314 326 bool s2idle = suspend_to_idle(); 315 - if (!xe->info.enable_display) 327 + if (!xe->info.probe_display) 316 328 return; 317 329 318 330 /* ··· 332 320 * properly. 333 321 */ 334 322 intel_power_domains_disable(xe); 335 - if (has_display(xe)) 323 + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); 324 + if (!runtime && has_display(xe)) { 336 325 drm_kms_helper_poll_disable(&xe->drm); 337 - 338 - if (!runtime) 326 + intel_display_driver_disable_user_access(xe); 339 327 intel_display_driver_suspend(xe); 328 + } 329 + 330 + xe_display_flush_cleanup_work(xe); 340 331 341 332 xe_display_flush_cleanup_work(xe); 342 333 ··· 347 332 348 333 intel_hpd_cancel_work(xe); 349 334 335 + if (!runtime && has_display(xe)) 336 + intel_display_driver_suspend_access(xe); 337 + 350 338 intel_encoder_suspend_all(&xe->display); 351 339 352 340 intel_opregion_suspend(display, s2idle ? 
PCI_D1 : PCI_D3cold); 353 - 354 - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); 355 341 356 342 intel_dmc_suspend(xe); 357 343 } ··· 360 344 void xe_display_pm_suspend_late(struct xe_device *xe) 361 345 { 362 346 bool s2idle = suspend_to_idle(); 363 - if (!xe->info.enable_display) 347 + if (!xe->info.probe_display) 364 348 return; 365 349 366 350 intel_power_domains_suspend(xe, s2idle); ··· 368 352 intel_display_power_suspend_late(xe); 369 353 } 370 354 355 + void xe_display_pm_runtime_resume(struct xe_device *xe) 356 + { 357 + if (!xe->info.probe_display) 358 + return; 359 + 360 + intel_hpd_poll_disable(xe); 361 + 362 + if (xe->d3cold.allowed) 363 + xe_display_pm_resume(xe, true); 364 + } 365 + 371 366 void xe_display_pm_resume_early(struct xe_device *xe) 372 367 { 373 - if (!xe->info.enable_display) 368 + if (!xe->info.probe_display) 374 369 return; 375 370 376 371 intel_display_power_resume_early(xe); ··· 393 366 { 394 367 struct intel_display *display = &xe->display; 395 368 396 - if (!xe->info.enable_display) 369 + if (!xe->info.probe_display) 397 370 return; 398 371 399 372 intel_dmc_resume(xe); ··· 404 377 intel_display_driver_init_hw(xe); 405 378 intel_hpd_init(xe); 406 379 380 + if (!runtime && has_display(xe)) 381 + intel_display_driver_resume_access(xe); 382 + 407 383 /* MST sideband requires HPD interrupts enabled */ 408 384 intel_dp_mst_resume(xe); 409 - if (!runtime) 385 + if (!runtime && has_display(xe)) { 410 386 intel_display_driver_resume(xe); 411 - 412 - intel_hpd_poll_disable(xe); 413 - if (has_display(xe)) 414 387 drm_kms_helper_poll_enable(&xe->drm); 388 + intel_display_driver_enable_user_access(xe); 389 + intel_hpd_poll_disable(xe); 390 + } 415 391 416 392 intel_opregion_resume(display); 417 393 ··· 434 404 { 435 405 int err; 436 406 437 - if (!xe->info.enable_display) 407 + if (!xe->info.probe_display) 438 408 goto no_display; 439 409 440 410 intel_display_device_probe(xe); ··· 447 417 return 0; 448 418 449 419 no_display: 
450 - xe->info.enable_display = false; 420 + xe->info.probe_display = false; 451 421 unset_display_features(xe); 452 422 return 0; 453 423 }

+4

drivers/gpu/drm/xe/display/xe_display.h

··· 38 38 void xe_display_pm_suspend_late(struct xe_device *xe); 39 39 void xe_display_pm_resume_early(struct xe_device *xe); 40 40 void xe_display_pm_resume(struct xe_device *xe, bool runtime); 41 + void xe_display_pm_runtime_suspend(struct xe_device *xe); 42 + void xe_display_pm_runtime_resume(struct xe_device *xe); 41 43 42 44 #else 43 45 ··· 69 67 static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} 70 68 static inline void xe_display_pm_resume_early(struct xe_device *xe) {} 71 69 static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} 70 + static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} 71 + static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} 72 72 73 73 #endif /* CONFIG_DRM_XE_DISPLAY */ 74 74 #endif /* _XE_DISPLAY_H_ */

+33 -17

drivers/gpu/drm/xe/display/xe_fb_pin.c

··· 204 204 if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) 205 205 align = max_t(u32, align, SZ_64K); 206 206 207 - if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) { 207 + if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { 208 208 vma->node = bo->ggtt_node; 209 209 } else if (view->type == I915_GTT_VIEW_NORMAL) { 210 210 u32 x, size = bo->ttm.base.size; 211 211 212 - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, 213 - align, 0); 214 - if (ret) 212 + vma->node = xe_ggtt_node_init(ggtt); 213 + if (IS_ERR(vma->node)) { 214 + ret = PTR_ERR(vma->node); 215 215 goto out_unlock; 216 + } 217 + 218 + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); 219 + if (ret) { 220 + xe_ggtt_node_fini(vma->node); 221 + goto out_unlock; 222 + } 216 223 217 224 for (x = 0; x < size; x += XE_PAGE_SIZE) { 218 225 u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, 219 226 xe->pat.idx[XE_CACHE_NONE]); 220 227 221 - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); 228 + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); 222 229 } 223 230 } else { 224 231 u32 i, ggtt_ofs; ··· 234 227 /* display seems to use tiles instead of bytes here, so convert it back.. 
*/ 235 228 u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; 236 229 237 - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, 238 - align, 0); 239 - if (ret) 230 + vma->node = xe_ggtt_node_init(ggtt); 231 + if (IS_ERR(vma->node)) { 232 + ret = PTR_ERR(vma->node); 240 233 goto out_unlock; 234 + } 241 235 242 - ggtt_ofs = vma->node.start; 236 + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); 237 + if (ret) { 238 + xe_ggtt_node_fini(vma->node); 239 + goto out_unlock; 240 + } 241 + 242 + ggtt_ofs = vma->node->base.start; 243 243 244 244 for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) 245 245 write_ggtt_rotated(bo, ggtt, &ggtt_ofs, ··· 334 320 335 321 static void __xe_unpin_fb_vma(struct i915_vma *vma) 336 322 { 337 - struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev); 338 - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; 339 - 340 323 if (vma->dpt) 341 324 xe_bo_unpin_map_no_vm(vma->dpt); 342 - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || 343 - vma->bo->ggtt_node.start != vma->node.start) 344 - xe_ggtt_remove_node(ggtt, &vma->node, false); 325 + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || 326 + vma->bo->ggtt_node->base.start != vma->node->base.start) 327 + xe_ggtt_node_remove(vma->node, false); 345 328 346 329 ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); 347 330 ttm_bo_unpin(&vma->bo->ttm); ··· 388 377 } 389 378 390 379 /* 391 - * For Xe introduce dummy intel_dpt_create which just return NULL and 392 - * intel_dpt_destroy which does nothing. 
380 + * For Xe introduce dummy intel_dpt_create which just returns NULL, 381 + * intel_dpt_destroy which does nothing, and fake intel_dpt_offset returning 0; 393 382 */ 394 383 struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) 395 384 { ··· 399 388 void intel_dpt_destroy(struct i915_address_space *vm) 400 389 { 401 390 return; 391 + } 392 + 393 + u64 intel_dpt_offset(struct i915_vma *dpt_vma) 394 + { 395 + return 0; 402 396 }

+1

drivers/gpu/drm/xe/regs/xe_engine_regs.h

··· 104 104 #define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) 105 105 #define GHWSP_CSB_REPORT_DIS REG_BIT(15) 106 106 #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) 107 + #define CS_PRIORITY_MEM_READ REG_BIT(7) 107 108 108 109 #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) 109 110 #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14)

+8 -1

drivers/gpu/drm/xe/regs/xe_gt_regs.h

··· 80 80 #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) 81 81 #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) 82 82 83 - #define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) 83 + #define STATELESS_COMPRESSION_CTRL XE_REG_MCR(0x4148) 84 + #define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) 85 + 86 + #define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) 84 87 #define CG_DIS_CNTLBUS REG_BIT(6) 85 88 86 89 #define CCS_AUX_INV XE_REG(0x4208) ··· 196 193 #define GSCPSMI_BASE XE_REG(0x880c) 197 194 198 195 #define CCCHKNREG1 XE_REG_MCR(0x8828) 196 + #define L3CMPCTRL REG_BIT(23) 199 197 #define ENCOMPPERFFIX REG_BIT(18) 200 198 201 199 /* Fuse readout registers for GT */ ··· 370 366 371 367 #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) 372 368 #define XEHP_LNESPARE REG_BIT(19) 369 + 370 + #define L3SQCREG2 XE_REG_MCR(0xb104) 371 + #define COMPMEMRD256BOVRFETCHEN REG_BIT(20) 373 372 374 373 #define L3SQCREG3 XE_REG_MCR(0xb108) 375 374 #define COMPPWOVERFETCHEN REG_BIT(28)

+4 -4

drivers/gpu/drm/xe/tests/xe_bo.c

··· 36 36 37 37 /* Optionally clear bo *and* CCS data in VRAM. */ 38 38 if (clear) { 39 - fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); 39 + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource, 40 + XE_MIGRATE_CLEAR_FLAG_FULL); 40 41 if (IS_ERR(fence)) { 41 42 KUNIT_FAIL(test, "Failed to submit bo clear.\n"); 42 43 return PTR_ERR(fence); ··· 125 124 kunit_info(test, "Testing system memory\n"); 126 125 127 126 bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, 128 - ttm_bo_type_device, bo_flags); 127 + bo_flags); 129 128 if (IS_ERR(bo)) { 130 129 KUNIT_FAIL(test, "Failed to create bo.\n"); 131 130 return; ··· 206 205 xe_vm_lock(vm, false); 207 206 bo = xe_bo_create_user(xe, NULL, vm, 0x10000, 208 207 DRM_XE_GEM_CPU_CACHING_WC, 209 - ttm_bo_type_device, 210 208 bo_flags); 211 209 xe_vm_unlock(vm); 212 210 if (IS_ERR(bo)) { ··· 215 215 216 216 external = xe_bo_create_user(xe, NULL, NULL, 0x10000, 217 217 DRM_XE_GEM_CPU_CACHING_WC, 218 - ttm_bo_type_device, bo_flags); 218 + bo_flags); 219 219 if (IS_ERR(external)) { 220 220 KUNIT_FAIL(test, "external bo create err=%pe\n", external); 221 221 goto cleanup_bo;

+1 -1

drivers/gpu/drm/xe/tests/xe_dma_buf.c

··· 126 126 127 127 kunit_info(test, "running %s\n", __func__); 128 128 bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, 129 - ttm_bo_type_device, params->mem_mask); 129 + params->mem_mask); 130 130 if (IS_ERR(bo)) { 131 131 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", 132 132 PTR_ERR(bo));

+15 -9

drivers/gpu/drm/xe/tests/xe_migrate.c

··· 105 105 } 106 106 107 107 xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); 108 - fence = xe_migrate_clear(m, remote, remote->ttm.resource); 108 + fence = xe_migrate_clear(m, remote, remote->ttm.resource, 109 + XE_MIGRATE_CLEAR_FLAG_FULL); 109 110 if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : 110 111 "Clearing remote small bo", test)) { 111 112 retval = xe_map_rd(xe, &remote->vmap, 0, u64); ··· 280 279 kunit_info(test, "Clearing small buffer object\n"); 281 280 xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); 282 281 expected = 0; 283 - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); 282 + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, 283 + XE_MIGRATE_CLEAR_FLAG_FULL); 284 284 if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) 285 285 goto out; 286 286 ··· 302 300 kunit_info(test, "Clearing big buffer object\n"); 303 301 xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); 304 302 expected = 0; 305 - fence = xe_migrate_clear(m, big, big->ttm.resource); 303 + fence = xe_migrate_clear(m, big, big->ttm.resource, 304 + XE_MIGRATE_CLEAR_FLAG_FULL); 306 305 if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) 307 306 goto out; 308 307 ··· 606 603 607 604 kunit_info(test, "Clear vram buffer object\n"); 608 605 expected = 0x0000000000000000; 609 - fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource); 606 + fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource, 607 + XE_MIGRATE_CLEAR_FLAG_FULL); 610 608 if (sanity_fence_failed(xe, fence, "Clear vram_bo", test)) 611 609 return; 612 610 dma_fence_put(fence); ··· 641 637 long ret; 642 638 643 639 sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 644 - DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, 640 + DRM_XE_GEM_CPU_CACHING_WC, 645 641 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); 646 642 647 643 if (IS_ERR(sys_bo)) { ··· 664 660 } 665 661 xe_bo_unlock(sys_bo); 666 662 667 - ccs_bo = xe_bo_create_user(xe, NULL, 
NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, 668 - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); 663 + ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 664 + DRM_XE_GEM_CPU_CACHING_WC, 665 + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); 669 666 670 667 if (IS_ERR(ccs_bo)) { 671 668 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", ··· 688 683 } 689 684 xe_bo_unlock(ccs_bo); 690 685 691 - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, 692 - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); 686 + vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 687 + DRM_XE_GEM_CPU_CACHING_WC, 688 + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); 693 689 if (IS_ERR(vram_bo)) { 694 690 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", 695 691 PTR_ERR(vram_bo));

-52

drivers/gpu/drm/xe/tests/xe_pci.c

··· 12 12 #include <kunit/test-bug.h> 13 13 #include <kunit/visibility.h> 14 14 15 - struct kunit_test_data { 16 - int ndevs; 17 - xe_device_fn xe_fn; 18 - }; 19 - 20 - static int dev_to_xe_device_fn(struct device *dev, void *__data) 21 - 22 - { 23 - struct drm_device *drm = dev_get_drvdata(dev); 24 - struct kunit_test_data *data = __data; 25 - int ret = 0; 26 - int idx; 27 - 28 - data->ndevs++; 29 - 30 - if (drm_dev_enter(drm, &idx)) 31 - ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); 32 - drm_dev_exit(idx); 33 - 34 - return ret; 35 - } 36 - 37 - /** 38 - * xe_call_for_each_device - Iterate over all devices this driver binds to 39 - * @xe_fn: Function to call for each device. 40 - * 41 - * This function iterated over all devices this driver binds to, and calls 42 - * @xe_fn: for each one of them. If the called function returns anything else 43 - * than 0, iteration is stopped and the return value is returned by this 44 - * function. Across each function call, drm_dev_enter() / drm_dev_exit() is 45 - * called for the corresponding drm device. 46 - * 47 - * Return: Number of devices iterated or 48 - * the error code of a call to @xe_fn returning an error code. 49 - */ 50 - int xe_call_for_each_device(xe_device_fn xe_fn) 51 - { 52 - int ret; 53 - struct kunit_test_data data = { 54 - .xe_fn = xe_fn, 55 - .ndevs = 0, 56 - }; 57 - 58 - ret = driver_for_each_device(&xe_pci_driver.driver, NULL, 59 - &data, dev_to_xe_device_fn); 60 - 61 - if (!data.ndevs) 62 - kunit_skip(current->kunit_test, "test runs only on hardware\n"); 63 - 64 - return ret ?: data.ndevs; 65 - } 66 - 67 15 /** 68 16 * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs 69 17 * @xe_fn: Function to call for each device.

-1

drivers/gpu/drm/xe/tests/xe_pci_test.h

··· 19 19 typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); 20 20 typedef void (*xe_media_fn)(const struct xe_media_desc *); 21 21 22 - int xe_call_for_each_device(xe_device_fn xe_fn); 23 22 void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); 24 23 void xe_call_for_each_media_ip(xe_media_fn xe_fn); 25 24

+13 -7

drivers/gpu/drm/xe/xe_bo.c

··· 793 793 } 794 794 } 795 795 } else { 796 - if (move_lacks_source) 797 - fence = xe_migrate_clear(migrate, bo, new_mem); 796 + if (move_lacks_source) { 797 + u32 flags = 0; 798 + 799 + if (mem_type_is_vram(new_mem->mem_type)) 800 + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; 801 + else if (handle_system_ccs) 802 + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; 803 + 804 + fence = xe_migrate_clear(migrate, bo, new_mem, flags); 805 + } 798 806 else 799 807 fence = xe_migrate_copy(migrate, bo, bo, old_mem, 800 808 new_mem, handle_system_ccs); ··· 1098 1090 1099 1091 xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); 1100 1092 1101 - if (bo->ggtt_node.size) 1093 + if (bo->ggtt_node && bo->ggtt_node->base.size) 1102 1094 xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); 1103 1095 1104 1096 #ifdef CONFIG_PROC_FS ··· 1499 1491 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, 1500 1492 struct xe_vm *vm, size_t size, 1501 1493 u16 cpu_caching, 1502 - enum ttm_bo_type type, 1503 1494 u32 flags) 1504 1495 { 1505 1496 struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 1506 - cpu_caching, type, 1497 + cpu_caching, ttm_bo_type_device, 1507 1498 flags | XE_BO_FLAG_USER); 1508 1499 if (!IS_ERR(bo)) 1509 1500 xe_bo_unlock_vm_held(bo); ··· 2026 2019 } 2027 2020 2028 2021 bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, 2029 - ttm_bo_type_device, bo_flags); 2022 + bo_flags); 2030 2023 2031 2024 if (vm) 2032 2025 xe_vm_unlock(vm); ··· 2332 2325 2333 2326 bo = xe_bo_create_user(xe, NULL, NULL, args->size, 2334 2327 DRM_XE_GEM_CPU_CACHING_WC, 2335 - ttm_bo_type_device, 2336 2328 XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | 2337 2329 XE_BO_FLAG_SCANOUT | 2338 2330 XE_BO_FLAG_NEEDS_CPU_ACCESS);

+6 -4

drivers/gpu/drm/xe/xe_bo.h

··· 87 87 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, 88 88 struct xe_vm *vm, size_t size, 89 89 u16 cpu_caching, 90 - enum ttm_bo_type type, 91 90 u32 flags); 92 91 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, 93 92 struct xe_vm *vm, size_t size, ··· 194 195 static inline u32 195 196 xe_bo_ggtt_addr(struct xe_bo *bo) 196 197 { 197 - XE_WARN_ON(bo->ggtt_node.size > bo->size); 198 - XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); 199 - return bo->ggtt_node.start; 198 + if (XE_WARN_ON(!bo->ggtt_node)) 199 + return 0; 200 + 201 + XE_WARN_ON(bo->ggtt_node->base.size > bo->size); 202 + XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); 203 + return bo->ggtt_node->base.start; 200 204 } 201 205 202 206 int xe_bo_vmap(struct xe_bo *bo);

+3 -2

drivers/gpu/drm/xe/xe_bo_types.h

··· 8 8 9 9 #include <linux/iosys-map.h> 10 10 11 - #include <drm/drm_mm.h> 12 11 #include <drm/ttm/ttm_bo.h> 13 12 #include <drm/ttm/ttm_device.h> 14 13 #include <drm/ttm/ttm_execbuf_util.h> 15 14 #include <drm/ttm/ttm_placement.h> 15 + 16 + #include "xe_ggtt_types.h" 16 17 17 18 struct xe_device; 18 19 struct xe_vm; ··· 40 39 /** @placement: current placement for this BO */ 41 40 struct ttm_placement placement; 42 41 /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ 43 - struct drm_mm_node ggtt_node; 42 + struct xe_ggtt_node *ggtt_node; 44 43 /** @vmap: iosys map of this buffer */ 45 44 struct iosys_map vmap; 46 45 /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */

+4

drivers/gpu/drm/xe/xe_debugfs.h

··· 8 8 9 9 struct xe_device; 10 10 11 + #ifdef CONFIG_DEBUG_FS 11 12 void xe_debugfs_register(struct xe_device *xe); 13 + #else 14 + static inline void xe_debugfs_register(struct xe_device *xe) { } 15 + #endif 12 16 13 17 #endif

+80 -31

drivers/gpu/drm/xe/xe_devcoredump.c

··· 66 66 return &q->gt->uc.guc; 67 67 } 68 68 69 - static void xe_devcoredump_deferred_snap_work(struct work_struct *work) 69 + static ssize_t __xe_devcoredump_read(char *buffer, size_t count, 70 + struct xe_devcoredump *coredump) 70 71 { 71 - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); 72 - 73 - /* keep going if fw fails as we still want to save the memory and SW data */ 74 - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) 75 - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); 76 - xe_vm_snapshot_capture_delayed(ss->vm); 77 - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); 78 - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); 79 - } 80 - 81 - static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, 82 - size_t count, void *data, size_t datalen) 83 - { 84 - struct xe_devcoredump *coredump = data; 85 72 struct xe_device *xe; 86 73 struct xe_devcoredump_snapshot *ss; 87 74 struct drm_printer p; ··· 76 89 struct timespec64 ts; 77 90 int i; 78 91 79 - if (!coredump) 80 - return -ENODEV; 81 - 82 92 xe = coredump_to_xe(coredump); 83 93 ss = &coredump->snapshot; 84 94 85 - /* Ensure delayed work is captured before continuing */ 86 - flush_work(&ss->work); 87 - 88 95 iter.data = buffer; 89 - iter.offset = 0; 90 - iter.start = offset; 96 + iter.start = 0; 91 97 iter.remain = count; 92 98 93 99 p = drm_coredump_printer(&iter); ··· 114 134 return count - iter.remain; 115 135 } 116 136 137 + static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) 138 + { 139 + int i; 140 + 141 + xe_guc_ct_snapshot_free(ss->ct); 142 + ss->ct = NULL; 143 + 144 + xe_guc_exec_queue_snapshot_free(ss->ge); 145 + ss->ge = NULL; 146 + 147 + xe_sched_job_snapshot_free(ss->job); 148 + ss->job = NULL; 149 + 150 + for (i = 0; i < XE_NUM_HW_ENGINES; i++) 151 + if (ss->hwe[i]) { 152 + xe_hw_engine_snapshot_free(ss->hwe[i]); 153 + ss->hwe[i] = NULL; 154 + } 155 + 156 + xe_vm_snapshot_free(ss->vm); 157 + 
ss->vm = NULL; 158 + } 159 + 160 + static void xe_devcoredump_deferred_snap_work(struct work_struct *work) 161 + { 162 + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); 163 + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); 164 + 165 + /* keep going if fw fails as we still want to save the memory and SW data */ 166 + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) 167 + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); 168 + xe_vm_snapshot_capture_delayed(ss->vm); 169 + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); 170 + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); 171 + 172 + /* Calculate devcoredump size */ 173 + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); 174 + 175 + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); 176 + if (!ss->read.buffer) 177 + return; 178 + 179 + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); 180 + xe_devcoredump_snapshot_free(ss); 181 + } 182 + 183 + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, 184 + size_t count, void *data, size_t datalen) 185 + { 186 + struct xe_devcoredump *coredump = data; 187 + struct xe_devcoredump_snapshot *ss; 188 + ssize_t byte_copied; 189 + 190 + if (!coredump) 191 + return -ENODEV; 192 + 193 + ss = &coredump->snapshot; 194 + 195 + /* Ensure delayed work is captured before continuing */ 196 + flush_work(&ss->work); 197 + 198 + if (!ss->read.buffer) 199 + return -ENODEV; 200 + 201 + if (offset >= ss->read.size) 202 + return 0; 203 + 204 + byte_copied = count < ss->read.size - offset ? count : 205 + ss->read.size - offset; 206 + memcpy(buffer, ss->read.buffer + offset, byte_copied); 207 + 208 + return byte_copied; 209 + } 210 + 117 211 static void xe_devcoredump_free(void *data) 118 212 { 119 213 struct xe_devcoredump *coredump = data; 120 - int i; 121 214 122 215 /* Our device is gone. Nothing to do... 
*/ 123 216 if (!data || !coredump_to_xe(coredump)) ··· 198 145 199 146 cancel_work_sync(&coredump->snapshot.work); 200 147 201 - xe_guc_ct_snapshot_free(coredump->snapshot.ct); 202 - xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); 203 - xe_sched_job_snapshot_free(coredump->snapshot.job); 204 - for (i = 0; i < XE_NUM_HW_ENGINES; i++) 205 - if (coredump->snapshot.hwe[i]) 206 - xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); 207 - xe_vm_snapshot_free(coredump->snapshot.vm); 148 + xe_devcoredump_snapshot_free(&coredump->snapshot); 149 + kvfree(coredump->snapshot.read.buffer); 208 150 209 151 /* To prevent stale data on next snapshot, clear everything */ 210 152 memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); ··· 308 260 { 309 261 return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); 310 262 } 263 + 311 264 #endif

+8

drivers/gpu/drm/xe/xe_devcoredump_types.h

··· 46 46 struct xe_sched_job_snapshot *job; 47 47 /** @vm: Snapshot of VM state */ 48 48 struct xe_vm_snapshot *vm; 49 + 50 + /** @read: devcoredump in human readable format */ 51 + struct { 52 + /** @read.size: size of devcoredump in human readable format */ 53 + ssize_t size; 54 + /** @read.buffer: buffer of devcoredump in human readable format */ 55 + char *buffer; 56 + } read; 49 57 }; 50 58 51 59 /**

+4 -1

drivers/gpu/drm/xe/xe_device.c

··· 37 37 #include "xe_gt_printk.h" 38 38 #include "xe_gt_sriov_vf.h" 39 39 #include "xe_guc.h" 40 + #include "xe_hw_engine_group.h" 40 41 #include "xe_hwmon.h" 41 42 #include "xe_irq.h" 42 43 #include "xe_memirq.h" ··· 166 165 * vm->lock taken during xe_exec_queue_kill(). 167 166 */ 168 167 xa_for_each(&xef->exec_queue.xa, idx, q) { 168 + if (q->vm && q->hwe->hw_engine_group) 169 + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); 169 170 xe_exec_queue_kill(q); 170 171 xe_exec_queue_put(q); 171 172 } ··· 546 543 { 547 544 /* disable features that are not available/applicable to VFs */ 548 545 if (IS_SRIOV_VF(xe)) { 549 - xe->info.enable_display = 0; 546 + xe->info.probe_display = 0; 550 547 xe->info.has_heci_gscfi = 0; 551 548 xe->info.skip_guc_pc = 1; 552 549 xe->info.skip_pcode = 1;

+5 -10

drivers/gpu/drm/xe/xe_device.h

··· 15 15 return container_of(dev, struct xe_device, drm); 16 16 } 17 17 18 + static inline struct xe_device *kdev_to_xe_device(struct device *kdev) 19 + { 20 + return dev_get_drvdata(kdev); 21 + } 22 + 18 23 static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) 19 24 { 20 25 return pci_get_drvdata(pdev); ··· 138 133 } 139 134 140 135 void xe_device_assert_mem_access(struct xe_device *xe); 141 - 142 - static inline bool xe_device_in_fault_mode(struct xe_device *xe) 143 - { 144 - return xe->usm.num_vm_in_fault_mode != 0; 145 - } 146 - 147 - static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) 148 - { 149 - return xe->usm.num_vm_in_non_fault_mode != 0; 150 - } 151 136 152 137 static inline bool xe_device_has_flat_ccs(struct xe_device *xe) 153 138 {

+10 -7

drivers/gpu/drm/xe/xe_device_types.h

··· 204 204 struct xe_memirq memirq; 205 205 206 206 /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ 207 - struct drm_mm_node ggtt_balloon[2]; 207 + struct xe_ggtt_node *ggtt_balloon[2]; 208 208 } vf; 209 209 } sriov; 210 210 ··· 282 282 u8 has_sriov:1; 283 283 /** @info.has_usm: Device has unified shared memory support */ 284 284 u8 has_usm:1; 285 - /** @info.enable_display: display enabled */ 286 - u8 enable_display:1; 285 + /** 286 + * @info.probe_display: Probe display hardware. If set to 287 + * false, the driver will behave as if there is no display 288 + * hardware present and will not try to read/write to it in any 289 + * way. The display hardware, if it exists, will not be 290 + * exposed to userspace and will be left untouched in whatever 291 + * state the firmware or bootloader left it in. 292 + */ 293 + u8 probe_display:1; 287 294 /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ 288 295 u8 skip_mtcfg:1; 289 296 /** @info.skip_pcode: skip access to PCODE uC */ ··· 368 361 struct xarray asid_to_vm; 369 362 /** @usm.next_asid: next ASID, used to cyclical alloc asids */ 370 363 u32 next_asid; 371 - /** @usm.num_vm_in_fault_mode: number of VM in fault mode */ 372 - u32 num_vm_in_fault_mode; 373 - /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */ 374 - u32 num_vm_in_non_fault_mode; 375 364 /** @usm.lock: protects UM state */ 376 365 struct mutex lock; 377 366 } usm;

+19 -1

drivers/gpu/drm/xe/xe_exec.c

··· 14 14 #include "xe_bo.h" 15 15 #include "xe_device.h" 16 16 #include "xe_exec_queue.h" 17 + #include "xe_hw_engine_group.h" 17 18 #include "xe_macros.h" 18 19 #include "xe_ring_ops_types.h" 19 20 #include "xe_sched_job.h" ··· 125 124 bool write_locked, skip_retry = false; 126 125 ktime_t end = 0; 127 126 int err = 0; 127 + struct xe_hw_engine_group *group; 128 + enum xe_hw_engine_group_execution_mode mode, previous_mode; 128 129 129 130 if (XE_IOCTL_DBG(xe, args->extensions) || 130 131 XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || ··· 185 182 } 186 183 } 187 184 185 + group = q->hwe->hw_engine_group; 186 + mode = xe_hw_engine_group_find_exec_mode(q); 187 + 188 + if (mode == EXEC_MODE_DMA_FENCE) { 189 + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); 190 + if (err) 191 + goto err_syncs; 192 + } 193 + 188 194 retry: 189 195 if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { 190 196 err = down_write_killable(&vm->lock); ··· 211 199 downgrade_write(&vm->lock); 212 200 write_locked = false; 213 201 if (err) 214 - goto err_unlock_list; 202 + goto err_hw_exec_mode; 215 203 } 216 204 217 205 if (!args->num_batch_buffer) { ··· 324 312 spin_unlock(&xe->ttm.lru_lock); 325 313 } 326 314 315 + if (mode == EXEC_MODE_LR) 316 + xe_hw_engine_group_resume_faulting_lr_jobs(group); 317 + 327 318 err_repin: 328 319 if (!xe_vm_in_lr_mode(vm)) 329 320 up_read(&vm->userptr.notifier_lock); ··· 339 324 up_read(&vm->lock); 340 325 if (err == -EAGAIN && !skip_retry) 341 326 goto retry; 327 + err_hw_exec_mode: 328 + if (mode == EXEC_MODE_DMA_FENCE) 329 + xe_hw_engine_group_put(group); 342 330 err_syncs: 343 331 while (num_syncs--) 344 332 xe_sync_entry_cleanup(&syncs[num_syncs]);

+117 -96

drivers/gpu/drm/xe/xe_exec_queue.c

··· 14 14 #include "xe_device.h" 15 15 #include "xe_gt.h" 16 16 #include "xe_hw_engine_class_sysfs.h" 17 + #include "xe_hw_engine_group.h" 17 18 #include "xe_hw_fence.h" 18 19 #include "xe_lrc.h" 19 20 #include "xe_macros.h" ··· 74 73 q->ops = gt->exec_queue_ops; 75 74 INIT_LIST_HEAD(&q->lr.link); 76 75 INIT_LIST_HEAD(&q->multi_gt_link); 76 + INIT_LIST_HEAD(&q->hw_engine_group_link); 77 77 78 78 q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; 79 79 q->sched_props.preempt_timeout_us = ··· 168 166 169 167 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, 170 168 struct xe_vm *vm, 171 - enum xe_engine_class class, u32 flags) 169 + enum xe_engine_class class, 170 + u32 flags, u64 extensions) 172 171 { 173 172 struct xe_hw_engine *hwe, *hwe0 = NULL; 174 173 enum xe_hw_engine_id id; ··· 189 186 if (!logical_mask) 190 187 return ERR_PTR(-ENODEV); 191 188 192 - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); 189 + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); 190 + } 191 + 192 + /** 193 + * xe_exec_queue_create_bind() - Create bind exec queue. 194 + * @xe: Xe device. 195 + * @tile: tile which bind exec queue belongs to. 196 + * @flags: exec queue creation flags 197 + * @extensions: exec queue creation extensions 198 + * 199 + * Normalize bind exec queue creation. Bind exec queue is tied to migration VM 200 + * for access to physical memory required for page table programming. On 201 + * faulting devices the reserved copy engine instance must be used to avoid 202 + * deadlocking (user binds cannot get stuck behind faults as kernel binds which 203 + * resolve faults depend on user binds). On non-faulting devices any copy engine 204 + * can be used. 
205 + * 206 + * Returns exec queue on success, ERR_PTR on failure 207 + */ 208 + struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, 209 + struct xe_tile *tile, 210 + u32 flags, u64 extensions) 211 + { 212 + struct xe_gt *gt = tile->primary_gt; 213 + struct xe_exec_queue *q; 214 + struct xe_vm *migrate_vm; 215 + 216 + migrate_vm = xe_migrate_get_vm(tile->migrate); 217 + if (xe->info.has_usm) { 218 + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, 219 + XE_ENGINE_CLASS_COPY, 220 + gt->usm.reserved_bcs_instance, 221 + false); 222 + 223 + if (!hwe) 224 + return ERR_PTR(-EINVAL); 225 + 226 + q = xe_exec_queue_create(xe, migrate_vm, 227 + BIT(hwe->logical_instance), 1, hwe, 228 + flags, extensions); 229 + } else { 230 + q = xe_exec_queue_create_class(xe, gt, migrate_vm, 231 + XE_ENGINE_CLASS_COPY, flags, 232 + extensions); 233 + } 234 + xe_vm_put(migrate_vm); 235 + 236 + return q; 193 237 } 194 238 195 239 void xe_exec_queue_destroy(struct kref *ref) ··· 468 418 return 0; 469 419 } 470 420 471 - static const enum xe_engine_class user_to_xe_engine_class[] = { 472 - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, 473 - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, 474 - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, 475 - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, 476 - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, 477 - }; 478 - 479 - static struct xe_hw_engine * 480 - find_hw_engine(struct xe_device *xe, 481 - struct drm_xe_engine_class_instance eci) 482 - { 483 - u32 idx; 484 - 485 - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) 486 - return NULL; 487 - 488 - if (eci.gt_id >= xe->info.gt_count) 489 - return NULL; 490 - 491 - idx = array_index_nospec(eci.engine_class, 492 - ARRAY_SIZE(user_to_xe_engine_class)); 493 - 494 - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), 495 - user_to_xe_engine_class[idx], 496 - eci.engine_instance, true); 497 - } 498 - 499 - 
static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, 500 - struct drm_xe_engine_class_instance *eci, 501 - u16 width, u16 num_placements) 502 - { 503 - struct xe_hw_engine *hwe; 504 - enum xe_hw_engine_id id; 505 - u32 logical_mask = 0; 506 - 507 - if (XE_IOCTL_DBG(xe, width != 1)) 508 - return 0; 509 - if (XE_IOCTL_DBG(xe, num_placements != 1)) 510 - return 0; 511 - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) 512 - return 0; 513 - 514 - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; 515 - 516 - for_each_hw_engine(hwe, gt, id) { 517 - if (xe_hw_engine_is_reserved(hwe)) 518 - continue; 519 - 520 - if (hwe->class == 521 - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) 522 - logical_mask |= BIT(hwe->logical_instance); 523 - } 524 - 525 - return logical_mask; 526 - } 527 - 528 421 static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, 529 422 struct drm_xe_engine_class_instance *eci, 530 423 u16 width, u16 num_placements) ··· 490 497 491 498 n = j * width + i; 492 499 493 - hwe = find_hw_engine(xe, eci[n]); 500 + hwe = xe_hw_engine_lookup(xe, eci[n]); 494 501 if (XE_IOCTL_DBG(xe, !hwe)) 495 502 return 0; 496 503 ··· 529 536 struct drm_xe_engine_class_instance __user *user_eci = 530 537 u64_to_user_ptr(args->instances); 531 538 struct xe_hw_engine *hwe; 532 - struct xe_vm *vm, *migrate_vm; 539 + struct xe_vm *vm; 533 540 struct xe_gt *gt; 541 + struct xe_tile *tile; 534 542 struct xe_exec_queue *q = NULL; 535 543 u32 logical_mask; 536 544 u32 id; ··· 556 562 return -EINVAL; 557 563 558 564 if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { 559 - for_each_gt(gt, xe, id) { 565 + if (XE_IOCTL_DBG(xe, args->width != 1) || 566 + XE_IOCTL_DBG(xe, args->num_placements != 1) || 567 + XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) 568 + return -EINVAL; 569 + 570 + for_each_tile(tile, xe, id) { 560 571 struct xe_exec_queue *new; 561 - u32 flags; 572 + u32 flags = EXEC_QUEUE_FLAG_VM; 562 573 563 - if 
(xe_gt_is_media_type(gt)) 564 - continue; 574 + if (id) 575 + flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; 565 576 566 - eci[0].gt_id = gt->info.id; 567 - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, 568 - args->width, 569 - args->num_placements); 570 - if (XE_IOCTL_DBG(xe, !logical_mask)) 571 - return -EINVAL; 572 - 573 - hwe = find_hw_engine(xe, eci[0]); 574 - if (XE_IOCTL_DBG(xe, !hwe)) 575 - return -EINVAL; 576 - 577 - /* The migration vm doesn't hold rpm ref */ 578 - xe_pm_runtime_get_noresume(xe); 579 - 580 - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); 581 - 582 - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); 583 - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, 584 - args->width, hwe, flags, 585 - args->extensions); 586 - 587 - xe_pm_runtime_put(xe); /* now held by engine */ 588 - 589 - xe_vm_put(migrate_vm); 577 + new = xe_exec_queue_create_bind(xe, tile, flags, 578 + args->extensions); 590 579 if (IS_ERR(new)) { 591 580 err = PTR_ERR(new); 592 581 if (q) ··· 590 613 if (XE_IOCTL_DBG(xe, !logical_mask)) 591 614 return -EINVAL; 592 615 593 - hwe = find_hw_engine(xe, eci[0]); 616 + hwe = xe_hw_engine_lookup(xe, eci[0]); 594 617 if (XE_IOCTL_DBG(xe, !hwe)) 595 618 return -EINVAL; 596 619 ··· 623 646 624 647 err = xe_vm_add_compute_exec_queue(vm, q); 625 648 if (XE_IOCTL_DBG(xe, err)) 649 + goto put_exec_queue; 650 + } 651 + 652 + if (q->vm && q->hwe->hw_engine_group) { 653 + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); 654 + if (err) 626 655 goto put_exec_queue; 627 656 } 628 657 } ··· 781 798 xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; 782 799 } 783 800 801 + /** 802 + * xe_exec_queue_kill - permanently stop all execution from an exec queue 803 + * @q: The exec queue 804 + * 805 + * This function permanently stops all activity on an exec queue. 
If the queue 806 + * is actively executing on the HW, it will be kicked off the engine; any 807 + * pending jobs are discarded and all future submissions are rejected. 808 + * This function is safe to call multiple times. 809 + */ 784 810 void xe_exec_queue_kill(struct xe_exec_queue *q) 785 811 { 786 812 struct xe_exec_queue *eq = q, *next; ··· 822 830 if (XE_IOCTL_DBG(xe, !q)) 823 831 return -ENOENT; 824 832 833 + if (q->vm && q->hwe->hw_engine_group) 834 + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); 835 + 825 836 xe_exec_queue_kill(q); 826 837 827 838 trace_xe_exec_queue_close(q); ··· 836 841 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, 837 842 struct xe_vm *vm) 838 843 { 839 - if (q->flags & EXEC_QUEUE_FLAG_VM) 844 + if (q->flags & EXEC_QUEUE_FLAG_VM) { 840 845 lockdep_assert_held(&vm->lock); 841 - else 846 + } else { 842 847 xe_vm_assert_held(vm); 848 + lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); 849 + } 843 850 } 844 851 845 852 /** ··· 853 856 { 854 857 xe_exec_queue_last_fence_lockdep_assert(q, vm); 855 858 856 - if (q->last_fence) { 857 - dma_fence_put(q->last_fence); 858 - q->last_fence = NULL; 859 - } 859 + xe_exec_queue_last_fence_put_unlocked(q); 860 860 } 861 861 862 862 /** ··· 889 895 if (q->last_fence && 890 896 test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) 891 897 xe_exec_queue_last_fence_put(q, vm); 898 + 899 + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); 900 + dma_fence_get(fence); 901 + return fence; 902 + } 903 + 904 + /** 905 + * xe_exec_queue_last_fence_get_for_resume() - Get last fence 906 + * @q: The exec queue 907 + * @vm: The VM the engine does a bind or exec for 908 + * 909 + * Get last fence, takes a ref. Only safe to be called in the context of 910 + * resuming the hw engine group's long-running exec queue, when the group 911 + * semaphore is held. 
912 + * 913 + * Returns: last fence if not signaled, dma fence stub if signaled 914 + */ 915 + struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, 916 + struct xe_vm *vm) 917 + { 918 + struct dma_fence *fence; 919 + 920 + lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); 921 + 922 + if (q->last_fence && 923 + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) 924 + xe_exec_queue_last_fence_put_unlocked(q); 892 925 893 926 fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); 894 927 dma_fence_get(fence);

+7 -1

drivers/gpu/drm/xe/xe_exec_queue.h

··· 20 20 u64 extensions); 21 21 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, 22 22 struct xe_vm *vm, 23 - enum xe_engine_class class, u32 flags); 23 + enum xe_engine_class class, 24 + u32 flags, u64 extensions); 25 + struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, 26 + struct xe_tile *tile, 27 + u32 flags, u64 extensions); 24 28 25 29 void xe_exec_queue_fini(struct xe_exec_queue *q); 26 30 void xe_exec_queue_destroy(struct kref *ref); ··· 77 73 void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); 78 74 struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, 79 75 struct xe_vm *vm); 76 + struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *e, 77 + struct xe_vm *vm); 80 78 void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, 81 79 struct dma_fence *fence); 82 80 int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,

+2

drivers/gpu/drm/xe/xe_exec_queue_types.h

··· 140 140 * Protected by @vm's resv. Unused if @vm == NULL. 141 141 */ 142 142 u64 tlb_flush_seqno; 143 + /** @hw_engine_group_link: link into exec queues in the same hw engine group */ 144 + struct list_head hw_engine_group_link; 143 145 /** @lrc: logical ring context for this exec queue */ 144 146 struct xe_lrc *lrc[]; 145 147 };

+368 -123

drivers/gpu/drm/xe/xe_ggtt.c

··· 30 30 #include "xe_wa.h" 31 31 #include "xe_wopcm.h" 32 32 33 + /** 34 + * DOC: Global Graphics Translation Table (GGTT) 35 + * 36 + * Xe GGTT implements the support for a Global Virtual Address space that is used 37 + * for resources that are accessible to privileged (i.e. kernel-mode) processes, 38 + * and not tied to a specific user-level process. For example, the Graphics 39 + * micro-Controller (GuC) and Display Engine (if present) utilize this Global 40 + * address space. 41 + * 42 + * The Global GTT (GGTT) translates from the Global virtual address to a physical 43 + * address that can be accessed by HW. The GGTT is a flat, single-level table. 44 + * 45 + * Xe implements a simplified version of the GGTT specifically managing only a 46 + * certain range of it that goes from the Write Once Protected Content Memory (WOPCM) 47 + * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to 48 + * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space 49 + * is limited on both ends of the GGTT, because the GuC shim HW redirects 50 + * accesses to those addresses to other HW areas instead of going through the 51 + * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size, 52 + * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things 53 + * simple, instead of checking each object to see if they are accessed by GuC or 54 + * not, we just exclude those areas from the allocator. Additionally, to simplify 55 + * the driver load, we use the maximum WOPCM size in this logic instead of the 56 + * programmed one, so we don't need to wait until the actual size to be 57 + * programmed is determined (which requires FW fetch) before initializing the 58 + * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs 59 + * depending on the platform) but we can live with this. 
Another benefit of this 60 + * is the GuC bootrom can't access anything below the WOPCM max size so anything 61 + * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT 62 + * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max 63 + * give us the correct placement for free. 64 + */ 65 + 33 66 static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 34 67 u16 pat_index) 35 68 { ··· 161 128 { 162 129 struct xe_ggtt *ggtt = arg; 163 130 131 + destroy_workqueue(ggtt->wq); 164 132 mutex_destroy(&ggtt->lock); 165 133 drm_mm_takedown(&ggtt->mm); 166 134 } 167 135 168 - static void ggtt_fini(struct drm_device *drm, void *arg) 136 + static void ggtt_fini(void *arg) 169 137 { 170 138 struct xe_ggtt *ggtt = arg; 171 139 ··· 198 164 .ggtt_set_pte = xe_ggtt_set_pte_and_flush, 199 165 }; 200 166 201 - /* 202 - * Early GGTT initialization, which allows to create new mappings usable by the 203 - * GuC. 204 - * Mappings are not usable by the HW engines, as it doesn't have scratch / 167 + /** 168 + * xe_ggtt_init_early - Early GGTT initialization 169 + * @ggtt: the &xe_ggtt to be initialized 170 + * 171 + * It allows to create new mappings usable by the GuC. 172 + * Mappings are not usable by the HW engines, as it doesn't have scratch nor 205 173 * initial clear done to it yet. That will happen in the regular, non-early 206 - * GGTT init. 174 + * GGTT initialization. 175 + * 176 + * Return: 0 on success or a negative error code on failure. 207 177 */ 208 178 int xe_ggtt_init_early(struct xe_ggtt *ggtt) 209 179 { ··· 232 194 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 233 195 ggtt->flags |= XE_GGTT_FLAGS_64K; 234 196 235 - /* 236 - * 8B per entry, each points to a 4KB page. 237 - * 238 - * The GuC address space is limited on both ends of the GGTT, because 239 - * the GuC shim HW redirects accesses to those addresses to other HW 240 - * areas instead of going through the GGTT. 
On the bottom end, the GuC 241 - * can't access offsets below the WOPCM size, while on the top side the 242 - * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of 243 - * checking each object to see if they are accessed by GuC or not, we 244 - * just exclude those areas from the allocator. Additionally, to 245 - * simplify the driver load, we use the maximum WOPCM size in this logic 246 - * instead of the programmed one, so we don't need to wait until the 247 - * actual size to be programmed is determined (which requires FW fetch) 248 - * before initializing the GGTT. These simplifications might waste space 249 - * in the GGTT (about 20-25 MBs depending on the platform) but we can 250 - * live with this. 251 - * 252 - * Another benifit of this is the GuC bootrom can't access anything 253 - * below the WOPCM max size so anything the bootom needs to access (e.g. 254 - * a RSA key) needs to be placed in the GGTT above the WOPCM max size. 255 - * Starting the GGTT allocations above the WOPCM max give us the correct 256 - * placement for free. 
257 - */ 258 197 if (ggtt->size > GUC_GGTT_TOP) 259 198 ggtt->size = GUC_GGTT_TOP; 260 199 ··· 242 227 &xelpg_pt_wa_ops : &xelpg_pt_ops; 243 228 else 244 229 ggtt->pt_ops = &xelp_pt_ops; 230 + 231 + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); 245 232 246 233 drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), 247 234 ggtt->size - xe_wopcm_size(xe)); ··· 279 262 mutex_unlock(&ggtt->lock); 280 263 } 281 264 265 + static void ggtt_node_remove(struct xe_ggtt_node *node) 266 + { 267 + struct xe_ggtt *ggtt = node->ggtt; 268 + struct xe_device *xe = tile_to_xe(ggtt->tile); 269 + bool bound; 270 + int idx; 271 + 272 + bound = drm_dev_enter(&xe->drm, &idx); 273 + 274 + mutex_lock(&ggtt->lock); 275 + if (bound) 276 + xe_ggtt_clear(ggtt, node->base.start, node->base.size); 277 + drm_mm_remove_node(&node->base); 278 + node->base.size = 0; 279 + mutex_unlock(&ggtt->lock); 280 + 281 + if (!bound) 282 + goto free_node; 283 + 284 + if (node->invalidate_on_remove) 285 + xe_ggtt_invalidate(ggtt); 286 + 287 + drm_dev_exit(idx); 288 + 289 + free_node: 290 + xe_ggtt_node_fini(node); 291 + } 292 + 293 + static void ggtt_node_remove_work_func(struct work_struct *work) 294 + { 295 + struct xe_ggtt_node *node = container_of(work, typeof(*node), 296 + delayed_removal_work); 297 + struct xe_device *xe = tile_to_xe(node->ggtt->tile); 298 + 299 + xe_pm_runtime_get(xe); 300 + ggtt_node_remove(node); 301 + xe_pm_runtime_put(xe); 302 + } 303 + 304 + /** 305 + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT 306 + * @node: the &xe_ggtt_node to be removed 307 + * @invalidate: if node needs invalidation upon removal 308 + */ 309 + void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) 310 + { 311 + struct xe_ggtt *ggtt; 312 + struct xe_device *xe; 313 + 314 + if (!node || !node->ggtt) 315 + return; 316 + 317 + ggtt = node->ggtt; 318 + xe = tile_to_xe(ggtt->tile); 319 + 320 + node->invalidate_on_remove = invalidate; 321 + 322 + if (xe_pm_runtime_get_if_active(xe)) { 323 + 
ggtt_node_remove(node); 324 + xe_pm_runtime_put(xe); 325 + } else { 326 + queue_work(ggtt->wq, &node->delayed_removal_work); 327 + } 328 + } 329 + 330 + /** 331 + * xe_ggtt_init - Regular non-early GGTT initialization 332 + * @ggtt: the &xe_ggtt to be initialized 333 + * 334 + * Return: 0 on success or a negative error code on failure. 335 + */ 282 336 int xe_ggtt_init(struct xe_ggtt *ggtt) 283 337 { 284 338 struct xe_device *xe = tile_to_xe(ggtt->tile); ··· 377 289 378 290 xe_ggtt_initial_clear(ggtt); 379 291 380 - return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); 292 + return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt); 381 293 err: 382 294 ggtt->scratch = NULL; 383 295 return err; ··· 402 314 ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); 403 315 } 404 316 405 - void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) 406 - { 407 - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; 408 - u64 addr, scratch_pte; 409 - 410 - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); 411 - 412 - printk("%sGlobal GTT:", prefix); 413 - for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { 414 - unsigned int i = addr / XE_PAGE_SIZE; 415 - 416 - xe_tile_assert(ggtt->tile, addr <= U32_MAX); 417 - if (ggtt->gsm[i] == scratch_pte) 418 - continue; 419 - 420 - printk("%s ggtt[0x%08x] = 0x%016llx", 421 - prefix, (u32)addr, ggtt->gsm[i]); 422 - } 423 - } 424 - 425 317 static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, 426 318 const struct drm_mm_node *node, const char *description) 427 319 { ··· 415 347 } 416 348 417 349 /** 418 - * xe_ggtt_balloon - prevent allocation of specified GGTT addresses 419 - * @ggtt: the &xe_ggtt where we want to make reservation 350 + * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses 351 + * @node: the &xe_ggtt_node to hold reserved GGTT node 420 352 * @start: the starting GGTT address of the reserved region 421 353 * @end: the end GGTT address of the reserved 
region 422 - * @node: the &drm_mm_node to hold reserved GGTT node 423 354 * 424 - * Use xe_ggtt_deballoon() to release a reserved GGTT node. 355 + * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. 425 356 * 426 357 * Return: 0 on success or a negative error code on failure. 427 358 */ 428 - int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) 359 + int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) 429 360 { 361 + struct xe_ggtt *ggtt = node->ggtt; 430 362 int err; 431 363 432 364 xe_tile_assert(ggtt->tile, start < end); 433 365 xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); 434 366 xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); 435 - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); 367 + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); 436 368 437 - node->color = 0; 438 - node->start = start; 439 - node->size = end - start; 369 + node->base.color = 0; 370 + node->base.start = start; 371 + node->base.size = end - start; 440 372 441 373 mutex_lock(&ggtt->lock); 442 - err = drm_mm_reserve_node(&ggtt->mm, node); 374 + err = drm_mm_reserve_node(&ggtt->mm, &node->base); 443 375 mutex_unlock(&ggtt->lock); 444 376 445 377 if (xe_gt_WARN(ggtt->tile->primary_gt, err, 446 378 "Failed to balloon GGTT %#llx-%#llx (%pe)\n", 447 - node->start, node->start + node->size, ERR_PTR(err))) 379 + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) 448 380 return err; 449 381 450 - xe_ggtt_dump_node(ggtt, node, "balloon"); 382 + xe_ggtt_dump_node(ggtt, &node->base, "balloon"); 451 383 return 0; 452 384 } 453 385 454 386 /** 455 - * xe_ggtt_deballoon - release a reserved GGTT region 456 - * @ggtt: the &xe_ggtt where reserved node belongs 457 - * @node: the &drm_mm_node with reserved GGTT region 387 + * xe_ggtt_node_remove_balloon - release a reserved GGTT region 388 + * @node: the &xe_ggtt_node with reserved GGTT region 458 389 * 459 - * 
See xe_ggtt_balloon() for details. 390 + * See xe_ggtt_node_insert_balloon() for details. 460 391 */ 461 - void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) 392 + void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) 462 393 { 463 - if (!drm_mm_node_allocated(node)) 394 + if (!node || !node->ggtt) 464 395 return; 465 396 466 - xe_ggtt_dump_node(ggtt, node, "deballoon"); 397 + if (!drm_mm_node_allocated(&node->base)) 398 + goto free_node; 467 399 468 - mutex_lock(&ggtt->lock); 469 - drm_mm_remove_node(node); 470 - mutex_unlock(&ggtt->lock); 400 + xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); 401 + 402 + mutex_lock(&node->ggtt->lock); 403 + drm_mm_remove_node(&node->base); 404 + mutex_unlock(&node->ggtt->lock); 405 + 406 + free_node: 407 + xe_ggtt_node_fini(node); 471 408 } 472 409 473 - int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, 474 - u32 size, u32 align, u32 mm_flags) 410 + /** 411 + * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT 412 + * @node: the &xe_ggtt_node to be inserted 413 + * @size: size of the node 414 + * @align: alignment constraint of the node 415 + * @mm_flags: flags to control the node behavior 416 + * 417 + * It cannot be called without first having called xe_ggtt_init() once. 418 + * To be used in cases where ggtt->lock is already taken. 419 + * 420 + * Return: 0 on success or a negative error code on failure. 
421 + */ 422 + int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, 423 + u32 size, u32 align, u32 mm_flags) 475 424 { 476 - return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, 425 + return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, 477 426 mm_flags); 478 427 } 479 428 480 - int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, 481 - u32 size, u32 align) 429 + /** 430 + * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT 431 + * @node: the &xe_ggtt_node to be inserted 432 + * @size: size of the node 433 + * @align: alignment constraint of the node 434 + * 435 + * It cannot be called without first having called xe_ggtt_init() once. 436 + * 437 + * Return: 0 on success or a negative error code on failure. 438 + */ 439 + int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) 482 440 { 483 441 int ret; 484 442 485 - mutex_lock(&ggtt->lock); 486 - ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, 487 - align, DRM_MM_INSERT_HIGH); 488 - mutex_unlock(&ggtt->lock); 443 + if (!node || !node->ggtt) 444 + return -ENOENT; 445 + 446 + mutex_lock(&node->ggtt->lock); 447 + ret = xe_ggtt_node_insert_locked(node, size, align, 448 + DRM_MM_INSERT_HIGH); 449 + mutex_unlock(&node->ggtt->lock); 489 450 490 451 return ret; 491 452 } 492 453 454 + /** 455 + * xe_ggtt_node_init - Initialize %xe_ggtt_node struct 456 + * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. 457 + * 458 + * This function will allocate the struct %xe_ggtt_node and return its pointer. 459 + * This struct will then be freed after the node removal upon xe_ggtt_node_remove() 460 + * or xe_ggtt_node_remove_balloon(). 461 + * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated 462 + * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), 463 + * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. 
464 + * 465 + * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 466 + **/ 467 + struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) 468 + { 469 + struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS); 470 + 471 + if (!node) 472 + return ERR_PTR(-ENOMEM); 473 + 474 + INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); 475 + node->ggtt = ggtt; 476 + 477 + return node; 478 + } 479 + 480 + /** 481 + * xe_ggtt_node_fini - Forcebly finalize %xe_ggtt_node struct 482 + * @node: the &xe_ggtt_node to be freed 483 + * 484 + * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), 485 + * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, 486 + * this function needs to be called to free the %xe_ggtt_node struct 487 + **/ 488 + void xe_ggtt_node_fini(struct xe_ggtt_node *node) 489 + { 490 + kfree(node); 491 + } 492 + 493 + /** 494 + * xe_ggtt_node_allocated - Check if node is allocated in GGTT 495 + * @node: the &xe_ggtt_node to be inspected 496 + * 497 + * Return: True if allocated, False otherwise. 498 + */ 499 + bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) 500 + { 501 + if (!node || !node->ggtt) 502 + return false; 503 + 504 + return drm_mm_node_allocated(&node->base); 505 + } 506 + 507 + /** 508 + * xe_ggtt_map_bo - Map the BO into GGTT 509 + * @ggtt: the &xe_ggtt where node will be mapped 510 + * @bo: the &xe_bo to be mapped 511 + */ 493 512 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 494 513 { 495 514 u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? 
XE_CACHE_NONE : XE_CACHE_WB; 496 515 u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; 497 - u64 start = bo->ggtt_node.start; 516 + u64 start; 498 517 u64 offset, pte; 518 + 519 + if (XE_WARN_ON(!bo->ggtt_node)) 520 + return; 521 + 522 + start = bo->ggtt_node->base.start; 499 523 500 524 for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { 501 525 pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); ··· 604 444 if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) 605 445 alignment = SZ_64K; 606 446 607 - if (XE_WARN_ON(bo->ggtt_node.size)) { 447 + if (XE_WARN_ON(bo->ggtt_node)) { 608 448 /* Someone's already inserted this BO in the GGTT */ 609 - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); 449 + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); 610 450 return 0; 611 451 } 612 452 ··· 615 455 return err; 616 456 617 457 xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); 458 + 459 + bo->ggtt_node = xe_ggtt_node_init(ggtt); 460 + if (IS_ERR(bo->ggtt_node)) { 461 + err = PTR_ERR(bo->ggtt_node); 462 + goto out; 463 + } 464 + 618 465 mutex_lock(&ggtt->lock); 619 - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, 466 + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, 620 467 alignment, 0, start, end, 0); 621 - if (!err) 468 + if (err) 469 + xe_ggtt_node_fini(bo->ggtt_node); 470 + else 622 471 xe_ggtt_map_bo(ggtt, bo); 623 472 mutex_unlock(&ggtt->lock); 624 473 625 474 if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) 626 475 xe_ggtt_invalidate(ggtt); 476 + 477 + out: 627 478 xe_pm_runtime_put(tile_to_xe(ggtt->tile)); 628 479 629 480 return err; 630 481 } 631 482 483 + /** 484 + * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space 485 + * @ggtt: the &xe_ggtt where bo will be inserted 486 + * @bo: the &xe_bo to be inserted 487 + * @start: address where it will be inserted 488 + * @end: end of the range where it will be inserted 489 + * 490 + * Return: 0 
on success or a negative error code on failure. 491 + */ 632 492 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 633 493 u64 start, u64 end) 634 494 { 635 495 return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); 636 496 } 637 497 498 + /** 499 + * xe_ggtt_insert_bo - Insert BO into GGTT 500 + * @ggtt: the &xe_ggtt where bo will be inserted 501 + * @bo: the &xe_bo to be inserted 502 + * 503 + * Return: 0 on success or a negative error code on failure. 504 + */ 638 505 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 639 506 { 640 507 return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); 641 508 } 642 509 643 - void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, 644 - bool invalidate) 645 - { 646 - struct xe_device *xe = tile_to_xe(ggtt->tile); 647 - bool bound; 648 - int idx; 649 - 650 - bound = drm_dev_enter(&xe->drm, &idx); 651 - if (bound) 652 - xe_pm_runtime_get_noresume(xe); 653 - 654 - mutex_lock(&ggtt->lock); 655 - if (bound) 656 - xe_ggtt_clear(ggtt, node->start, node->size); 657 - drm_mm_remove_node(node); 658 - node->size = 0; 659 - mutex_unlock(&ggtt->lock); 660 - 661 - if (!bound) 662 - return; 663 - 664 - if (invalidate) 665 - xe_ggtt_invalidate(ggtt); 666 - 667 - xe_pm_runtime_put(xe); 668 - drm_dev_exit(idx); 669 - } 670 - 510 + /** 511 + * xe_ggtt_remove_bo - Remove a BO from the GGTT 512 + * @ggtt: the &xe_ggtt where node will be removed 513 + * @bo: the &xe_bo to be removed 514 + */ 671 515 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 672 516 { 673 - if (XE_WARN_ON(!bo->ggtt_node.size)) 517 + if (XE_WARN_ON(!bo->ggtt_node)) 674 518 return; 675 519 676 520 /* This BO is not currently in the GGTT */ 677 - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); 521 + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); 678 522 679 - xe_ggtt_remove_node(ggtt, &bo->ggtt_node, 523 + xe_ggtt_node_remove(bo->ggtt_node, 680 524 bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); 525 + 
} 526 + 527 + /** 528 + * xe_ggtt_largest_hole - Largest GGTT hole 529 + * @ggtt: the &xe_ggtt that will be inspected 530 + * @alignment: minimum alignment 531 + * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare 532 + * 533 + * Return: size of the largest continuous GGTT region 534 + */ 535 + u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare) 536 + { 537 + const struct drm_mm *mm = &ggtt->mm; 538 + const struct drm_mm_node *entry; 539 + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); 540 + u64 hole_start, hole_end, hole_size; 541 + u64 max_hole = 0; 542 + 543 + mutex_lock(&ggtt->lock); 544 + 545 + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { 546 + hole_start = max(hole_start, hole_min_start); 547 + hole_start = ALIGN(hole_start, alignment); 548 + hole_end = ALIGN_DOWN(hole_end, alignment); 549 + if (hole_start >= hole_end) 550 + continue; 551 + hole_size = hole_end - hole_start; 552 + if (spare) 553 + *spare -= min3(*spare, hole_size, max_hole); 554 + max_hole = max(max_hole, hole_size); 555 + } 556 + 557 + mutex_unlock(&ggtt->lock); 558 + 559 + return max_hole; 681 560 } 682 561 683 562 #ifdef CONFIG_PCI_IOV ··· 747 548 748 549 /** 749 550 * xe_ggtt_assign - assign a GGTT region to the VF 750 - * @ggtt: the &xe_ggtt where the node belongs 751 - * @node: the &drm_mm_node to update 551 + * @node: the &xe_ggtt_node to update 752 552 * @vfid: the VF identifier 753 553 * 754 554 * This function is used by the PF driver to assign a GGTT region to the VF. 755 555 * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some 756 556 * platforms VFs can't modify that either. 
757 557 */ 758 - void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) 558 + void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) 759 559 { 760 - mutex_lock(&ggtt->lock); 761 - xe_ggtt_assign_locked(ggtt, node, vfid); 762 - mutex_unlock(&ggtt->lock); 560 + mutex_lock(&node->ggtt->lock); 561 + xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); 562 + mutex_unlock(&node->ggtt->lock); 763 563 } 764 564 #endif 765 565 566 + /** 567 + * xe_ggtt_dump - Dump GGTT for debug 568 + * @ggtt: the &xe_ggtt to be dumped 569 + * @p: the &drm_mm_printer helper handle to be used to dump the information 570 + * 571 + * Return: 0 on success or a negative error code on failure. 572 + */ 766 573 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) 767 574 { 768 575 int err; ··· 780 575 drm_mm_print(&ggtt->mm, p); 781 576 mutex_unlock(&ggtt->lock); 782 577 return err; 578 + } 579 + 580 + /** 581 + * xe_ggtt_print_holes - Print holes 582 + * @ggtt: the &xe_ggtt to be inspected 583 + * @alignment: min alignment 584 + * @p: the &drm_printer 585 + * 586 + * Print GGTT ranges that are available and return total size available. 587 + * 588 + * Return: Total available size. 
589 + */ 590 + u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p) 591 + { 592 + const struct drm_mm *mm = &ggtt->mm; 593 + const struct drm_mm_node *entry; 594 + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); 595 + u64 hole_start, hole_end, hole_size; 596 + u64 total = 0; 597 + char buf[10]; 598 + 599 + mutex_lock(&ggtt->lock); 600 + 601 + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { 602 + hole_start = max(hole_start, hole_min_start); 603 + hole_start = ALIGN(hole_start, alignment); 604 + hole_end = ALIGN_DOWN(hole_end, alignment); 605 + if (hole_start >= hole_end) 606 + continue; 607 + hole_size = hole_end - hole_start; 608 + total += hole_size; 609 + 610 + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); 611 + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", 612 + hole_start, hole_end - 1, buf); 613 + } 614 + 615 + mutex_unlock(&ggtt->lock); 616 + 617 + return total; 783 618 }

+13 -11

drivers/gpu/drm/xe/xe_ggtt.h

··· 12 12 13 13 int xe_ggtt_init_early(struct xe_ggtt *ggtt); 14 14 int xe_ggtt_init(struct xe_ggtt *ggtt); 15 - void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); 16 15 17 - int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); 18 - void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); 16 + struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); 17 + void xe_ggtt_node_fini(struct xe_ggtt_node *node); 18 + int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, 19 + u64 start, u64 size); 20 + void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); 19 21 20 - int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, 21 - u32 size, u32 align); 22 - int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, 23 - struct drm_mm_node *node, 24 - u32 size, u32 align, u32 mm_flags); 25 - void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, 26 - bool invalidate); 22 + int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); 23 + int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, 24 + u32 size, u32 align, u32 mm_flags); 25 + void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); 26 + bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); 27 27 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 28 28 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 29 29 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 30 30 u64 start, u64 end); 31 31 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 32 + u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); 32 33 33 34 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); 35 + u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); 34 36 35 37 #ifdef CONFIG_PCI_IOV 36 - void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); 38 + void 
xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); 37 39 #endif 38 40 39 41 #endif

+47 -7

drivers/gpu/drm/xe/xe_ggtt_types.h

··· 13 13 struct xe_bo; 14 14 struct xe_gt; 15 15 16 + /** 17 + * struct xe_ggtt - Main GGTT struct 18 + * 19 + * In general, each tile can contains its own Global Graphics Translation Table 20 + * (GGTT) instance. 21 + */ 16 22 struct xe_ggtt { 23 + /** @tile: Back pointer to tile where this GGTT belongs */ 17 24 struct xe_tile *tile; 18 - 25 + /** @size: Total size of this GGTT */ 19 26 u64 size; 20 27 21 28 #define XE_GGTT_FLAGS_64K BIT(0) 29 + /** 30 + * @flags: Flags for this GGTT 31 + * Acceptable flags: 32 + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. 33 + */ 22 34 unsigned int flags; 23 - 35 + /** @scratch: Internal object allocation used as a scratch page */ 24 36 struct xe_bo *scratch; 25 - 37 + /** @lock: Mutex lock to protect GGTT data */ 26 38 struct mutex lock; 27 - 39 + /** 40 + * @gsm: The iomem pointer to the actual location of the translation 41 + * table located in the GSM for easy PTE manipulation 42 + */ 28 43 u64 __iomem *gsm; 29 - 44 + /** @pt_ops: Page Table operations per platform */ 30 45 const struct xe_ggtt_pt_ops *pt_ops; 31 - 46 + /** @mm: The memory manager used to manage individual GGTT allocations */ 32 47 struct drm_mm mm; 33 - 34 48 /** @access_count: counts GGTT writes */ 35 49 unsigned int access_count; 50 + /** @wq: Dedicated unordered work queue to process node removals */ 51 + struct workqueue_struct *wq; 36 52 }; 37 53 54 + /** 55 + * struct xe_ggtt_node - A node in GGTT. 56 + * 57 + * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node 58 + * insertion, reservation, or 'ballooning'. 59 + * It will, then, be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_deballoon(). 
60 + */ 61 + struct xe_ggtt_node { 62 + /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ 63 + struct xe_ggtt *ggtt; 64 + /** @base: A drm_mm_node */ 65 + struct drm_mm_node base; 66 + /** @delayed_removal_work: The work struct for the delayed removal */ 67 + struct work_struct delayed_removal_work; 68 + /** @invalidate_on_remove: If it needs invalidation upon removal */ 69 + bool invalidate_on_remove; 70 + }; 71 + 72 + /** 73 + * struct xe_ggtt_pt_ops - GGTT Page table operations 74 + * Which can vary from platform to platform. 75 + */ 38 76 struct xe_ggtt_pt_ops { 77 + /** @pte_encode_bo: Encode PTE address for a given BO */ 39 78 u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); 79 + /** @ggtt_set_pte: Directly write into GGTT's PTE */ 40 80 void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); 41 81 }; 42 82

+15 -8

drivers/gpu/drm/xe/xe_gpu_scheduler.c

··· 15 15 { 16 16 struct xe_sched_msg *msg; 17 17 18 - spin_lock(&sched->base.job_list_lock); 18 + xe_sched_msg_lock(sched); 19 19 msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); 20 20 if (msg) 21 21 xe_sched_process_msg_queue(sched); 22 - spin_unlock(&sched->base.job_list_lock); 22 + xe_sched_msg_unlock(sched); 23 23 } 24 24 25 25 static struct xe_sched_msg * ··· 27 27 { 28 28 struct xe_sched_msg *msg; 29 29 30 - spin_lock(&sched->base.job_list_lock); 30 + xe_sched_msg_lock(sched); 31 31 msg = list_first_entry_or_null(&sched->msgs, 32 32 struct xe_sched_msg, link); 33 33 if (msg) 34 - list_del(&msg->link); 35 - spin_unlock(&sched->base.job_list_lock); 34 + list_del_init(&msg->link); 35 + xe_sched_msg_unlock(sched); 36 36 37 37 return msg; 38 38 } ··· 93 93 void xe_sched_add_msg(struct xe_gpu_scheduler *sched, 94 94 struct xe_sched_msg *msg) 95 95 { 96 - spin_lock(&sched->base.job_list_lock); 97 - list_add_tail(&msg->link, &sched->msgs); 98 - spin_unlock(&sched->base.job_list_lock); 96 + xe_sched_msg_lock(sched); 97 + xe_sched_add_msg_locked(sched, msg); 98 + xe_sched_msg_unlock(sched); 99 + } 99 100 101 + void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, 102 + struct xe_sched_msg *msg) 103 + { 104 + lockdep_assert_held(&sched->base.job_list_lock); 105 + 106 + list_add_tail(&msg->link, &sched->msgs); 100 107 xe_sched_process_msg_queue(sched); 101 108 }

+12

drivers/gpu/drm/xe/xe_gpu_scheduler.h

··· 24 24 25 25 void xe_sched_add_msg(struct xe_gpu_scheduler *sched, 26 26 struct xe_sched_msg *msg); 27 + void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, 28 + struct xe_sched_msg *msg); 29 + 30 + static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) 31 + { 32 + spin_lock(&sched->base.job_list_lock); 33 + } 34 + 35 + static inline void xe_sched_msg_unlock(struct xe_gpu_scheduler *sched) 36 + { 37 + spin_unlock(&sched->base.job_list_lock); 38 + } 27 39 28 40 static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) 29 41 {

+3 -9

drivers/gpu/drm/xe/xe_gsc.c

··· 450 450 xe_exec_queue_put(gsc->q); 451 451 gsc->q = NULL; 452 452 } 453 - 454 - if (gsc->private) { 455 - xe_bo_unpin_map_no_vm(gsc->private); 456 - gsc->private = NULL; 457 - } 458 453 } 459 454 460 455 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) ··· 469 474 if (!hwe) 470 475 return -ENODEV; 471 476 472 - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, 473 - ttm_bo_type_kernel, 474 - XE_BO_FLAG_STOLEN | 475 - XE_BO_FLAG_GGTT); 477 + bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M, 478 + XE_BO_FLAG_STOLEN | 479 + XE_BO_FLAG_GGTT); 476 480 if (IS_ERR(bo)) 477 481 return PTR_ERR(bo); 478 482

+8 -37

drivers/gpu/drm/xe/xe_gsc_proxy.c

··· 62 62 return container_of(gsc, struct xe_gt, uc.gsc); 63 63 } 64 64 65 - static inline struct xe_device *kdev_to_xe(struct device *kdev) 66 - { 67 - return dev_get_drvdata(kdev); 68 - } 69 - 70 65 bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) 71 66 { 72 67 struct xe_gt *gt = gsc_to_gt(gsc); ··· 340 345 static int xe_gsc_proxy_component_bind(struct device *xe_kdev, 341 346 struct device *mei_kdev, void *data) 342 347 { 343 - struct xe_device *xe = kdev_to_xe(xe_kdev); 348 + struct xe_device *xe = kdev_to_xe_device(xe_kdev); 344 349 struct xe_gt *gt = xe->tiles[0].media_gt; 345 350 struct xe_gsc *gsc = &gt->uc.gsc; 346 351 ··· 355 360 static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, 356 361 struct device *mei_kdev, void *data) 357 362 { 358 - struct xe_device *xe = kdev_to_xe(xe_kdev); 363 + struct xe_device *xe = kdev_to_xe_device(xe_kdev); 359 364 struct xe_gt *gt = xe->tiles[0].media_gt; 360 365 struct xe_gsc *gsc = &gt->uc.gsc; 361 366 ··· 371 376 .unbind = xe_gsc_proxy_component_unbind, 372 377 }; 373 378 374 - static void proxy_channel_free(struct drm_device *drm, void *arg) 375 - { 376 - struct xe_gsc *gsc = arg; 377 - 378 - if (!gsc->proxy.bo) 379 - return; 380 - 381 - if (gsc->proxy.to_csme) { 382 - kfree(gsc->proxy.to_csme); 383 - gsc->proxy.to_csme = NULL; 384 - gsc->proxy.from_csme = NULL; 385 - } 386 - 387 - if (gsc->proxy.bo) { 388 - iosys_map_clear(&gsc->proxy.to_gsc); 389 - iosys_map_clear(&gsc->proxy.from_gsc); 390 - xe_bo_unpin_map_no_vm(gsc->proxy.bo); 391 - gsc->proxy.bo = NULL; 392 - } 393 - } 394 - 395 379 static int proxy_channel_alloc(struct xe_gsc *gsc) 396 380 { 397 381 struct xe_gt *gt = gsc_to_gt(gsc); ··· 379 405 struct xe_bo *bo; 380 406 void *csme; 381 407 382 - csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); 408 + csme = drmm_kzalloc(&xe->drm, GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); 383 409 if (!csme) 384 410 return -ENOMEM; 385 411 386 - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, 387 - 
ttm_bo_type_kernel, 388 - XE_BO_FLAG_SYSTEM | 389 - XE_BO_FLAG_GGTT); 390 - if (IS_ERR(bo)) { 391 - kfree(csme); 412 + bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE, 413 + XE_BO_FLAG_SYSTEM | 414 + XE_BO_FLAG_GGTT); 415 + if (IS_ERR(bo)) 392 416 return PTR_ERR(bo); 393 - } 394 417 395 418 gsc->proxy.bo = bo; 396 419 gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); ··· 395 424 gsc->proxy.to_csme = csme; 396 425 gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; 397 426 398 - return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); 427 + return 0; 399 428 } 400 429 401 430 /**

+5 -5

drivers/gpu/drm/xe/xe_gt.c

··· 112 112 113 113 if (!xe_gt_is_media_type(gt)) { 114 114 xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); 115 - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); 115 + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); 116 116 reg |= CG_DIS_CNTLBUS; 117 - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); 117 + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); 118 118 } 119 119 120 120 xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); ··· 136 136 if (WARN_ON(err)) 137 137 return; 138 138 139 - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); 139 + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); 140 140 reg &= ~CG_DIS_CNTLBUS; 141 - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); 141 + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); 142 142 143 143 xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 144 144 } ··· 559 559 560 560 xe_gt_mcr_init_early(gt); 561 561 xe_pat_init(gt); 562 - xe_gt_enable_host_l2_vram(gt); 563 562 564 563 err = xe_uc_init(&gt->uc); 565 564 if (err) ··· 570 571 571 572 xe_gt_topology_init(gt); 572 573 xe_gt_mcr_init(gt); 574 + xe_gt_enable_host_l2_vram(gt); 573 575 574 576 out_fw: 575 577 xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);

+13

drivers/gpu/drm/xe/xe_gt_debugfs.c

··· 17 17 #include "xe_gt_mcr.h" 18 18 #include "xe_gt_sriov_pf_debugfs.h" 19 19 #include "xe_gt_sriov_vf_debugfs.h" 20 + #include "xe_gt_stats.h" 20 21 #include "xe_gt_topology.h" 22 + #include "xe_guc_hwconfig.h" 21 23 #include "xe_hw_engine.h" 22 24 #include "xe_lrc.h" 23 25 #include "xe_macros.h" ··· 271 269 return 0; 272 270 } 273 271 272 + static int hwconfig(struct xe_gt *gt, struct drm_printer *p) 273 + { 274 + xe_pm_runtime_get(gt_to_xe(gt)); 275 + xe_guc_hwconfig_dump(&gt->uc.guc, p); 276 + xe_pm_runtime_put(gt_to_xe(gt)); 277 + 278 + return 0; 279 + } 280 + 274 281 static const struct drm_info_list debugfs_list[] = { 275 282 {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, 276 283 {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, ··· 297 286 {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, 298 287 {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, 299 288 {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, 289 + {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, 290 + {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, 300 291 }; 301 292 302 293 void xe_gt_debugfs_register(struct xe_gt *gt)

+36 -4

drivers/gpu/drm/xe/xe_gt_mcr.c

··· 8 8 #include "regs/xe_gt_regs.h" 9 9 #include "xe_assert.h" 10 10 #include "xe_gt.h" 11 + #include "xe_gt_printk.h" 11 12 #include "xe_gt_topology.h" 12 13 #include "xe_gt_types.h" 14 + #include "xe_guc_hwconfig.h" 13 15 #include "xe_mmio.h" 14 16 #include "xe_sriov.h" 15 17 ··· 299 297 300 298 static unsigned int dss_per_group(struct xe_gt *gt) 301 299 { 300 + struct xe_guc *guc = &gt->uc.guc; 301 + u32 max_slices = 0, max_subslices = 0; 302 + int ret; 303 + 304 + /* 305 + * Try to query the GuC's hwconfig table for the maximum number of 306 + * slices and subslices. These don't reflect the platform's actual 307 + * slice/DSS counts, just the physical layout by which we should 308 + * determine the steering targets. On older platforms with older GuC 309 + * firmware releases it's possible that these attributes may not be 310 + * included in the table, so we can always fall back to the old 311 + * hardcoded layouts. 312 + */ 313 + #define HWCONFIG_ATTR_MAX_SLICES 1 314 + #define HWCONFIG_ATTR_MAX_SUBSLICES 70 315 + 316 + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SLICES, 317 + &max_slices); 318 + if (ret < 0 || max_slices == 0) 319 + goto fallback; 320 + 321 + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SUBSLICES, 322 + &max_subslices); 323 + if (ret < 0 || max_subslices == 0) 324 + goto fallback; 325 + 326 + return DIV_ROUND_UP(max_subslices, max_slices); 327 + 328 + fallback: 329 + xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); 302 330 if (gt_to_xe(gt)->info.platform == XE_PVC) 303 331 return 8; 304 332 else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) ··· 346 314 */ 347 315 void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) 348 316 { 349 - int dss_per_grp = dss_per_group(gt); 350 - 351 317 xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); 352 318 353 - *group = dss / dss_per_grp; 354 - *instance = dss % dss_per_grp; 319 + *group = dss / 
gt->steering_dss_per_grp; 320 + *instance = dss % gt->steering_dss_per_grp; 355 321 } 356 322 357 323 static void init_steering_dss(struct xe_gt *gt) 358 324 { 325 + gt->steering_dss_per_grp = dss_per_group(gt); 326 + 359 327 xe_gt_mcr_get_dss_steering(gt, 360 328 min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), 361 329 xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)),

+45 -12

drivers/gpu/drm/xe/xe_gt_pagefault.c

··· 287 287 PFD_VIRTUAL_ADDR_LO_SHIFT; 288 288 289 289 pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % 290 - PF_QUEUE_NUM_DW; 290 + pf_queue->num_dw; 291 291 ret = true; 292 292 } 293 293 spin_unlock_irq(&pf_queue->lock); ··· 299 299 { 300 300 lockdep_assert_held(&pf_queue->lock); 301 301 302 - return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= 302 + return CIRC_SPACE(pf_queue->head, pf_queue->tail, 303 + pf_queue->num_dw) <= 303 304 PF_MSG_LEN_DW; 304 305 } 305 306 ··· 313 312 u32 asid; 314 313 bool full; 315 314 316 - /* 317 - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 318 - */ 319 - BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW); 320 - 321 315 if (unlikely(len != PF_MSG_LEN_DW)) 322 316 return -EPROTO; 323 317 324 318 asid = FIELD_GET(PFD_ASID, msg[1]); 325 319 pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); 326 320 321 + /* 322 + * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 323 + */ 324 + xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); 325 + 327 326 spin_lock_irqsave(&pf_queue->lock, flags); 328 327 full = pf_queue_full(pf_queue); 329 328 if (!full) { 330 329 memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); 331 - pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; 330 + pf_queue->head = (pf_queue->head + len) % 331 + pf_queue->num_dw; 332 332 queue_work(gt->usm.pf_wq, &pf_queue->worker); 333 333 } else { 334 334 drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); ··· 388 386 { 389 387 struct xe_gt *gt = arg; 390 388 struct xe_device *xe = gt_to_xe(gt); 389 + int i; 391 390 392 391 if (!xe->info.has_usm) 393 392 return; 394 393 395 394 destroy_workqueue(gt->usm.acc_wq); 396 395 destroy_workqueue(gt->usm.pf_wq); 396 + 397 + for (i = 0; i < NUM_PF_QUEUE; ++i) 398 + kfree(gt->usm.pf_queue[i].data); 399 + } 400 + 401 + static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) 402 + { 403 + xe_dss_mask_t all_dss; 404 + int 
num_dss, num_eus; 405 + 406 + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 407 + XE_MAX_DSS_FUSE_BITS); 408 + 409 + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); 410 + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, 411 + XE_MAX_EU_FUSE_BITS) * num_dss; 412 + 413 + /* user can issue separate page faults per EU and per CS */ 414 + pf_queue->num_dw = 415 + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; 416 + 417 + pf_queue->gt = gt; 418 + pf_queue->data = kcalloc(pf_queue->num_dw, sizeof(u32), GFP_KERNEL); 419 + if (!pf_queue->data) 420 + return -ENOMEM; 421 + 422 + spin_lock_init(&pf_queue->lock); 423 + INIT_WORK(&pf_queue->worker, pf_queue_work_func); 424 + 425 + return 0; 397 426 } 398 427 399 428 int xe_gt_pagefault_init(struct xe_gt *gt) 400 429 { 401 430 struct xe_device *xe = gt_to_xe(gt); 402 - int i; 431 + int i, ret = 0; 403 432 404 433 if (!xe->info.has_usm) 405 434 return 0; 406 435 407 436 for (i = 0; i < NUM_PF_QUEUE; ++i) { 408 - gt->usm.pf_queue[i].gt = gt; 409 - spin_lock_init(&gt->usm.pf_queue[i].lock); 410 - INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func); 437 + ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]); 438 + if (ret) 439 + return ret; 411 440 } 412 441 for (i = 0; i < NUM_ACC_QUEUE; ++i) { 413 442 gt->usm.acc_queue[i].gt = gt;

+40 -70

drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c

··· 232 232 { 233 233 u32 n = 0; 234 234 235 - if (drm_mm_node_allocated(&config->ggtt_region)) { 235 + if (xe_ggtt_node_allocated(config->ggtt_region)) { 236 236 cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); 237 - cfg[n++] = lower_32_bits(config->ggtt_region.start); 238 - cfg[n++] = upper_32_bits(config->ggtt_region.start); 237 + cfg[n++] = lower_32_bits(config->ggtt_region->base.start); 238 + cfg[n++] = upper_32_bits(config->ggtt_region->base.start); 239 239 240 240 cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); 241 - cfg[n++] = lower_32_bits(config->ggtt_region.size); 242 - cfg[n++] = upper_32_bits(config->ggtt_region.size); 241 + cfg[n++] = lower_32_bits(config->ggtt_region->base.size); 242 + cfg[n++] = upper_32_bits(config->ggtt_region->base.size); 243 243 } 244 244 245 245 return n; ··· 369 369 return err ?: err2; 370 370 } 371 371 372 - static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) 372 + static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) 373 373 { 374 - struct xe_ggtt *ggtt = tile->mem.ggtt; 375 - 376 - if (drm_mm_node_allocated(node)) { 374 + if (xe_ggtt_node_allocated(node)) { 377 375 /* 378 376 * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() 379 377 * is redundant, as PTE will be implicitly re-assigned to PF by 380 378 * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). 
381 379 */ 382 - xe_ggtt_remove_node(ggtt, node, false); 380 + xe_ggtt_node_remove(node, false); 383 381 } 384 382 } 385 383 386 384 static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) 387 385 { 388 - pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); 386 + pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); 387 + config->ggtt_region = NULL; 389 388 } 390 389 391 390 static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) 392 391 { 393 392 struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); 394 - struct drm_mm_node *node = &config->ggtt_region; 393 + struct xe_ggtt_node *node = config->ggtt_region; 395 394 struct xe_tile *tile = gt_to_tile(gt); 396 395 struct xe_ggtt *ggtt = tile->mem.ggtt; 397 396 u64 alignment = pf_get_ggtt_alignment(gt); ··· 402 403 403 404 size = round_up(size, alignment); 404 405 405 - if (drm_mm_node_allocated(node)) { 406 + if (xe_ggtt_node_allocated(node)) { 406 407 err = pf_distribute_config_ggtt(tile, vfid, 0, 0); 407 408 if (unlikely(err)) 408 409 return err; 409 410 410 411 pf_release_ggtt(tile, node); 411 412 } 412 - xe_gt_assert(gt, !drm_mm_node_allocated(node)); 413 + xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); 413 414 414 415 if (!size) 415 416 return 0; 416 417 417 - err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); 418 - if (unlikely(err)) 419 - return err; 418 + node = xe_ggtt_node_init(ggtt); 419 + if (IS_ERR(node)) 420 + return PTR_ERR(node); 420 421 421 - xe_ggtt_assign(ggtt, node, vfid); 422 + err = xe_ggtt_node_insert(node, size, alignment); 423 + if (unlikely(err)) 424 + goto err; 425 + 426 + xe_ggtt_assign(node, vfid); 422 427 xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", 423 - vfid, node->start, node->start + node->size - 1); 428 + vfid, node->base.start, node->base.start + node->base.size - 1); 424 429 425 - err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); 430 + err = 
pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); 426 431 if (unlikely(err)) 427 - return err; 432 + goto err; 428 433 434 + config->ggtt_region = node; 429 435 return 0; 436 + err: 437 + xe_ggtt_node_fini(node); 438 + return err; 430 439 } 431 440 432 441 static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) 433 442 { 434 443 struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); 435 - struct drm_mm_node *node = &config->ggtt_region; 444 + struct xe_ggtt_node *node = config->ggtt_region; 436 445 437 446 xe_gt_assert(gt, !xe_gt_is_media_type(gt)); 438 - return drm_mm_node_allocated(node) ? node->size : 0; 447 + return xe_ggtt_node_allocated(node) ? node->base.size : 0; 439 448 } 440 449 441 450 /** ··· 594 587 static u64 pf_get_max_ggtt(struct xe_gt *gt) 595 588 { 596 589 struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; 597 - const struct drm_mm *mm = &ggtt->mm; 598 - const struct drm_mm_node *entry; 599 590 u64 alignment = pf_get_ggtt_alignment(gt); 600 591 u64 spare = pf_get_spare_ggtt(gt); 601 - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); 602 - u64 hole_start, hole_end, hole_size; 603 - u64 max_hole = 0; 592 + u64 max_hole; 604 593 605 - mutex_lock(&ggtt->lock); 606 - 607 - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { 608 - hole_start = max(hole_start, hole_min_start); 609 - hole_start = ALIGN(hole_start, alignment); 610 - hole_end = ALIGN_DOWN(hole_end, alignment); 611 - if (hole_start >= hole_end) 612 - continue; 613 - hole_size = hole_end - hole_start; 614 - xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", 615 - hole_start, hole_size / SZ_1K); 616 - spare -= min3(spare, hole_size, max_hole); 617 - max_hole = max(max_hole, hole_size); 618 - } 619 - 620 - mutex_unlock(&ggtt->lock); 594 + max_hole = xe_ggtt_largest_hole(ggtt, alignment, &spare); 621 595 622 596 xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", 623 597 max_hole / SZ_1K, spare / SZ_1K); ··· 2013 2025 
2014 2026 for (n = 1; n <= total_vfs; n++) { 2015 2027 config = &gt->sriov.pf.vfs[n].config; 2016 - if (!drm_mm_node_allocated(&config->ggtt_region)) 2028 + if (!xe_ggtt_node_allocated(config->ggtt_region)) 2017 2029 continue; 2018 2030 2019 - string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf)); 2031 + string_get_size(config->ggtt_region->base.size, 1, STRING_UNITS_2, 2032 + buf, sizeof(buf)); 2020 2033 drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", 2021 - n, config->ggtt_region.start, 2022 - config->ggtt_region.start + config->ggtt_region.size - 1, buf); 2034 + n, config->ggtt_region->base.start, 2035 + config->ggtt_region->base.start + config->ggtt_region->base.size - 1, 2036 + buf); 2023 2037 } 2024 2038 2025 2039 return 0; ··· 2109 2119 int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) 2110 2120 { 2111 2121 struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; 2112 - const struct drm_mm *mm = &ggtt->mm; 2113 - const struct drm_mm_node *entry; 2114 2122 u64 alignment = pf_get_ggtt_alignment(gt); 2115 - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); 2116 - u64 hole_start, hole_end, hole_size; 2117 - u64 spare, avail, total = 0; 2123 + u64 spare, avail, total; 2118 2124 char buf[10]; 2119 2125 2120 2126 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); ··· 2118 2132 mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); 2119 2133 2120 2134 spare = pf_get_spare_ggtt(gt); 2135 + total = xe_ggtt_print_holes(ggtt, alignment, p); 2121 2136 2122 - mutex_lock(&ggtt->lock); 2123 - 2124 - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { 2125 - hole_start = max(hole_start, hole_min_start); 2126 - hole_start = ALIGN(hole_start, alignment); 2127 - hole_end = ALIGN_DOWN(hole_end, alignment); 2128 - if (hole_start >= hole_end) 2129 - continue; 2130 - hole_size = hole_end - hole_start; 2131 - total += hole_size; 2132 - 2133 - string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); 2134 - drm_printf(p, 
"range:\t%#llx-%#llx\t(%s)\n", 2135 - hole_start, hole_end - 1, buf); 2136 - } 2137 - 2138 - mutex_unlock(&ggtt->lock); 2139 2137 mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); 2140 2138 2141 2139 string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf));

+2 -3

drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h

··· 6 6 #ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ 7 7 #define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ 8 8 9 - #include <drm/drm_mm.h> 10 - 9 + #include "xe_ggtt_types.h" 11 10 #include "xe_guc_klv_thresholds_set_types.h" 12 11 13 12 struct xe_bo; ··· 18 19 */ 19 20 struct xe_gt_sriov_config { 20 21 /** @ggtt_region: GGTT region assigned to the VF. */ 21 - struct drm_mm_node ggtt_region; 22 + struct xe_ggtt_node *ggtt_region; 22 23 /** @lmem_obj: LMEM allocation for use by the VF. */ 23 24 struct xe_bo *lmem_obj; 24 25 /** @num_ctxs: number of GuC contexts IDs. */

+29 -15

drivers/gpu/drm/xe/xe_gt_sriov_vf.c

··· 495 495 return gt->sriov.vf.self_config.lmem_size; 496 496 } 497 497 498 + static struct xe_ggtt_node * 499 + vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) 500 + { 501 + struct xe_ggtt_node *node; 502 + int err; 503 + 504 + node = xe_ggtt_node_init(ggtt); 505 + if (IS_ERR(node)) 506 + return node; 507 + 508 + err = xe_ggtt_node_insert_balloon(node, start, end); 509 + if (err) { 510 + xe_ggtt_node_fini(node); 511 + return ERR_PTR(err); 512 + } 513 + 514 + return node; 515 + } 516 + 498 517 static int vf_balloon_ggtt(struct xe_gt *gt) 499 518 { 500 519 struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; ··· 521 502 struct xe_ggtt *ggtt = tile->mem.ggtt; 522 503 struct xe_device *xe = gt_to_xe(gt); 523 504 u64 start, end; 524 - int err; 525 505 526 506 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 527 507 xe_gt_assert(gt, !xe_gt_is_media_type(gt)); ··· 546 528 start = xe_wopcm_size(xe); 547 529 end = config->ggtt_base; 548 530 if (end != start) { 549 - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[0]); 550 - if (err) 551 - goto failed; 531 + tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); 532 + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) 533 + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); 552 534 } 553 535 554 536 start = config->ggtt_base + config->ggtt_size; 555 537 end = GUC_GGTT_TOP; 556 538 if (end != start) { 557 - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[1]); 558 - if (err) 559 - goto deballoon; 539 + tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); 540 + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { 541 + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); 542 + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); 543 + } 560 544 } 561 545 562 546 return 0; 563 - 564 - deballoon: 565 - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); 566 - failed: 567 - return err; 568 547 } 569 548 570 549 static void 
deballoon_ggtt(struct drm_device *drm, void *arg) 571 550 { 572 551 struct xe_tile *tile = arg; 573 - struct xe_ggtt *ggtt = tile->mem.ggtt; 574 552 575 553 xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); 576 - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); 577 - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); 554 + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); 555 + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); 578 556 } 579 557 580 558 /**

+49

drivers/gpu/drm/xe/xe_gt_stats.c

··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #include <linux/atomic.h> 7 + 8 + #include <drm/drm_print.h> 9 + 10 + #include "xe_gt.h" 11 + #include "xe_gt_stats.h" 12 + 13 + /** 14 + * xe_gt_stats_incr - Increments the specified stats counter 15 + * @gt: graphics tile 16 + * @id: xe_gt_stats_id type id that needs to be incremented 17 + * @incr: value to be incremented with 18 + * 19 + * Increments the specified stats counter. 20 + */ 21 + void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) 22 + { 23 + if (id >= __XE_GT_STATS_NUM_IDS) 24 + return; 25 + 26 + atomic_add(incr, &gt->stats.counters[id]); 27 + } 28 + 29 + static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { 30 + "tlb_inval_count", 31 + }; 32 + 33 + /** 34 + * xe_gt_stats_print_info - Print the GT stats 35 + * @gt: graphics tile 36 + * @p: drm_printer where it will be printed out. 37 + * 38 + * This prints out all the available GT stats. 39 + */ 40 + int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) 41 + { 42 + enum xe_gt_stats_id id; 43 + 44 + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) 45 + drm_printf(p, "%s: %d\n", stat_description[id], 46 + atomic_read(&gt->stats.counters[id])); 47 + 48 + return 0; 49 + }

+29

drivers/gpu/drm/xe/xe_gt_stats.h

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2024 Intel Corporation
 */

#ifndef _XE_GT_STATS_H_
#define _XE_GT_STATS_H_

struct drm_printer;
struct xe_gt;

/* Identifiers of the per-GT statistics counters. */
enum xe_gt_stats_id {
	XE_GT_STATS_ID_TLB_INVAL,
	/* must be the last entry */
	__XE_GT_STATS_NUM_IDS,
};

#ifdef CONFIG_DEBUG_FS
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
#else
/* Without CONFIG_DEBUG_FS the increment helper compiles to a no-op. */
static inline void xe_gt_stats_incr(struct xe_gt *gt,
				    const enum xe_gt_stats_id id, int incr)
{
}
#endif

#endif

+2

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

··· 12 12 #include "xe_gt_printk.h" 13 13 #include "xe_guc.h" 14 14 #include "xe_guc_ct.h" 15 + #include "xe_gt_stats.h" 15 16 #include "xe_mmio.h" 16 17 #include "xe_pm.h" 17 18 #include "xe_sriov.h" ··· 214 213 gt->tlb_invalidation.seqno = 1; 215 214 } 216 215 mutex_unlock(&guc->ct.lock); 216 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); 217 217 218 218 return ret; 219 219 }

+22 -2

drivers/gpu/drm/xe/xe_gt_types.h

··· 10 10 #include "xe_gt_idle_types.h" 11 11 #include "xe_gt_sriov_pf_types.h" 12 12 #include "xe_gt_sriov_vf_types.h" 13 + #include "xe_gt_stats.h" 13 14 #include "xe_hw_engine_types.h" 14 15 #include "xe_hw_fence_types.h" 15 16 #include "xe_oa.h" ··· 134 133 u8 has_indirect_ring_state:1; 135 134 } info; 136 135 136 + #if IS_ENABLED(CONFIG_DEBUG_FS) 137 + /** @stats: GT stats */ 138 + struct { 139 + /** @stats.counters: counters for various GT stats */ 140 + atomic_t counters[__XE_GT_STATS_NUM_IDS]; 141 + } stats; 142 + #endif 143 + 137 144 /** 138 145 * @mmio: mmio info for GT. All GTs within a tile share the same 139 146 * register space, but have their own copy of GSI registers at a ··· 247 238 struct pf_queue { 248 239 /** @usm.pf_queue.gt: back pointer to GT */ 249 240 struct xe_gt *gt; 250 - #define PF_QUEUE_NUM_DW 128 251 241 /** @usm.pf_queue.data: data in the page fault queue */ 252 - u32 data[PF_QUEUE_NUM_DW]; 242 + u32 *data; 243 + /** 244 + * @usm.pf_queue.num_dw: number of DWORDS in the page 245 + * fault queue. Dynamically calculated based on the number 246 + * of compute resources available. 247 + */ 248 + u32 num_dw; 253 249 /** 254 250 * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, 255 251 * moved by worker which processes faults (consumer). ··· 380 366 /** @steering.instance_target: instance to steer accesses to */ 381 367 u16 instance_target; 382 368 } steering[NUM_STEERING_TYPES]; 369 + 370 + /** 371 + * @steering_dss_per_grp: number of DSS per steering group (gslice, 372 + * cslice, etc.). 373 + */ 374 + unsigned int steering_dss_per_grp; 383 375 384 376 /** 385 377 * @mcr_lock: protects the MCR_SELECTOR register for the duration

+2 -2

drivers/gpu/drm/xe/xe_guc.c

··· 350 350 if (ret) 351 351 goto out; 352 352 353 + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); 354 + 353 355 ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); 354 356 if (ret) 355 357 goto out; ··· 359 357 guc_init_params(guc); 360 358 361 359 xe_guc_comm_init_early(guc); 362 - 363 - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); 364 360 365 361 return 0; 366 362

+10

drivers/gpu/drm/xe/xe_guc.h

··· 11 11 #include "xe_hw_engine_types.h" 12 12 #include "xe_macros.h" 13 13 14 + /* 15 + * GuC version number components are defined to be only 8-bit size, 16 + * so converting to a 32bit 8.8.8 integer allows simple (and safe) 17 + * numerical comparisons. 18 + */ 19 + #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) 20 + #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) 21 + #define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) 22 + #define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) 23 + 14 24 struct drm_printer; 15 25 16 26 void xe_guc_comm_init_early(struct xe_guc *guc);

+6

drivers/gpu/drm/xe/xe_guc_ads.c

··· 24 24 #include "xe_map.h" 25 25 #include "xe_mmio.h" 26 26 #include "xe_platform_types.h" 27 + #include "xe_uc_fw.h" 27 28 #include "xe_wa.h" 28 29 29 30 /* Slack of a few additional entries per engine */ ··· 367 366 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, 368 367 0xC40, 369 368 &offset, &remain); 369 + 370 + if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) 371 + guc_waklv_enable_simple(ads, 372 + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, 373 + &offset, &remain); 370 374 371 375 size = guc_ads_waklv_size(ads) - remain; 372 376 if (!size)

+10 -2

drivers/gpu/drm/xe/xe_guc_ct.c

··· 105 105 * enough space to avoid backpressure on the driver. We increase the size 106 106 * of the receive buffer (relative to the send) to ensure a G2H response 107 107 * CTB has a landing spot. 108 + * 109 + * In addition to submissions, the G2H buffer needs to be able to hold 110 + * enough space for recoverable page fault notifications. The number of 111 + * page faults is interrupt driven and can be as much as the number of 112 + * compute resources available. However, most of the actual work for these 113 + * is in a separate page fault worker thread. Therefore we only need to 114 + * make sure the queue has enough space to handle all of the submissions 115 + * and responses and an extra buffer for incoming page faults. 108 116 */ 109 117 110 118 #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) 111 119 #define CTB_H2G_BUFFER_SIZE (SZ_4K) 112 - #define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) 113 - #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) 120 + #define CTB_G2H_BUFFER_SIZE (SZ_128K) 121 + #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) 114 122 115 123 /** 116 124 * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full

+97

drivers/gpu/drm/xe/xe_guc_hwconfig.c

··· 6 6 #include "xe_guc_hwconfig.h" 7 7 8 8 #include <drm/drm_managed.h> 9 + #include <drm/drm_print.h> 9 10 10 11 #include "abi/guc_actions_abi.h" 11 12 #include "xe_bo.h" ··· 103 102 104 103 xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, 105 104 guc->hwconfig.size); 105 + } 106 + 107 + void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) 108 + { 109 + size_t size = xe_guc_hwconfig_size(guc); 110 + u32 *hwconfig; 111 + u64 num_dw; 112 + u32 extra_bytes; 113 + int i = 0; 114 + 115 + if (size == 0) { 116 + drm_printf(p, "No hwconfig available\n"); 117 + return; 118 + } 119 + 120 + num_dw = div_u64_rem(size, sizeof(u32), &extra_bytes); 121 + 122 + hwconfig = kzalloc(size, GFP_KERNEL); 123 + if (!hwconfig) { 124 + drm_printf(p, "Error: could not allocate hwconfig memory\n"); 125 + return; 126 + } 127 + 128 + xe_guc_hwconfig_copy(guc, hwconfig); 129 + 130 + /* An entry requires at least three dwords for key, length, value */ 131 + while (i + 3 <= num_dw) { 132 + u32 attribute = hwconfig[i++]; 133 + u32 len_dw = hwconfig[i++]; 134 + 135 + if (i + len_dw > num_dw) { 136 + drm_printf(p, "Error: Attribute %u is %u dwords, but only %llu remain\n", 137 + attribute, len_dw, num_dw - i); 138 + len_dw = num_dw - i; 139 + } 140 + 141 + /* 142 + * If it's a single dword (as most hwconfig attributes are), 143 + * then it's probably a number that makes sense to display 144 + * in decimal form. In the rare cases where it's more than 145 + * one dword, just print it in hex form and let the user 146 + * figure out how to interpret it. 
147 + */ 148 + if (len_dw == 1) 149 + drm_printf(p, "[%2u] = %u\n", attribute, hwconfig[i]); 150 + else 151 + drm_printf(p, "[%2u] = { %*ph }\n", attribute, 152 + (int)(len_dw * sizeof(u32)), &hwconfig[i]); 153 + i += len_dw; 154 + } 155 + 156 + if (i < num_dw || extra_bytes) 157 + drm_printf(p, "Error: %llu extra bytes at end of hwconfig\n", 158 + (num_dw - i) * sizeof(u32) + extra_bytes); 159 + 160 + kfree(hwconfig); 161 + } 162 + 163 + /* 164 + * Lookup a specific 32-bit attribute value in the GuC's hwconfig table. 165 + */ 166 + int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val) 167 + { 168 + size_t size = xe_guc_hwconfig_size(guc); 169 + u64 num_dw = div_u64(size, sizeof(u32)); 170 + u32 *hwconfig; 171 + bool found = false; 172 + int i = 0; 173 + 174 + if (num_dw == 0) 175 + return -EINVAL; 176 + 177 + hwconfig = kzalloc(size, GFP_KERNEL); 178 + if (!hwconfig) 179 + return -ENOMEM; 180 + 181 + xe_guc_hwconfig_copy(guc, hwconfig); 182 + 183 + /* An entry requires at least three dwords for key, length, value */ 184 + while (i + 3 <= num_dw) { 185 + u32 key = hwconfig[i++]; 186 + u32 len_dw = hwconfig[i++]; 187 + 188 + if (key != attribute) { 189 + i += len_dw; 190 + continue; 191 + } 192 + 193 + *val = hwconfig[i]; 194 + found = true; 195 + break; 196 + } 197 + 198 + kfree(hwconfig); 199 + 200 + return found ? 0 : -ENOENT; 106 201 }

+3

drivers/gpu/drm/xe/xe_guc_hwconfig.h

··· 8 8 9 9 #include <linux/types.h> 10 10 11 + struct drm_printer; 11 12 struct xe_guc; 12 13 13 14 int xe_guc_hwconfig_init(struct xe_guc *guc); 14 15 u32 xe_guc_hwconfig_size(struct xe_guc *guc); 15 16 void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); 17 + void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); 18 + int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val); 16 19 17 20 #endif

+1 -1

drivers/gpu/drm/xe/xe_guc_pc.c

··· 1042 1042 return; 1043 1043 1044 1044 XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); 1045 - XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); 1045 + xe_guc_pc_gucrc_disable(pc); 1046 1046 XE_WARN_ON(xe_guc_pc_stop(pc)); 1047 1047 1048 1048 /* Bind requested freq to mert_freq_cap before unload */

+43 -15

drivers/gpu/drm/xe/xe_guc_submit.c

··· 1374 1374 struct xe_exec_queue *q = msg->private_data; 1375 1375 1376 1376 if (guc_exec_queue_allowed_to_change_state(q)) { 1377 - q->guc->resume_time = RESUME_PENDING; 1378 1377 clear_exec_queue_suspended(q); 1379 - enable_scheduling(q); 1378 + if (!exec_queue_enabled(q)) { 1379 + q->guc->resume_time = RESUME_PENDING; 1380 + enable_scheduling(q); 1381 + } 1380 1382 } else { 1381 1383 clear_exec_queue_suspended(q); 1382 1384 } ··· 1388 1386 #define SET_SCHED_PROPS 2 1389 1387 #define SUSPEND 3 1390 1388 #define RESUME 4 1389 + #define OPCODE_MASK 0xf 1390 + #define MSG_LOCKED BIT(8) 1391 1391 1392 1392 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1393 1393 { ··· 1434 1430 struct xe_device *xe = guc_to_xe(guc); 1435 1431 struct xe_guc_exec_queue *ge; 1436 1432 long timeout; 1437 - int err; 1433 + int err, i; 1438 1434 1439 1435 xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); 1440 1436 ··· 1445 1441 q->guc = ge; 1446 1442 ge->q = q; 1447 1443 init_waitqueue_head(&ge->suspend_wait); 1444 + 1445 + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) 1446 + INIT_LIST_HEAD(&ge->static_msgs[i].link); 1448 1447 1449 1448 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : 1450 1449 msecs_to_jiffies(q->sched_props.job_timeout_ms); ··· 1511 1504 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 1512 1505 1513 1506 INIT_LIST_HEAD(&msg->link); 1514 - msg->opcode = opcode; 1507 + msg->opcode = opcode & OPCODE_MASK; 1515 1508 msg->private_data = q; 1516 1509 1517 1510 trace_xe_sched_msg_add(msg); 1518 - xe_sched_add_msg(&q->guc->sched, msg); 1511 + if (opcode & MSG_LOCKED) 1512 + xe_sched_add_msg_locked(&q->guc->sched, msg); 1513 + else 1514 + xe_sched_add_msg(&q->guc->sched, msg); 1515 + } 1516 + 1517 + static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 1518 + struct xe_sched_msg *msg, 1519 + u32 opcode) 1520 + { 1521 + if (!list_empty(&msg->link)) 1522 + return false; 1523 + 1524 + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 1525 + 1526 + return true; 1519 1527 } 1520 1528 1521 1529 #define STATIC_MSG_CLEANUP 0 ··· 1604 1582 1605 1583 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 1606 1584 { 1585 + struct xe_gpu_scheduler *sched = &q->guc->sched; 1607 1586 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 1608 1587 1609 - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) 1588 + if (exec_queue_killed_or_banned_or_wedged(q)) 1610 1589 return -EINVAL; 1611 1590 1612 - q->guc->suspend_pending = true; 1613 - guc_exec_queue_add_msg(q, msg, SUSPEND); 1591 + xe_sched_msg_lock(sched); 1592 + if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 1593 + q->guc->suspend_pending = true; 1594 + xe_sched_msg_unlock(sched); 1614 1595 1615 1596 return 0; 1616 1597 } ··· 1628 1603 * suspend_pending upon kill but to be paranoid but races in which 1629 1604 * suspend_pending is set after kill also check kill here. 
1630 1605 */ 1631 - ret = wait_event_timeout(q->guc->suspend_wait, 1632 - !READ_ONCE(q->guc->suspend_pending) || 1633 - exec_queue_killed(q) || 1634 - guc_read_stopped(guc), 1635 - HZ * 5); 1606 + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 1607 + !READ_ONCE(q->guc->suspend_pending) || 1608 + exec_queue_killed(q) || 1609 + guc_read_stopped(guc), 1610 + HZ * 5); 1636 1611 1637 1612 if (!ret) { 1638 1613 xe_gt_warn(guc_to_gt(guc), ··· 1642 1617 return -ETIME; 1643 1618 } 1644 1619 1645 - return 0; 1620 + return ret < 0 ? ret : 0; 1646 1621 } 1647 1622 1648 1623 static void guc_exec_queue_resume(struct xe_exec_queue *q) 1649 1624 { 1625 + struct xe_gpu_scheduler *sched = &q->guc->sched; 1650 1626 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 1651 1627 struct xe_guc *guc = exec_queue_to_guc(q); 1652 1628 struct xe_device *xe = guc_to_xe(guc); 1653 1629 1654 1630 xe_assert(xe, !q->guc->suspend_pending); 1655 1631 1656 - guc_exec_queue_add_msg(q, msg, RESUME); 1632 + xe_sched_msg_lock(sched); 1633 + guc_exec_queue_try_add_msg(q, msg, RESUME); 1634 + xe_sched_msg_unlock(sched); 1657 1635 } 1658 1636 1659 1637 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)

+5 -14

drivers/gpu/drm/xe/xe_huc.c

··· 43 43 return &container_of(huc, struct xe_uc, huc)->guc; 44 44 } 45 45 46 - static void free_gsc_pkt(struct drm_device *drm, void *arg) 47 - { 48 - struct xe_huc *huc = arg; 49 - 50 - xe_bo_unpin_map_no_vm(huc->gsc_pkt); 51 - huc->gsc_pkt = NULL; 52 - } 53 - 54 46 #define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K 55 47 static int huc_alloc_gsc_pkt(struct xe_huc *huc) 56 48 { ··· 51 59 struct xe_bo *bo; 52 60 53 61 /* we use a single object for both input and output */ 54 - bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, 55 - PXP43_HUC_AUTH_INOUT_SIZE * 2, 56 - ttm_bo_type_kernel, 57 - XE_BO_FLAG_SYSTEM | 58 - XE_BO_FLAG_GGTT); 62 + bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt), 63 + PXP43_HUC_AUTH_INOUT_SIZE * 2, 64 + XE_BO_FLAG_SYSTEM | 65 + XE_BO_FLAG_GGTT); 59 66 if (IS_ERR(bo)) 60 67 return PTR_ERR(bo); 61 68 62 69 huc->gsc_pkt = bo; 63 70 64 - return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); 71 + return 0; 65 72 } 66 73 67 74 int xe_huc_init(struct xe_huc *huc)

+110 -39

drivers/gpu/drm/xe/xe_hw_engine.c

··· 5 5 6 6 #include "xe_hw_engine.h" 7 7 8 + #include <linux/nospec.h> 9 + 8 10 #include <drm/drm_managed.h> 11 + #include <drm/xe_drm.h> 9 12 10 13 #include "regs/xe_engine_regs.h" 11 14 #include "regs/xe_gt_regs.h" ··· 23 20 #include "xe_gt_printk.h" 24 21 #include "xe_gt_mcr.h" 25 22 #include "xe_gt_topology.h" 23 + #include "xe_hw_engine_group.h" 26 24 #include "xe_hw_fence.h" 27 25 #include "xe_irq.h" 28 26 #include "xe_lrc.h" ··· 267 263 }, 268 264 }; 269 265 270 - static void hw_engine_fini(struct drm_device *drm, void *arg) 266 + static void hw_engine_fini(void *arg) 271 267 { 272 268 struct xe_hw_engine *hwe = arg; 273 269 ··· 278 274 hwe->gt = NULL; 279 275 } 280 276 281 - static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, 282 - u32 val) 277 + /** 278 + * xe_hw_engine_mmio_write32() - Write engine register 279 + * @hwe: engine 280 + * @reg: register to write into 281 + * @val: desired 32-bit value to write 282 + * 283 + * This function will write val into an engine specific register. 284 + * Forcewake must be held by the caller. 285 + * 286 + */ 287 + void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, 288 + struct xe_reg reg, u32 val) 283 289 { 284 290 xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); 285 291 xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); ··· 299 285 xe_mmio_write32(hwe->gt, reg, val); 300 286 } 301 287 302 - static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) 288 + /** 289 + * xe_hw_engine_mmio_read32() - Read engine register 290 + * @hwe: engine 291 + * @reg: register to read from 292 + * 293 + * This function will read from an engine specific register. 294 + * Forcewake must be held by the caller. 295 + * 296 + * Return: value of the 32-bit register. 
297 + */ 298 + u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) 303 299 { 304 300 xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); 305 301 xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); ··· 328 304 xe_mmio_write32(hwe->gt, RCU_MODE, 329 305 _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); 330 306 331 - hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); 332 - hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), 333 - xe_bo_ggtt_addr(hwe->hwsp)); 334 - hw_engine_mmio_write32(hwe, RING_MODE(0), 335 - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); 336 - hw_engine_mmio_write32(hwe, RING_MI_MODE(0), 337 - _MASKED_BIT_DISABLE(STOP_RING)); 338 - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); 307 + xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); 308 + xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), 309 + xe_bo_ggtt_addr(hwe->hwsp)); 310 + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), 311 + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); 312 + xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), 313 + _MASKED_BIT_DISABLE(STOP_RING)); 314 + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); 339 315 } 340 316 341 317 static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, ··· 448 424 IDLE_WAIT_TIME, 449 425 0xA, 450 426 XE_RTP_ACTION_FLAG(ENGINE_BASE))) 427 + }, 428 + /* Enable Priority Mem Read */ 429 + { XE_RTP_NAME("Priority_Mem_Read"), 430 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 431 + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, 432 + XE_RTP_ACTION_FLAG(ENGINE_BASE))) 451 433 }, 452 434 {} 453 435 }; ··· 585 555 if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) 586 556 gt->usm.reserved_bcs_instance = hwe->instance; 587 557 588 - return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); 558 + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); 589 559 590 560 err_kernel_lrc: 591 561 xe_lrc_put(hwe->kernel_lrc); ··· 791 761 } 792 762 793 763 
hw_engine_setup_logical_mapping(gt); 764 + err = xe_hw_engine_setup_groups(gt); 765 + if (err) 766 + return err; 794 767 795 768 return 0; 796 769 } ··· 824 791 unsigned int dss; 825 792 u16 group, instance; 826 793 827 - snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); 794 + snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); 828 795 829 796 if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) 830 797 return; ··· 920 887 return snapshot; 921 888 922 889 snapshot->reg.ring_execlist_status = 923 - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); 924 - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); 890 + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); 891 + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); 925 892 snapshot->reg.ring_execlist_status |= val << 32; 926 893 927 894 snapshot->reg.ring_execlist_sq_contents = 928 - hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); 929 - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); 895 + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); 896 + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); 930 897 snapshot->reg.ring_execlist_sq_contents |= val << 32; 931 898 932 - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); 933 - val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); 899 + snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); 900 + val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); 934 901 snapshot->reg.ring_acthd |= val << 32; 935 902 936 - snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); 937 - val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); 903 + snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); 904 + val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); 938 905 snapshot->reg.ring_bbaddr |= val << 32; 939 906 940 907 snapshot->reg.ring_dma_fadd = 941 - 
hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); 942 - val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); 908 + xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); 909 + val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); 943 910 snapshot->reg.ring_dma_fadd |= val << 32; 944 911 945 - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); 946 - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); 947 - snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); 912 + snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); 913 + snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); 914 + snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); 948 915 if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { 949 - val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); 916 + val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); 950 917 snapshot->reg.ring_start |= val << 32; 951 918 } 952 919 if (xe_gt_has_indirect_ring_state(hwe->gt)) { 953 920 snapshot->reg.indirect_ring_state = 954 - hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); 921 + xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); 955 922 } 956 923 957 924 snapshot->reg.ring_head = 958 - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; 925 + xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; 959 926 snapshot->reg.ring_tail = 960 - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; 961 - snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); 927 + xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; 928 + snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); 962 929 snapshot->reg.ring_mi_mode = 963 - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); 964 - snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); 965 - snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); 966 - snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, 
RING_ESR(0)); 967 - snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); 968 - snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); 969 - snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); 930 + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); 931 + snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); 932 + snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); 933 + snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); 934 + snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); 935 + snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); 936 + snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); 970 937 xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); 971 938 972 939 if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) ··· 1167 1134 enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) 1168 1135 { 1169 1136 return engine_infos[hwe->engine_id].domain; 1137 + } 1138 + 1139 + static const enum xe_engine_class user_to_xe_engine_class[] = { 1140 + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, 1141 + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, 1142 + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, 1143 + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, 1144 + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, 1145 + }; 1146 + 1147 + /** 1148 + * xe_hw_engine_lookup() - Lookup hardware engine for class:instance 1149 + * @xe: xe device 1150 + * @eci: engine class and instance 1151 + * 1152 + * This function will find a hardware engine for given engine 1153 + * class and instance. 1154 + * 1155 + * Return: If found xe_hw_engine pointer, NULL otherwise. 
1156 + */ 1157 + struct xe_hw_engine * 1158 + xe_hw_engine_lookup(struct xe_device *xe, 1159 + struct drm_xe_engine_class_instance eci) 1160 + { 1161 + unsigned int idx; 1162 + 1163 + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) 1164 + return NULL; 1165 + 1166 + if (eci.gt_id >= xe->info.gt_count) 1167 + return NULL; 1168 + 1169 + idx = array_index_nospec(eci.engine_class, 1170 + ARRAY_SIZE(user_to_xe_engine_class)); 1171 + 1172 + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), 1173 + user_to_xe_engine_class[idx], 1174 + eci.engine_instance, true); 1170 1175 }

+10

drivers/gpu/drm/xe/xe_hw_engine.h

··· 9 9 #include "xe_hw_engine_types.h" 10 10 11 11 struct drm_printer; 12 + struct drm_xe_engine_class_instance; 13 + struct xe_device; 12 14 13 15 #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN 14 16 #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN ··· 64 62 void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); 65 63 66 64 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); 65 + 66 + struct xe_hw_engine * 67 + xe_hw_engine_lookup(struct xe_device *xe, 68 + struct drm_xe_engine_class_instance eci); 69 + 67 70 static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) 68 71 { 69 72 return hwe->name; ··· 77 70 const char *xe_hw_engine_class_to_str(enum xe_engine_class class); 78 71 u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); 79 72 enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe); 73 + 74 + void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val); 75 + u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg); 80 76 81 77 #endif

+372

drivers/gpu/drm/xe/xe_hw_engine_group.c

··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_managed.h> 7 + 8 + #include "xe_assert.h" 9 + #include "xe_device.h" 10 + #include "xe_exec_queue.h" 11 + #include "xe_gt.h" 12 + #include "xe_hw_engine_group.h" 13 + #include "xe_vm.h" 14 + 15 + static void 16 + hw_engine_group_free(struct drm_device *drm, void *arg) 17 + { 18 + struct xe_hw_engine_group *group = arg; 19 + 20 + destroy_workqueue(group->resume_wq); 21 + kfree(group); 22 + } 23 + 24 + static void 25 + hw_engine_group_resume_lr_jobs_func(struct work_struct *w) 26 + { 27 + struct xe_exec_queue *q; 28 + struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work); 29 + int err; 30 + enum xe_hw_engine_group_execution_mode previous_mode; 31 + 32 + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); 33 + if (err) 34 + return; 35 + 36 + if (previous_mode == EXEC_MODE_LR) 37 + goto put; 38 + 39 + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 40 + if (!xe_vm_in_fault_mode(q->vm)) 41 + continue; 42 + 43 + q->ops->resume(q); 44 + } 45 + 46 + put: 47 + xe_hw_engine_group_put(group); 48 + } 49 + 50 + static struct xe_hw_engine_group * 51 + hw_engine_group_alloc(struct xe_device *xe) 52 + { 53 + struct xe_hw_engine_group *group; 54 + int err; 55 + 56 + group = kzalloc(sizeof(*group), GFP_KERNEL); 57 + if (!group) 58 + return ERR_PTR(-ENOMEM); 59 + 60 + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); 61 + if (!group->resume_wq) 62 + return ERR_PTR(-ENOMEM); 63 + 64 + init_rwsem(&group->mode_sem); 65 + INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); 66 + INIT_LIST_HEAD(&group->exec_queue_list); 67 + 68 + err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); 69 + if (err) 70 + return ERR_PTR(err); 71 + 72 + return group; 73 + } 74 + 75 + /** 76 + * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt 77 
+ * @gt: The gt for which groups are setup 78 + * 79 + * Return: 0 on success, negative error code on error. 80 + */ 81 + int xe_hw_engine_setup_groups(struct xe_gt *gt) 82 + { 83 + struct xe_hw_engine *hwe; 84 + enum xe_hw_engine_id id; 85 + struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; 86 + struct xe_device *xe = gt_to_xe(gt); 87 + int err; 88 + 89 + group_rcs_ccs = hw_engine_group_alloc(xe); 90 + if (IS_ERR(group_rcs_ccs)) { 91 + err = PTR_ERR(group_rcs_ccs); 92 + goto err_group_rcs_ccs; 93 + } 94 + 95 + group_bcs = hw_engine_group_alloc(xe); 96 + if (IS_ERR(group_bcs)) { 97 + err = PTR_ERR(group_bcs); 98 + goto err_group_bcs; 99 + } 100 + 101 + group_vcs_vecs = hw_engine_group_alloc(xe); 102 + if (IS_ERR(group_vcs_vecs)) { 103 + err = PTR_ERR(group_vcs_vecs); 104 + goto err_group_vcs_vecs; 105 + } 106 + 107 + for_each_hw_engine(hwe, gt, id) { 108 + switch (hwe->class) { 109 + case XE_ENGINE_CLASS_COPY: 110 + hwe->hw_engine_group = group_bcs; 111 + break; 112 + case XE_ENGINE_CLASS_RENDER: 113 + case XE_ENGINE_CLASS_COMPUTE: 114 + hwe->hw_engine_group = group_rcs_ccs; 115 + break; 116 + case XE_ENGINE_CLASS_VIDEO_DECODE: 117 + case XE_ENGINE_CLASS_VIDEO_ENHANCE: 118 + hwe->hw_engine_group = group_vcs_vecs; 119 + break; 120 + case XE_ENGINE_CLASS_OTHER: 121 + break; 122 + default: 123 + drm_warn(&xe->drm, "NOT POSSIBLE"); 124 + } 125 + } 126 + 127 + return 0; 128 + 129 + err_group_vcs_vecs: 130 + kfree(group_vcs_vecs); 131 + err_group_bcs: 132 + kfree(group_bcs); 133 + err_group_rcs_ccs: 134 + kfree(group_rcs_ccs); 135 + 136 + return err; 137 + } 138 + 139 + /** 140 + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group 141 + * @group: The hw engine group 142 + * @q: The exec_queue 143 + * 144 + * Return: 0 on success, 145 + * -EINTR if the lock could not be acquired 146 + */ 147 + int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) 148 + { 149 + int err; 150 + struct 
xe_device *xe = gt_to_xe(q->gt); 151 + 152 + xe_assert(xe, group); 153 + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); 154 + xe_assert(xe, q->vm); 155 + 156 + if (xe_vm_in_preempt_fence_mode(q->vm)) 157 + return 0; 158 + 159 + err = down_write_killable(&group->mode_sem); 160 + if (err) 161 + return err; 162 + 163 + if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) { 164 + q->ops->suspend(q); 165 + err = q->ops->suspend_wait(q); 166 + if (err) 167 + goto err_suspend; 168 + 169 + xe_hw_engine_group_resume_faulting_lr_jobs(group); 170 + } 171 + 172 + list_add(&q->hw_engine_group_link, &group->exec_queue_list); 173 + up_write(&group->mode_sem); 174 + 175 + return 0; 176 + 177 + err_suspend: 178 + up_write(&group->mode_sem); 179 + return err; 180 + } 181 + 182 + /** 183 + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group 184 + * @group: The hw engine group 185 + * @q: The exec_queue 186 + */ 187 + void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) 188 + { 189 + struct xe_device *xe = gt_to_xe(q->gt); 190 + 191 + xe_assert(xe, group); 192 + xe_assert(xe, q->vm); 193 + 194 + down_write(&group->mode_sem); 195 + 196 + if (!list_empty(&q->hw_engine_group_link)) 197 + list_del(&q->hw_engine_group_link); 198 + 199 + up_write(&group->mode_sem); 200 + } 201 + 202 + /** 203 + * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's 204 + * faulting LR jobs 205 + * @group: The hw engine group 206 + */ 207 + void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group) 208 + { 209 + queue_work(group->resume_wq, &group->resume_work); 210 + } 211 + 212 + /** 213 + * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group 214 + * @group: The hw engine group 215 + * 216 + * Return: 0 on success, negative error code on error. 
217 + */ 218 + static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) 219 + { 220 + int err; 221 + struct xe_exec_queue *q; 222 + bool need_resume = false; 223 + 224 + lockdep_assert_held_write(&group->mode_sem); 225 + 226 + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 227 + if (!xe_vm_in_fault_mode(q->vm)) 228 + continue; 229 + 230 + need_resume = true; 231 + q->ops->suspend(q); 232 + } 233 + 234 + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 235 + if (!xe_vm_in_fault_mode(q->vm)) 236 + continue; 237 + 238 + err = q->ops->suspend_wait(q); 239 + if (err) 240 + goto err_suspend; 241 + } 242 + 243 + if (need_resume) 244 + xe_hw_engine_group_resume_faulting_lr_jobs(group); 245 + 246 + return 0; 247 + 248 + err_suspend: 249 + up_write(&group->mode_sem); 250 + return err; 251 + } 252 + 253 + /** 254 + * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete 255 + * @group: The hw engine group 256 + * 257 + * This function is not meant to be called directly from a user IOCTL as dma_fence_wait() 258 + * is not interruptible. 
259 + * 260 + * Return: 0 on success, 261 + * -ETIME if waiting for one job failed 262 + */ 263 + static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group) 264 + { 265 + long timeout; 266 + struct xe_exec_queue *q; 267 + struct dma_fence *fence; 268 + 269 + lockdep_assert_held_write(&group->mode_sem); 270 + 271 + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 272 + if (xe_vm_in_lr_mode(q->vm)) 273 + continue; 274 + 275 + fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); 276 + timeout = dma_fence_wait(fence, false); 277 + dma_fence_put(fence); 278 + 279 + if (timeout < 0) 280 + return -ETIME; 281 + } 282 + 283 + return 0; 284 + } 285 + 286 + static int switch_mode(struct xe_hw_engine_group *group) 287 + { 288 + int err = 0; 289 + enum xe_hw_engine_group_execution_mode new_mode; 290 + 291 + lockdep_assert_held_write(&group->mode_sem); 292 + 293 + switch (group->cur_mode) { 294 + case EXEC_MODE_LR: 295 + new_mode = EXEC_MODE_DMA_FENCE; 296 + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); 297 + break; 298 + case EXEC_MODE_DMA_FENCE: 299 + new_mode = EXEC_MODE_LR; 300 + err = xe_hw_engine_group_wait_for_dma_fence_jobs(group); 301 + break; 302 + } 303 + 304 + if (err) 305 + return err; 306 + 307 + group->cur_mode = new_mode; 308 + 309 + return 0; 310 + } 311 + 312 + /** 313 + * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode 314 + * @group: The hw engine group 315 + * @new_mode: The new execution mode 316 + * @previous_mode: Pointer to the previous mode provided for use by caller 317 + * 318 + * Return: 0 if successful, -EINTR if locking failed. 
319 + */ 320 + int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, 321 + enum xe_hw_engine_group_execution_mode new_mode, 322 + enum xe_hw_engine_group_execution_mode *previous_mode) 323 + __acquires(&group->mode_sem) 324 + { 325 + int err = down_read_interruptible(&group->mode_sem); 326 + 327 + if (err) 328 + return err; 329 + 330 + *previous_mode = group->cur_mode; 331 + 332 + if (new_mode != group->cur_mode) { 333 + up_read(&group->mode_sem); 334 + err = down_write_killable(&group->mode_sem); 335 + if (err) 336 + return err; 337 + 338 + if (new_mode != group->cur_mode) { 339 + err = switch_mode(group); 340 + if (err) { 341 + up_write(&group->mode_sem); 342 + return err; 343 + } 344 + } 345 + downgrade_write(&group->mode_sem); 346 + } 347 + 348 + return err; 349 + } 350 + 351 + /** 352 + * xe_hw_engine_group_put() - Put the group 353 + * @group: The hw engine group 354 + */ 355 + void xe_hw_engine_group_put(struct xe_hw_engine_group *group) 356 + __releases(&group->mode_sem) 357 + { 358 + up_read(&group->mode_sem); 359 + } 360 + 361 + /** 362 + * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue 363 + * @q: The exec_queue 364 + */ 365 + enum xe_hw_engine_group_execution_mode 366 + xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q) 367 + { 368 + if (xe_vm_in_fault_mode(q->vm)) 369 + return EXEC_MODE_LR; 370 + else 371 + return EXEC_MODE_DMA_FENCE; 372 + }

+29

drivers/gpu/drm/xe/xe_hw_engine_group.h

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2024 Intel Corporation
 */

#ifndef _XE_HW_ENGINE_GROUP_H_
#define _XE_HW_ENGINE_GROUP_H_

#include "xe_hw_engine_group_types.h"

struct drm_device;
struct xe_exec_queue;
struct xe_gt;

/* Allocate the hw engine groups of @gt and assign each hw engine to one. */
int xe_hw_engine_setup_groups(struct xe_gt *gt);

/* Track / stop tracking an exec queue in a group's exec_queue_list. */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);

/*
 * A successful xe_hw_engine_group_get_mode() returns with the group's
 * mode_sem held for read; release it with xe_hw_engine_group_put().
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode);
void xe_hw_engine_group_put(struct xe_hw_engine_group *group);

/* Map an exec queue to the execution mode it requires from its group. */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q);
/* Queue the group's resume worker; does not wait for it to run. */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group);

#endif

+51

drivers/gpu/drm/xe/xe_hw_engine_group_types.h

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_HW_ENGINE_GROUP_TYPES_H_ 7 + #define _XE_HW_ENGINE_GROUP_TYPES_H_ 8 + 9 + #include "xe_force_wake_types.h" 10 + #include "xe_lrc_types.h" 11 + #include "xe_reg_sr_types.h" 12 + 13 + /** 14 + * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw 15 + * engine group 16 + * 17 + * @EXEC_MODE_LR: execution in long-running mode 18 + * @EXEC_MODE_DMA_FENCE: execution in dma fence mode 19 + */ 20 + enum xe_hw_engine_group_execution_mode { 21 + EXEC_MODE_LR, 22 + EXEC_MODE_DMA_FENCE, 23 + }; 24 + 25 + /** 26 + * struct xe_hw_engine_group - Hardware engine group 27 + * 28 + * hw engines belong to the same group if they share hardware resources in a way 29 + * that prevents them from making progress when one is stuck on a page fault. 30 + */ 31 + struct xe_hw_engine_group { 32 + /** 33 + * @exec_queue_list: list of exec queues attached to this 34 + * xe_hw_engine_group 35 + */ 36 + struct list_head exec_queue_list; 37 + /** @resume_work: worker to resume faulting LR exec queues */ 38 + struct work_struct resume_work; 39 + /** @resume_wq: workqueue to resume faulting LR exec queues */ 40 + struct workqueue_struct *resume_wq; 41 + /** 42 + * @mode_sem: used to protect this group's hardware resources and ensure 43 + * mutual exclusion between execution only in faulting LR mode and 44 + * execution only in DMA_FENCE mode 45 + */ 46 + struct rw_semaphore mode_sem; 47 + /** @cur_mode: current execution mode of this hw engine group */ 48 + enum xe_hw_engine_group_execution_mode cur_mode; 49 + }; 50 + 51 + #endif

+2

drivers/gpu/drm/xe/xe_hw_engine_types.h

··· 150 150 struct xe_hw_engine_class_intf *eclass; 151 151 /** @oa_unit: oa unit for this hw engine */ 152 152 struct xe_oa_unit *oa_unit; 153 + /** @hw_engine_group: the group of hw engines this one belongs to */ 154 + struct xe_hw_engine_group *hw_engine_group; 153 155 }; 154 156 155 157 /**

+26 -11

drivers/gpu/drm/xe/xe_lrc.c

··· 5 5 6 6 #include "xe_lrc.h" 7 7 8 + #include <generated/xe_wa_oob.h> 9 + 8 10 #include <linux/ascii85.h> 9 11 10 12 #include "instructions/xe_mi_commands.h" ··· 26 24 #include "xe_memirq.h" 27 25 #include "xe_sriov.h" 28 26 #include "xe_vm.h" 27 + #include "xe_wa.h" 29 28 30 29 #define LRC_VALID BIT_ULL(0) 31 30 #define LRC_PRIVILEGE BIT_ULL(8) ··· 1584 1581 int state_table_size = 0; 1585 1582 1586 1583 /* 1587 - * At the moment we only need to emit non-register state for the RCS 1588 - * engine. 1584 + * Wa_14019789679 1585 + * 1586 + * If the driver doesn't explicitly emit the SVG instructions while 1587 + * setting up the default LRC, the context switch will write 0's 1588 + * (noops) into the LRC memory rather than the expected instruction 1589 + * headers. Application contexts start out as a copy of the default 1590 + * LRC, and if they also do not emit specific settings for some SVG 1591 + * state, then on context restore they'll unintentionally inherit 1592 + * whatever state setting the previous context had programmed into the 1593 + * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will 1594 + * prevent the hardware from resetting that state back to any specific 1595 + * value). 1596 + * 1597 + * The official workaround only requires emitting 3DSTATE_MESH_CONTROL 1598 + * since that's a specific state setting that can easily cause GPU 1599 + * hangs if unintentionally inherited. However to be safe we'll 1600 + * continue to emit all of the SVG state since it's best not to leak 1601 + * any of the state between contexts, even if that leakage is harmless. 1589 1602 */ 1590 - if (q->hwe->class != XE_ENGINE_CLASS_RENDER) 1591 - return; 1592 - 1593 - switch (GRAPHICS_VERx100(xe)) { 1594 - case 1255: 1595 - case 1270 ... 
2004: 1603 + if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { 1596 1604 state_table = xe_hpg_svg_state; 1597 1605 state_table_size = ARRAY_SIZE(xe_hpg_svg_state); 1598 - break; 1599 - default: 1606 + } 1607 + 1608 + if (!state_table) { 1600 1609 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", 1601 1610 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); 1602 1611 return; ··· 1649 1634 if (!snapshot) 1650 1635 return NULL; 1651 1636 1652 - if (lrc->bo && lrc->bo->vm) 1637 + if (lrc->bo->vm) 1653 1638 xe_vm_get(lrc->bo->vm); 1654 1639 1655 1640 snapshot->context_desc = xe_lrc_ggtt_addr(lrc);

+20 -9

drivers/gpu/drm/xe/xe_migrate.c

··· 442 442 m->q = xe_exec_queue_create_class(xe, primary_gt, vm, 443 443 XE_ENGINE_CLASS_COPY, 444 444 EXEC_QUEUE_FLAG_KERNEL | 445 - EXEC_QUEUE_FLAG_PERMANENT); 445 + EXEC_QUEUE_FLAG_PERMANENT, 0); 446 446 } 447 447 if (IS_ERR(m->q)) { 448 448 xe_vm_close_and_put(vm); ··· 1037 1037 * @m: The migration context. 1038 1038 * @bo: The buffer object @dst is currently bound to. 1039 1039 * @dst: The dst TTM resource to be cleared. 1040 + * @clear_flags: flags to specify which data to clear: CCS, BO, or both. 1040 1041 * 1041 - * Clear the contents of @dst to zero. On flat CCS devices, 1042 - * the CCS metadata is cleared to zero as well on VRAM destinations. 1042 + * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set. 1043 + * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA. 1044 + * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear bo as well as CCS metadata. 1043 1045 * TODO: Eliminate the @bo argument. 1044 1046 * 1045 1047 * Return: Pointer to a dma_fence representing the last clear batch, or ··· 1050 1048 */ 1051 1049 struct dma_fence *xe_migrate_clear(struct xe_migrate *m, 1052 1050 struct xe_bo *bo, 1053 - struct ttm_resource *dst) 1051 + struct ttm_resource *dst, 1052 + u32 clear_flags) 1054 1053 { 1055 1054 bool clear_vram = mem_type_is_vram(dst->mem_type); 1055 + bool clear_bo_data = XE_MIGRATE_CLEAR_FLAG_BO_DATA & clear_flags; 1056 + bool clear_ccs = XE_MIGRATE_CLEAR_FLAG_CCS_DATA & clear_flags; 1056 1057 struct xe_gt *gt = m->tile->primary_gt; 1057 1058 struct xe_device *xe = gt_to_xe(gt); 1058 - bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? 
true : false; 1059 + bool clear_only_system_ccs = false; 1059 1060 struct dma_fence *fence = NULL; 1060 1061 u64 size = bo->size; 1061 1062 struct xe_res_cursor src_it; 1062 1063 struct ttm_resource *src = dst; 1063 1064 int err; 1065 + 1066 + if (WARN_ON(!clear_bo_data && !clear_ccs)) 1067 + return NULL; 1068 + 1069 + if (!clear_bo_data && clear_ccs && !IS_DGFX(xe)) 1070 + clear_only_system_ccs = true; 1064 1071 1065 1072 if (!clear_vram) 1066 1073 xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); ··· 1096 1085 batch_size = 2 + 1097 1086 pte_update_size(m, pte_flags, src, &src_it, 1098 1087 &clear_L0, &clear_L0_ofs, &clear_L0_pt, 1099 - clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, 1088 + clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0, 1100 1089 avail_pts); 1101 1090 1102 1091 if (xe_migrate_needs_ccs_emit(xe)) ··· 1118 1107 if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) 1119 1108 xe_res_next(&src_it, clear_L0); 1120 1109 else 1121 - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, 1110 + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, 1122 1111 &src_it, clear_L0, dst); 1123 1112 1124 1113 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 1125 1114 update_idx = bb->len; 1126 1115 1127 - if (!clear_system_ccs) 1116 + if (clear_bo_data) 1128 1117 emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); 1129 1118 1130 1119 if (xe_migrate_needs_ccs_emit(xe)) { ··· 1183 1172 return ERR_PTR(err); 1184 1173 } 1185 1174 1186 - if (clear_system_ccs) 1175 + if (clear_ccs) 1187 1176 bo->ccs_cleared = true; 1188 1177 1189 1178 return fence;

+7 -2

drivers/gpu/drm/xe/xe_migrate.h

··· 6 6 #ifndef _XE_MIGRATE_ 7 7 #define _XE_MIGRATE_ 8 8 9 - #include <drm/drm_mm.h> 9 + #include <linux/types.h> 10 10 11 11 struct dma_fence; 12 12 struct iosys_map; ··· 102 102 struct ttm_resource *dst, 103 103 bool copy_only_ccs); 104 104 105 + #define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) 106 + #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) 107 + #define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ 108 + XE_MIGRATE_CLEAR_FLAG_CCS_DATA) 105 109 struct dma_fence *xe_migrate_clear(struct xe_migrate *m, 106 110 struct xe_bo *bo, 107 - struct ttm_resource *dst); 111 + struct ttm_resource *dst, 112 + u32 clear_flags); 108 113 109 114 struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); 110 115

+2 -3

drivers/gpu/drm/xe/xe_mmio.c

··· 29 29 struct xe_tile *tile; 30 30 int id; 31 31 32 - for_each_tile(tile, xe, id) 33 - if (tile != xe_device_get_root_tile(xe)) 34 - tile->mmio.regs = NULL; 32 + for_each_remote_tile(tile, xe, id) 33 + tile->mmio.regs = NULL; 35 34 } 36 35 37 36 /*

+48 -6

drivers/gpu/drm/xe/xe_module.c

··· 8 8 #include <linux/init.h> 9 9 #include <linux/module.h> 10 10 11 + #include <drm/drm_module.h> 12 + 11 13 #include "xe_drv.h" 12 14 #include "xe_hw_fence.h" 13 15 #include "xe_pci.h" 16 + #include "xe_pm.h" 14 17 #include "xe_observation.h" 15 18 #include "xe_sched_job.h" 16 19 17 20 struct xe_modparam xe_modparam = { 18 - .enable_display = true, 21 + .probe_display = true, 19 22 .guc_log_level = 5, 20 23 .force_probe = CONFIG_DRM_XE_FORCE_PROBE, 21 24 .wedged_mode = 1, ··· 28 25 module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); 29 26 MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); 30 27 31 - module_param_named(enable_display, xe_modparam.enable_display, bool, 0444); 32 - MODULE_PARM_DESC(enable_display, "Enable display"); 28 + module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); 29 + MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); 33 30 34 31 module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); 35 32 MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); ··· 64 61 MODULE_PARM_DESC(wedged_mode, 65 62 "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); 66 63 64 + static int xe_check_nomodeset(void) 65 + { 66 + if (drm_firmware_drivers_only()) 67 + return -ENODEV; 68 + 69 + return 0; 70 + } 71 + 67 72 struct init_funcs { 68 73 int (*init)(void); 69 74 void (*exit)(void); 70 75 }; 71 76 77 + static void xe_dummy_exit(void) 78 + { 79 + } 80 + 72 81 static const struct init_funcs init_funcs[] = { 82 + { 83 + .init = xe_check_nomodeset, 84 + }, 73 85 { 74 86 .init = xe_hw_fence_module_init, 75 87 .exit = xe_hw_fence_module_exit, ··· 101 83 .init = xe_observation_sysctl_register, 102 84 .exit = xe_observation_sysctl_unregister, 103 85 }, 86 + { 87 + .init = xe_pm_module_init, 88 + .exit = xe_dummy_exit, 89 + }, 104 90 }; 91 + 92 + static int __init 
xe_call_init_func(unsigned int i) 93 + { 94 + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) 95 + return 0; 96 + if (!init_funcs[i].init) 97 + return 0; 98 + 99 + return init_funcs[i].init(); 100 + } 101 + 102 + static void xe_call_exit_func(unsigned int i) 103 + { 104 + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) 105 + return; 106 + if (!init_funcs[i].exit) 107 + return; 108 + 109 + init_funcs[i].exit(); 110 + } 105 111 106 112 static int __init xe_init(void) 107 113 { 108 114 int err, i; 109 115 110 116 for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { 111 - err = init_funcs[i].init(); 117 + err = xe_call_init_func(i); 112 118 if (err) { 113 119 while (i--) 114 - init_funcs[i].exit(); 120 + xe_call_exit_func(i); 115 121 return err; 116 122 } 117 123 } ··· 148 106 int i; 149 107 150 108 for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) 151 - init_funcs[i].exit(); 109 + xe_call_exit_func(i); 152 110 } 153 111 154 112 module_init(xe_init);

+1 -1

drivers/gpu/drm/xe/xe_module.h

··· 11 11 /* Module modprobe variables */ 12 12 struct xe_modparam { 13 13 bool force_execlist; 14 - bool enable_display; 14 + bool probe_display; 15 15 u32 force_vram_bar_size; 16 16 int guc_log_level; 17 17 char *guc_firmware_path;

+1 -2

drivers/gpu/drm/xe/xe_oa.c

··· 1244 1244 vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, 1245 1245 VM_MAYWRITE | VM_MAYEXEC); 1246 1246 1247 - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == 1248 - (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); 1247 + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); 1249 1248 for (i = 0; i < bo->ttm.ttm->num_pages; i++) { 1250 1249 ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), 1251 1250 PAGE_SIZE, vma->vm_page_prot);

+5 -7

drivers/gpu/drm/xe/xe_pci.c

··· 338 338 static const struct xe_device_desc lnl_desc = { 339 339 PLATFORM(LUNARLAKE), 340 340 .has_display = true, 341 - .require_force_probe = true, 342 341 }; 343 342 344 343 static const struct xe_device_desc bmg_desc = { 345 344 DGFX_FEATURES, 346 345 PLATFORM(BATTLEMAGE), 347 346 .has_display = true, 348 - .require_force_probe = true, 349 347 .has_heci_cscfi = 1, 350 348 }; 351 349 ··· 614 616 xe->info.skip_mtcfg = desc->skip_mtcfg; 615 617 xe->info.skip_pcode = desc->skip_pcode; 616 618 617 - xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && 618 - xe_modparam.enable_display && 619 - desc->has_display; 619 + xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && 620 + xe_modparam.probe_display && 621 + desc->has_display; 620 622 621 623 err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); 622 624 if (err) ··· 745 747 { 746 748 struct xe_device *xe; 747 749 748 - xe = pci_get_drvdata(pdev); 750 + xe = pdev_to_xe_device(pdev); 749 751 if (!xe) /* driver load aborted, nothing to cleanup */ 750 752 return; 751 753 ··· 827 829 xe->info.media_name, 828 830 xe->info.media_verx100 / 100, 829 831 xe->info.media_verx100 % 100, 830 - str_yes_no(xe->info.enable_display), 832 + str_yes_no(xe->info.probe_display), 831 833 xe->info.dma_mask_size, xe->info.tile_count, 832 834 xe->info.has_heci_gscfi, xe->info.has_heci_cscfi); 833 835

+75 -15

drivers/gpu/drm/xe/xe_pm.c

··· 70 70 */ 71 71 72 72 #ifdef CONFIG_LOCKDEP 73 - static struct lockdep_map xe_pm_runtime_lockdep_map = { 74 - .name = "xe_pm_runtime_lockdep_map" 73 + static struct lockdep_map xe_pm_runtime_d3cold_map = { 74 + .name = "xe_rpm_d3cold_map" 75 + }; 76 + 77 + static struct lockdep_map xe_pm_runtime_nod3cold_map = { 78 + .name = "xe_rpm_nod3cold_map" 75 79 }; 76 80 #endif 81 + 82 + static bool __maybe_unused xe_rpm_reclaim_safe(const struct xe_device *xe) 83 + { 84 + return !xe->d3cold.capable && !xe->info.has_sriov; 85 + } 86 + 87 + static void xe_rpm_lockmap_acquire(const struct xe_device *xe) 88 + { 89 + lock_map_acquire(xe_rpm_reclaim_safe(xe) ? 90 + &xe_pm_runtime_nod3cold_map : 91 + &xe_pm_runtime_d3cold_map); 92 + } 93 + 94 + static void xe_rpm_lockmap_release(const struct xe_device *xe) 95 + { 96 + lock_map_release(xe_rpm_reclaim_safe(xe) ? 97 + &xe_pm_runtime_nod3cold_map : 98 + &xe_pm_runtime_d3cold_map); 99 + } 77 100 78 101 /** 79 102 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle ··· 377 354 * annotation here and in xe_pm_runtime_get() lockdep will see 378 355 * the potential lock inversion and give us a nice splat. 
379 356 */ 380 - lock_map_acquire(&xe_pm_runtime_lockdep_map); 357 + xe_rpm_lockmap_acquire(xe); 381 358 382 359 /* 383 360 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify ··· 388 365 &xe->mem_access.vram_userfault.list, vram_userfault_link) 389 366 xe_bo_runtime_pm_release_mmap_offset(bo); 390 367 mutex_unlock(&xe->mem_access.vram_userfault.lock); 368 + 369 + xe_display_pm_runtime_suspend(xe); 391 370 392 371 if (xe->d3cold.allowed) { 393 372 xe_display_pm_suspend(xe, true); ··· 412 387 out: 413 388 if (err) 414 389 xe_display_pm_resume(xe, true); 415 - lock_map_release(&xe_pm_runtime_lockdep_map); 390 + xe_rpm_lockmap_release(xe); 416 391 xe_pm_write_callback_task(xe, NULL); 417 392 return err; 418 393 } ··· 433 408 /* Disable access_ongoing asserts and prevent recursive pm calls */ 434 409 xe_pm_write_callback_task(xe, current); 435 410 436 - lock_map_acquire(&xe_pm_runtime_lockdep_map); 411 + xe_rpm_lockmap_acquire(xe); 437 412 438 413 if (xe->d3cold.allowed) { 439 414 err = xe_pcode_ready(xe, true); ··· 456 431 for_each_gt(gt, xe, id) 457 432 xe_gt_resume(gt); 458 433 434 + xe_display_pm_runtime_resume(xe); 435 + 459 436 if (xe->d3cold.allowed) { 460 - xe_display_pm_resume(xe, true); 461 437 err = xe_bo_restore_user(xe); 462 438 if (err) 463 439 goto out; 464 440 } 441 + 465 442 out: 466 - lock_map_release(&xe_pm_runtime_lockdep_map); 443 + xe_rpm_lockmap_release(xe); 467 444 xe_pm_write_callback_task(xe, NULL); 468 445 return err; 469 446 } ··· 479 452 * stuff that can happen inside the runtime_resume callback by acquiring 480 453 * a dummy lock (it doesn't protect anything and gets compiled out on 481 454 * non-debug builds). Lockdep then only needs to see the 482 - * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can 483 - * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. 
455 + * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can 456 + * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. 484 457 * For example if the (callers_locks) are ever grabbed in the 485 458 * runtime_resume callback, lockdep should give us a nice splat. 486 459 */ 487 - static void pm_runtime_lockdep_prime(void) 460 + static void xe_rpm_might_enter_cb(const struct xe_device *xe) 488 461 { 489 - lock_map_acquire(&xe_pm_runtime_lockdep_map); 490 - lock_map_release(&xe_pm_runtime_lockdep_map); 462 + xe_rpm_lockmap_acquire(xe); 463 + xe_rpm_lockmap_release(xe); 464 + } 465 + 466 + /* 467 + * Prime the lockdep maps for known locking orders that need to 468 + * be supported but that may not always occur on all systems. 469 + */ 470 + static void xe_pm_runtime_lockdep_prime(void) 471 + { 472 + struct dma_resv lockdep_resv; 473 + 474 + dma_resv_init(&lockdep_resv); 475 + lock_map_acquire(&xe_pm_runtime_d3cold_map); 476 + /* D3Cold takes the dma_resv locks to evict bos */ 477 + dma_resv_lock(&lockdep_resv, NULL); 478 + dma_resv_unlock(&lockdep_resv); 479 + lock_map_release(&xe_pm_runtime_d3cold_map); 480 + 481 + /* Shrinkers might like to wake up the device under reclaim. 
*/ 482 + fs_reclaim_acquire(GFP_KERNEL); 483 + lock_map_acquire(&xe_pm_runtime_nod3cold_map); 484 + lock_map_release(&xe_pm_runtime_nod3cold_map); 485 + fs_reclaim_release(GFP_KERNEL); 491 486 } 492 487 493 488 /** ··· 524 475 if (xe_pm_read_callback_task(xe) == current) 525 476 return; 526 477 527 - pm_runtime_lockdep_prime(); 478 + xe_rpm_might_enter_cb(xe); 528 479 pm_runtime_resume(xe->drm.dev); 529 480 } 530 481 ··· 556 507 if (WARN_ON(xe_pm_read_callback_task(xe) == current)) 557 508 return -ELOOP; 558 509 559 - pm_runtime_lockdep_prime(); 510 + xe_rpm_might_enter_cb(xe); 560 511 return pm_runtime_get_sync(xe->drm.dev); 561 512 } 562 513 ··· 624 575 return true; 625 576 } 626 577 627 - pm_runtime_lockdep_prime(); 578 + xe_rpm_might_enter_cb(xe); 628 579 return pm_runtime_resume_and_get(xe->drm.dev) >= 0; 629 580 } 630 581 ··· 715 666 716 667 drm_dbg(&xe->drm, 717 668 "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); 669 + } 670 + 671 + /** 672 + * xe_pm_module_init() - Perform xe_pm specific module initialization. 673 + * 674 + * Return: 0 on success. Currently doesn't fail. 675 + */ 676 + int __init xe_pm_module_init(void) 677 + { 678 + xe_pm_runtime_lockdep_prime(); 679 + return 0; 718 680 }

+1

drivers/gpu/drm/xe/xe_pm.h

··· 32 32 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); 33 33 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); 34 34 struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); 35 + int xe_pm_module_init(void); 35 36 36 37 #endif

+6 -4

drivers/gpu/drm/xe/xe_pt.c

··· 1149 1149 return err; 1150 1150 } 1151 1151 1152 - if (job) 1153 - err = xe_sched_job_last_fence_add_dep(job, vm); 1154 - else 1155 - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); 1152 + if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { 1153 + if (job) 1154 + err = xe_sched_job_last_fence_add_dep(job, vm); 1155 + else 1156 + err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); 1157 + } 1156 1158 1157 1159 for (i = 0; job && !err && i < vops->num_syncs; i++) 1158 1160 err = xe_sync_entry_add_deps(&vops->syncs[i], job);

-1

drivers/gpu/drm/xe/xe_res_cursor.h

··· 26 26 27 27 #include <linux/scatterlist.h> 28 28 29 - #include <drm/drm_mm.h> 30 29 #include <drm/ttm/ttm_placement.h> 31 30 #include <drm/ttm/ttm_range_manager.h> 32 31 #include <drm/ttm/ttm_resource.h>

+6 -7

drivers/gpu/drm/xe/xe_sa.c

··· 25 25 26 26 drm_suballoc_manager_fini(&sa_manager->base); 27 27 28 - if (bo->vmap.is_iomem) 28 + if (sa_manager->is_iomem) 29 29 kvfree(sa_manager->cpu_ptr); 30 30 31 - xe_bo_unpin_map_no_vm(bo); 32 31 sa_manager->bo = NULL; 33 32 } 34 33 ··· 46 47 47 48 sa_manager->bo = NULL; 48 49 49 - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, 50 - XE_BO_FLAG_VRAM_IF_DGFX(tile) | 51 - XE_BO_FLAG_GGTT | 52 - XE_BO_FLAG_GGTT_INVALIDATE); 50 + bo = xe_managed_bo_create_pin_map(xe, tile, size, 51 + XE_BO_FLAG_VRAM_IF_DGFX(tile) | 52 + XE_BO_FLAG_GGTT | 53 + XE_BO_FLAG_GGTT_INVALIDATE); 53 54 if (IS_ERR(bo)) { 54 55 drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", 55 56 PTR_ERR(bo)); 56 57 return (struct xe_sa_manager *)bo; 57 58 } 58 59 sa_manager->bo = bo; 60 + sa_manager->is_iomem = bo->vmap.is_iomem; 59 61 60 62 drm_suballoc_manager_init(&sa_manager->base, managed_size, align); 61 63 sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); ··· 64 64 if (bo->vmap.is_iomem) { 65 65 sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); 66 66 if (!sa_manager->cpu_ptr) { 67 - xe_bo_unpin_map_no_vm(sa_manager->bo); 68 67 sa_manager->bo = NULL; 69 68 return ERR_PTR(-ENOMEM); 70 69 }

+1

drivers/gpu/drm/xe/xe_sa_types.h

··· 14 14 struct xe_bo *bo; 15 15 u64 gpu_addr; 16 16 void *cpu_ptr; 17 + bool is_iomem; 17 18 }; 18 19 19 20 #endif

+1 -2

drivers/gpu/drm/xe/xe_sched_job.c

··· 89 89 90 90 if (ptrs->lrc_fence) 91 91 xe_lrc_free_seqno_fence(ptrs->lrc_fence); 92 - if (ptrs->chain_fence) 93 - dma_fence_chain_free(ptrs->chain_fence); 92 + dma_fence_chain_free(ptrs->chain_fence); 94 93 } 95 94 } 96 95

+6 -15

drivers/gpu/drm/xe/xe_sync.c

··· 55 55 struct xe_user_fence *ufence; 56 56 u64 __user *ptr = u64_to_user_ptr(addr); 57 57 58 - if (!access_ok(ptr, sizeof(ptr))) 58 + if (!access_ok(ptr, sizeof(*ptr))) 59 59 return ERR_PTR(-EFAULT); 60 60 61 61 ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); ··· 206 206 207 207 int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) 208 208 { 209 - int err; 210 - 211 - if (sync->fence) { 212 - err = drm_sched_job_add_dependency(&job->drm, 213 - dma_fence_get(sync->fence)); 214 - if (err) { 215 - dma_fence_put(sync->fence); 216 - return err; 217 - } 218 - } 209 + if (sync->fence) 210 + return drm_sched_job_add_dependency(&job->drm, 211 + dma_fence_get(sync->fence)); 219 212 220 213 return 0; 221 214 } ··· 249 256 { 250 257 if (sync->syncobj) 251 258 drm_syncobj_put(sync->syncobj); 252 - if (sync->fence) 253 - dma_fence_put(sync->fence); 254 - if (sync->chain_fence) 255 - dma_fence_chain_free(sync->chain_fence); 259 + dma_fence_put(sync->fence); 260 + dma_fence_chain_free(sync->chain_fence); 256 261 if (sync->ufence) 257 262 user_fence_put(sync->ufence); 258 263 }

-1

drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c

··· 5 5 */ 6 6 7 7 #include <drm/drm_managed.h> 8 - #include <drm/drm_mm.h> 9 8 10 9 #include <drm/ttm/ttm_device.h> 11 10 #include <drm/ttm/ttm_placement.h>

+12 -1

drivers/gpu/drm/xe/xe_tuning.c

··· 39 39 }, 40 40 { XE_RTP_NAME("Tuning: Compression Overfetch"), 41 41 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 42 - XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), 42 + XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), 43 + SET(CCCHKNREG1, L3CMPCTRL)) 43 44 }, 44 45 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), 45 46 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 46 47 XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) 48 + }, 49 + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), 50 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 51 + XE_RTP_ACTIONS(SET(L3SQCREG2, 52 + COMPMEMRD256BOVRFETCHEN)) 53 + }, 54 + { XE_RTP_NAME("Tuning: Stateless compression control"), 55 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 56 + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, 57 + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) 47 58 }, 48 59 {} 49 60 };

+15 -13

drivers/gpu/drm/xe/xe_uc_fw.c

··· 15 15 #include "xe_gsc.h" 16 16 #include "xe_gt.h" 17 17 #include "xe_gt_printk.h" 18 + #include "xe_guc.h" 18 19 #include "xe_map.h" 19 20 #include "xe_mmio.h" 20 21 #include "xe_module.h" ··· 106 105 }; 107 106 108 107 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ 109 - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \ 110 - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \ 111 - fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \ 112 - fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \ 113 - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \ 114 - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \ 115 - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \ 116 - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \ 117 - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) 108 + fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ 109 + fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ 110 + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ 111 + fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ 112 + fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ 113 + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ 114 + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ 115 + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ 116 + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ 117 + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) 118 118 119 119 #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ 120 120 fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ ··· 311 309 312 310 xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); 313 311 314 - /* We don't support GuC releases older than 70.19 */ 315 - if (release->major < 70 || (release->major == 70 && release->minor < 19)) { 316 - xe_gt_err(gt, "Unsupported GuC v%u.%u! 
v70.19 or newer is required\n", 317 - release->major, release->minor); 312 + /* We don't support GuC releases older than 70.29.2 */ 313 + if (MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2)) { 314 + xe_gt_err(gt, "Unsupported GuC v%u.%u.%u! v70.29.2 or newer is required\n", 315 + release->major, release->minor, release->patch); 318 316 return -EINVAL; 319 317 } 320 318

+37 -53

drivers/gpu/drm/xe/xe_vm.c

··· 275 275 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM 276 276 * @vm: The VM. 277 277 * @q: The exec_queue 278 + * 279 + * Note that this function might be called multiple times on the same queue. 278 280 */ 279 281 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) 280 282 { ··· 284 282 return; 285 283 286 284 down_write(&vm->lock); 287 - list_del(&q->lr.link); 288 - --vm->preempt.num_exec_queues; 285 + if (!list_empty(&q->lr.link)) { 286 + list_del_init(&q->lr.link); 287 + --vm->preempt.num_exec_queues; 288 + } 289 289 if (q->lr.pfence) { 290 290 dma_fence_enable_sw_signaling(q->lr.pfence); 291 291 dma_fence_put(q->lr.pfence); ··· 1195 1191 .vm_free = xe_vm_free, 1196 1192 }; 1197 1193 1198 - static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) 1194 + static u64 pde_encode_pat_index(u16 pat_index) 1199 1195 { 1200 1196 u64 pte = 0; 1201 1197 ··· 1208 1204 return pte; 1209 1205 } 1210 1206 1211 - static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, 1212 - u32 pt_level) 1207 + static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) 1213 1208 { 1214 1209 u64 pte = 0; 1215 1210 ··· 1249 1246 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, 1250 1247 const u16 pat_index) 1251 1248 { 1252 - struct xe_device *xe = xe_bo_device(bo); 1253 1249 u64 pde; 1254 1250 1255 1251 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1256 1252 pde |= XE_PAGE_PRESENT | XE_PAGE_RW; 1257 - pde |= pde_encode_pat_index(xe, pat_index); 1253 + pde |= pde_encode_pat_index(pat_index); 1258 1254 1259 1255 return pde; 1260 1256 } ··· 1261 1259 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, 1262 1260 u16 pat_index, u32 pt_level) 1263 1261 { 1264 - struct xe_device *xe = xe_bo_device(bo); 1265 1262 u64 pte; 1266 1263 1267 1264 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); 1268 1265 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1269 - pte |= pte_encode_pat_index(xe, pat_index, 
pt_level); 1266 + pte |= pte_encode_pat_index(pat_index, pt_level); 1270 1267 pte |= pte_encode_ps(pt_level); 1271 1268 1272 1269 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) ··· 1277 1276 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, 1278 1277 u16 pat_index, u32 pt_level) 1279 1278 { 1280 - struct xe_device *xe = xe_vma_vm(vma)->xe; 1281 - 1282 1279 pte |= XE_PAGE_PRESENT; 1283 1280 1284 1281 if (likely(!xe_vma_read_only(vma))) 1285 1282 pte |= XE_PAGE_RW; 1286 1283 1287 - pte |= pte_encode_pat_index(xe, pat_index, pt_level); 1284 + pte |= pte_encode_pat_index(pat_index, pt_level); 1288 1285 pte |= pte_encode_ps(pt_level); 1289 1286 1290 1287 if (unlikely(xe_vma_is_null(vma))) ··· 1302 1303 1303 1304 pte = addr; 1304 1305 pte |= XE_PAGE_PRESENT | XE_PAGE_RW; 1305 - pte |= pte_encode_pat_index(xe, pat_index, pt_level); 1306 + pte |= pte_encode_pat_index(pat_index, pt_level); 1306 1307 pte |= pte_encode_ps(pt_level); 1307 1308 1308 1309 if (devmem) ··· 1482 1483 /* Kernel migration VM shouldn't have a circular loop.. 
*/ 1483 1484 if (!(flags & XE_VM_FLAG_MIGRATION)) { 1484 1485 for_each_tile(tile, xe, id) { 1485 - struct xe_gt *gt = tile->primary_gt; 1486 - struct xe_vm *migrate_vm; 1487 1486 struct xe_exec_queue *q; 1488 1487 u32 create_flags = EXEC_QUEUE_FLAG_VM; 1489 1488 1490 1489 if (!vm->pt_root[id]) 1491 1490 continue; 1492 1491 1493 - migrate_vm = xe_migrate_get_vm(tile->migrate); 1494 - q = xe_exec_queue_create_class(xe, gt, migrate_vm, 1495 - XE_ENGINE_CLASS_COPY, 1496 - create_flags); 1497 - xe_vm_put(migrate_vm); 1492 + q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); 1498 1493 if (IS_ERR(q)) { 1499 1494 err = PTR_ERR(q); 1500 1495 goto err_close; ··· 1500 1507 1501 1508 if (number_tiles > 1) 1502 1509 vm->composite_fence_ctx = dma_fence_context_alloc(1); 1503 - 1504 - mutex_lock(&xe->usm.lock); 1505 - if (flags & XE_VM_FLAG_FAULT_MODE) 1506 - xe->usm.num_vm_in_fault_mode++; 1507 - else if (!(flags & XE_VM_FLAG_MIGRATION)) 1508 - xe->usm.num_vm_in_non_fault_mode++; 1509 - mutex_unlock(&xe->usm.lock); 1510 1510 1511 1511 trace_xe_vm_create(vm); 1512 1512 ··· 1614 1628 up_write(&vm->lock); 1615 1629 1616 1630 mutex_lock(&xe->usm.lock); 1617 - if (vm->flags & XE_VM_FLAG_FAULT_MODE) 1618 - xe->usm.num_vm_in_fault_mode--; 1619 - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) 1620 - xe->usm.num_vm_in_non_fault_mode--; 1621 - 1622 1631 if (vm->usm.asid) { 1623 1632 void *lookup; 1624 1633 ··· 1749 1768 1750 1769 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && 1751 1770 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) 1752 - return -EINVAL; 1753 - 1754 - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && 1755 - xe_device_in_non_fault_mode(xe))) 1756 - return -EINVAL; 1757 - 1758 - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && 1759 - xe_device_in_fault_mode(xe))) 1760 1771 return -EINVAL; 1761 1772 1762 1773 if (XE_IOCTL_DBG(xe, args->extensions)) ··· 3158 3185 { 3159 3186 struct xe_device *xe = 
xe_vma_vm(vma)->xe; 3160 3187 struct xe_tile *tile; 3161 - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; 3162 - u32 tile_needs_invalidate = 0; 3188 + struct xe_gt_tlb_invalidation_fence 3189 + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; 3163 3190 u8 id; 3191 + u32 fence_id = 0; 3164 3192 int ret = 0; 3165 3193 3166 3194 xe_assert(xe, !xe_vma_is_null(vma)); ··· 3189 3215 if (xe_pt_zap_ptes(tile, vma)) { 3190 3216 xe_device_wmb(xe); 3191 3217 xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3192 - &fence[id], true); 3218 + &fence[fence_id], 3219 + true); 3193 3220 3194 - /* 3195 - * FIXME: We potentially need to invalidate multiple 3196 - * GTs within the tile 3197 - */ 3198 3221 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, 3199 - &fence[id], vma); 3222 + &fence[fence_id], vma); 3200 3223 if (ret < 0) { 3201 - xe_gt_tlb_invalidation_fence_fini(&fence[id]); 3224 + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); 3202 3225 goto wait; 3203 3226 } 3227 + ++fence_id; 3204 3228 3205 - tile_needs_invalidate |= BIT(id); 3229 + if (!tile->media_gt) 3230 + continue; 3231 + 3232 + xe_gt_tlb_invalidation_fence_init(tile->media_gt, 3233 + &fence[fence_id], 3234 + true); 3235 + 3236 + ret = xe_gt_tlb_invalidation_vma(tile->media_gt, 3237 + &fence[fence_id], vma); 3238 + if (ret < 0) { 3239 + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); 3240 + goto wait; 3241 + } 3242 + ++fence_id; 3206 3243 } 3207 3244 } 3208 3245 3209 3246 wait: 3210 - for_each_tile(tile, xe, id) 3211 - if (tile_needs_invalidate & BIT(id)) 3212 - xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3247 + for (id = 0; id < fence_id; ++id) 3248 + xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3213 3249 3214 3250 vma->tile_invalidated = vma->tile_mask; 3215 3251

-10

drivers/gpu/drm/xe/xe_wa.c

··· 557 557 XE_RTP_ACTION_FLAG(ENGINE_BASE))) 558 558 }, 559 559 560 - /* Xe2_LPM */ 561 - 562 - { XE_RTP_NAME("16021639441"), 563 - XE_RTP_RULES(MEDIA_VERSION(2000)), 564 - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), 565 - GHWSP_CSB_REPORT_DIS | 566 - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, 567 - XE_RTP_ACTION_FLAG(ENGINE_BASE))) 568 - }, 569 - 570 560 /* Xe2_HPM */ 571 561 572 562 { XE_RTP_NAME("16021639441"),

+6

drivers/gpu/drm/xe/xe_wa_oob.rules

··· 27 27 16022287689 GRAPHICS_VERSION(2001) 28 28 GRAPHICS_VERSION(2004) 29 29 13011645652 GRAPHICS_VERSION(2004) 30 + 14022293748 GRAPHICS_VERSION(2001) 31 + GRAPHICS_VERSION(2004) 32 + 22019794406 GRAPHICS_VERSION(2001) 33 + GRAPHICS_VERSION(2004) 30 34 22019338487 MEDIA_VERSION(2000) 31 35 GRAPHICS_VERSION(2001) 32 36 22019338487_display PLATFORM(LUNARLAKE) 33 37 16023588340 GRAPHICS_VERSION(2001) 38 + 14019789679 GRAPHICS_VERSION(1255) 39 + GRAPHICS_VERSION_RANGE(1270, 2004)

+53 -1

include/drm/drm_print.h

··· 221 221 222 222 /** 223 223 * struct drm_print_iterator - local struct used with drm_printer_coredump 224 - * @data: Pointer to the devcoredump output buffer 224 + * @data: Pointer to the devcoredump output buffer, can be NULL if using 225 + * drm_printer_coredump to determine size of devcoredump 225 226 * @start: The offset within the buffer to start writing 226 227 * @remain: The number of bytes to write for this iteration 227 228 */ ··· 266 265 * dev_coredumpm(dev, THIS_MODULE, data, 0, GFP_KERNEL, 267 266 * coredump_read, ...) 268 267 * } 268 + * 269 + * The above example has a time complexity of O(N^2), where N is the size of the 270 + * devcoredump. This is acceptable for small devcoredumps but scales poorly for 271 + * larger ones. 272 + * 273 + * Another use case for drm_coredump_printer is to capture the devcoredump into 274 + * a saved buffer before the dev_coredump() callback. This involves two passes: 275 + * one to determine the size of the devcoredump and another to print it to a 276 + * buffer. Then, in dev_coredump(), copy from the saved buffer into the 277 + * devcoredump read buffer. 278 + * 279 + * For example:: 280 + * 281 + * char *devcoredump_saved_buffer; 282 + * 283 + * ssize_t __coredump_print(char *buffer, ssize_t count, ...) 284 + * { 285 + * struct drm_print_iterator iter; 286 + * struct drm_printer p; 287 + * 288 + * iter.data = buffer; 289 + * iter.start = 0; 290 + * iter.remain = count; 291 + * 292 + * p = drm_coredump_printer(&iter); 293 + * 294 + * drm_printf(p, "foo=%d\n", foo); 295 + * ... 296 + * return count - iter.remain; 297 + * } 298 + * 299 + * void coredump_print(...) 
300 + * { 301 + * ssize_t count; 302 + * 303 + * count = __coredump_print(NULL, INT_MAX, ...); 304 + * devcoredump_saved_buffer = kvmalloc(count, GFP_KERNEL); 305 + * __coredump_print(devcoredump_saved_buffer, count, ...); 306 + * } 307 + * 308 + * void coredump_read(char *buffer, loff_t offset, size_t count, 309 + * void *data, size_t datalen) 310 + * { 311 + * ... 312 + * memcpy(buffer, devcoredump_saved_buffer + offset, count); 313 + * ... 314 + * } 315 + * 316 + * The above example has a time complexity of O(2*N), where N is the size of the 317 + * devcoredump. This scales better than the previous example for larger 318 + * devcoredumps. 269 319 * 270 320 * RETURNS: 271 321 * The &drm_printer object

Configure Feed

Configure Feed