Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-fixes-2024-04-05' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
"Weekly fixes, mostly xe and i915, amdgpu on a week off, otherwise a
nouveau fix for a crash with new vulkan cts tests, and a couple of
cleanups and misc fixes.

display:
- fix typos in kerneldoc

prime:
- unbreak dma-buf export for virt-gpu

nouveau:
- uvmm: fix remap address calculation
- minor cleanups

panfrost:
- fix power-transition timeouts

xe:
- Stop using system_unbound_wq for preempt fences
- Fix saving unordered rebinding fences by attaching them as kernel
fences to the vm's resv
- Fix TLB invalidation fences completing out of order
- Move rebind TLB invalidation to the ring ops to reduce the latency

i915:
- A few DisplayPort related fixes
- eDP PSR fixes
- Remove some VM space restrictions on older platforms
- Disable automatic CCS load balancing"

* tag 'drm-fixes-2024-04-05' of https://gitlab.freedesktop.org/drm/kernel: (22 commits)
drm/xe: Use ordered wq for preempt fence waiting
drm/xe: Move vma rebinding to the drm_exec locking loop
drm/xe: Make TLB invalidation fences unordered
drm/xe: Rework rebinding
drm/xe: Use ring ops TLB invalidation for rebinds
drm/i915/mst: Reject FEC+MST on ICL
drm/i915/mst: Limit MST+DSC to TGL+
drm/i915/dp: Fix the computation for compressed_bpp for DISPLAY < 13
drm/i915/gt: Enable only one CCS for compute workload
drm/i915/gt: Do not generate the command streamer for all the CCS
drm/i915/gt: Disable HW load balancing for CCS
drm/i915/gt: Limit the reserved VM space to only the platforms that need it
drm/i915/psr: Fix intel_psr2_sel_fetch_et_alignment usage
drm/i915/psr: Move writing early transport pipe src
drm/i915/psr: Calculate PIPE_SRCSZ_ERLY_TPT value
drm/i915/dp: Remove support for UHBR13.5
drm/i915/dp: Fix DSC state HW readout for SST connectors
drm/display: fix typo
drm/prime: Unbreak virtgpu dma-buf export
nouveau/uvmm: fix addr/range calcs for remap operations
...

+340 -197
+2 -2
drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
··· 52 52 * @adapter: I2C adapter for the DDC bus 53 53 * @offset: register offset 54 54 * @buffer: buffer for return data 55 - * @size: sizo of the buffer 55 + * @size: size of the buffer 56 56 * 57 57 * Reads @size bytes from the DP dual mode adaptor registers 58 58 * starting at @offset. ··· 116 116 * @adapter: I2C adapter for the DDC bus 117 117 * @offset: register offset 118 118 * @buffer: buffer for write data 119 - * @size: sizo of the buffer 119 + * @size: size of the buffer 120 120 * 121 121 * Writes @size bytes to the DP dual mode adaptor registers 122 122 * starting at @offset.
+6 -1
drivers/gpu/drm/drm_prime.c
··· 582 582 { 583 583 struct drm_gem_object *obj = dma_buf->priv; 584 584 585 - if (!obj->funcs->get_sg_table) 585 + /* 586 + * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers 587 + * that implement their own ->map_dma_buf() do not. 588 + */ 589 + if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf && 590 + !obj->funcs->get_sg_table) 586 591 return -ENOSYS; 587 592 588 593 return drm_gem_pin(obj);
+1
drivers/gpu/drm/i915/Makefile
··· 118 118 gt/intel_ggtt_fencing.o \ 119 119 gt/intel_gt.o \ 120 120 gt/intel_gt_buffer_pool.o \ 121 + gt/intel_gt_ccs_mode.o \ 121 122 gt/intel_gt_clock_utils.o \ 122 123 gt/intel_gt_debugfs.o \ 123 124 gt/intel_gt_engines_debugfs.o \
-9
drivers/gpu/drm/i915/display/intel_display.c
··· 2709 2709 */ 2710 2710 intel_de_write(dev_priv, PIPESRC(pipe), 2711 2711 PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1)); 2712 - 2713 - if (!crtc_state->enable_psr2_su_region_et) 2714 - return; 2715 - 2716 - width = drm_rect_width(&crtc_state->psr2_su_area); 2717 - height = drm_rect_height(&crtc_state->psr2_su_area); 2718 - 2719 - intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(pipe), 2720 - PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1)); 2721 2712 } 2722 2713 2723 2714 static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state)
+1
drivers/gpu/drm/i915/display/intel_display_device.h
··· 47 47 #define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13) 48 48 #define HAS_DSB(i915) (DISPLAY_INFO(i915)->has_dsb) 49 49 #define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc) 50 + #define HAS_DSC_MST(__i915) (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915)) 50 51 #define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0) 51 52 #define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg) 52 53 #define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3)
+2
drivers/gpu/drm/i915/display/intel_display_types.h
··· 1423 1423 1424 1424 u32 psr2_man_track_ctl; 1425 1425 1426 + u32 pipe_srcsz_early_tpt; 1427 + 1426 1428 struct drm_rect psr2_su_area; 1427 1429 1428 1430 /* Variable Refresh Rate state */
+7 -4
drivers/gpu/drm/i915/display/intel_dp.c
··· 499 499 /* The values must be in increasing order */ 500 500 static const int mtl_rates[] = { 501 501 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, 502 - 810000, 1000000, 1350000, 2000000, 502 + 810000, 1000000, 2000000, 503 503 }; 504 504 static const int icl_rates[] = { 505 505 162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000, ··· 1422 1422 if (DISPLAY_VER(dev_priv) >= 12) 1423 1423 return true; 1424 1424 1425 - if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A) 1425 + if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A && 1426 + !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) 1426 1427 return true; 1427 1428 1428 1429 return false; ··· 1918 1917 dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); 1919 1918 1920 1919 for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) { 1921 - if (valid_dsc_bpp[i] < dsc_min_bpp || 1922 - valid_dsc_bpp[i] > dsc_max_bpp) 1920 + if (valid_dsc_bpp[i] < dsc_min_bpp) 1921 + continue; 1922 + if (valid_dsc_bpp[i] > dsc_max_bpp) 1923 1923 break; 1924 1924 1925 1925 ret = dsc_compute_link_config(intel_dp, ··· 6559 6557 intel_connector->get_hw_state = intel_ddi_connector_get_hw_state; 6560 6558 else 6561 6559 intel_connector->get_hw_state = intel_connector_get_hw_state; 6560 + intel_connector->sync_state = intel_dp_connector_sync_state; 6562 6561 6563 6562 if (!intel_edp_init_connector(intel_dp, intel_connector)) { 6564 6563 intel_dp_aux_fini(intel_dp);
+1 -1
drivers/gpu/drm/i915/display/intel_dp_mst.c
··· 1355 1355 return 0; 1356 1356 } 1357 1357 1358 - if (DISPLAY_VER(dev_priv) >= 10 && 1358 + if (HAS_DSC_MST(dev_priv) && 1359 1359 drm_dp_sink_supports_dsc(intel_connector->dp.dsc_dpcd)) { 1360 1360 /* 1361 1361 * TBD pass the connector BPC,
+56 -22
drivers/gpu/drm/i915/display/intel_psr.c
··· 1994 1994 1995 1995 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state) 1996 1996 { 1997 + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); 1997 1998 struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); 1998 1999 enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; 1999 2000 struct intel_encoder *encoder; ··· 2014 2013 2015 2014 intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(cpu_transcoder), 2016 2015 crtc_state->psr2_man_track_ctl); 2016 + 2017 + if (!crtc_state->enable_psr2_su_region_et) 2018 + return; 2019 + 2020 + intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(crtc->pipe), 2021 + crtc_state->pipe_srcsz_early_tpt); 2017 2022 } 2018 2023 2019 2024 static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, ··· 2056 2049 } 2057 2050 exit: 2058 2051 crtc_state->psr2_man_track_ctl = val; 2052 + } 2053 + 2054 + static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state, 2055 + bool full_update) 2056 + { 2057 + int width, height; 2058 + 2059 + if (!crtc_state->enable_psr2_su_region_et || full_update) 2060 + return 0; 2061 + 2062 + width = drm_rect_width(&crtc_state->psr2_su_area); 2063 + height = drm_rect_height(&crtc_state->psr2_su_area); 2064 + 2065 + return PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1); 2059 2066 } 2060 2067 2061 2068 static void clip_area_update(struct drm_rect *overlap_damage_area, ··· 2116 2095 * cursor fully when cursor is in SU area. 
2117 2096 */ 2118 2097 static void 2119 - intel_psr2_sel_fetch_et_alignment(struct intel_crtc_state *crtc_state, 2120 - struct intel_plane_state *cursor_state) 2098 + intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state, 2099 + struct intel_crtc *crtc) 2121 2100 { 2122 - struct drm_rect inter; 2101 + struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); 2102 + struct intel_plane_state *new_plane_state; 2103 + struct intel_plane *plane; 2104 + int i; 2123 2105 2124 - if (!crtc_state->enable_psr2_su_region_et || 2125 - !cursor_state->uapi.visible) 2106 + if (!crtc_state->enable_psr2_su_region_et) 2126 2107 return; 2127 2108 2128 - inter = crtc_state->psr2_su_area; 2129 - if (!drm_rect_intersect(&inter, &cursor_state->uapi.dst)) 2130 - return; 2109 + for_each_new_intel_plane_in_state(state, plane, new_plane_state, i) { 2110 + struct drm_rect inter; 2131 2111 2132 - clip_area_update(&crtc_state->psr2_su_area, &cursor_state->uapi.dst, 2133 - &crtc_state->pipe_src); 2112 + if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc) 2113 + continue; 2114 + 2115 + if (plane->id != PLANE_CURSOR) 2116 + continue; 2117 + 2118 + if (!new_plane_state->uapi.visible) 2119 + continue; 2120 + 2121 + inter = crtc_state->psr2_su_area; 2122 + if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) 2123 + continue; 2124 + 2125 + clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst, 2126 + &crtc_state->pipe_src); 2127 + } 2134 2128 } 2135 2129 2136 2130 /* ··· 2188 2152 { 2189 2153 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 2190 2154 struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); 2191 - struct intel_plane_state *new_plane_state, *old_plane_state, 2192 - *cursor_plane_state = NULL; 2155 + struct intel_plane_state *new_plane_state, *old_plane_state; 2193 2156 struct intel_plane *plane; 2194 2157 bool full_update = false; 2195 2158 int i, ret; ··· 2273 2238 
damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1; 2274 2239 2275 2240 clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src); 2276 - 2277 - /* 2278 - * Cursor plane new state is stored to adjust su area to cover 2279 - * cursor are fully. 2280 - */ 2281 - if (plane->id == PLANE_CURSOR) 2282 - cursor_plane_state = new_plane_state; 2283 2241 } 2284 2242 2285 2243 /* ··· 2301 2273 if (ret) 2302 2274 return ret; 2303 2275 2304 - /* Adjust su area to cover cursor fully as necessary */ 2305 - if (cursor_plane_state) 2306 - intel_psr2_sel_fetch_et_alignment(crtc_state, cursor_plane_state); 2276 + /* 2277 + * Adjust su area to cover cursor fully as necessary (early 2278 + * transport). This needs to be done after 2279 + * drm_atomic_add_affected_planes to ensure visible cursor is added into 2280 + * affected planes even when cursor is not updated by itself. 2281 + */ 2282 + intel_psr2_sel_fetch_et_alignment(state, crtc); 2307 2283 2308 2284 intel_psr2_sel_fetch_pipe_alignment(crtc_state); 2309 2285 ··· 2370 2338 2371 2339 skip_sel_fetch_set_loop: 2372 2340 psr2_man_trk_ctl_calc(crtc_state, full_update); 2341 + crtc_state->pipe_srcsz_early_tpt = 2342 + psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update); 2373 2343 return 0; 2374 2344 } 2375 2345
+3
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
··· 961 961 struct i915_vma *vma; 962 962 int ret; 963 963 964 + if (!intel_gt_needs_wa_16018031267(vm->gt)) 965 + return 0; 966 + 964 967 /* The memory will be used only by GPU. */ 965 968 obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 966 969 I915_BO_ALLOC_VOLATILE |
+17
drivers/gpu/drm/i915/gt/intel_engine_cs.c
··· 908 908 info->engine_mask &= ~BIT(GSC0); 909 909 } 910 910 911 + /* 912 + * Do not create the command streamer for CCS slices beyond the first. 913 + * All the workload submitted to the first engine will be shared among 914 + * all the slices. 915 + * 916 + * Once the user will be allowed to customize the CCS mode, then this 917 + * check needs to be removed. 918 + */ 919 + if (IS_DG2(gt->i915)) { 920 + u8 first_ccs = __ffs(CCS_MASK(gt)); 921 + 922 + /* Mask off all the CCS engine */ 923 + info->engine_mask &= ~GENMASK(CCS3, CCS0); 924 + /* Put back in the first CCS engine */ 925 + info->engine_mask |= BIT(_CCS(first_ccs)); 926 + } 927 + 911 928 return info->engine_mask; 912 929 } 913 930
+6
drivers/gpu/drm/i915/gt/intel_gt.c
··· 1024 1024 return I915_MAP_WC; 1025 1025 } 1026 1026 1027 + bool intel_gt_needs_wa_16018031267(struct intel_gt *gt) 1028 + { 1029 + /* Wa_16018031267, Wa_16018063123 */ 1030 + return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71)); 1031 + } 1032 + 1027 1033 bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) 1028 1034 { 1029 1035 return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
+5 -4
drivers/gpu/drm/i915/gt/intel_gt.h
··· 82 82 ##__VA_ARGS__); \ 83 83 } while (0) 84 84 85 - #define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ 86 - IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \ 87 - engine->class == COPY_ENGINE_CLASS && engine->instance == 0) 88 - 89 85 static inline bool gt_is_root(struct intel_gt *gt) 90 86 { 91 87 return !gt->info.id; 92 88 } 93 89 90 + bool intel_gt_needs_wa_16018031267(struct intel_gt *gt); 94 91 bool intel_gt_needs_wa_22016122933(struct intel_gt *gt); 92 + 93 + #define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ 94 + intel_gt_needs_wa_16018031267(engine->gt) && \ 95 + engine->class == COPY_ENGINE_CLASS && engine->instance == 0) 95 96 96 97 static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) 97 98 {
+39
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #include "i915_drv.h" 7 + #include "intel_gt.h" 8 + #include "intel_gt_ccs_mode.h" 9 + #include "intel_gt_regs.h" 10 + 11 + void intel_gt_apply_ccs_mode(struct intel_gt *gt) 12 + { 13 + int cslice; 14 + u32 mode = 0; 15 + int first_ccs = __ffs(CCS_MASK(gt)); 16 + 17 + if (!IS_DG2(gt->i915)) 18 + return; 19 + 20 + /* Build the value for the fixed CCS load balancing */ 21 + for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { 22 + if (CCS_MASK(gt) & BIT(cslice)) 23 + /* 24 + * If available, assign the cslice 25 + * to the first available engine... 26 + */ 27 + mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); 28 + 29 + else 30 + /* 31 + * ... otherwise, mark the cslice as 32 + * unavailable if no CCS dispatches here 33 + */ 34 + mode |= XEHP_CCS_MODE_CSLICE(cslice, 35 + XEHP_CCS_MODE_CSLICE_MASK); 36 + } 37 + 38 + intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode); 39 + }
+13
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #ifndef __INTEL_GT_CCS_MODE_H__ 7 + #define __INTEL_GT_CCS_MODE_H__ 8 + 9 + struct intel_gt; 10 + 11 + void intel_gt_apply_ccs_mode(struct intel_gt *gt); 12 + 13 + #endif /* __INTEL_GT_CCS_MODE_H__ */
+6
drivers/gpu/drm/i915/gt/intel_gt_regs.h
··· 1477 1477 #define ECOBITS_PPGTT_CACHE4B (0 << 8) 1478 1478 1479 1479 #define GEN12_RCU_MODE _MMIO(0x14800) 1480 + #define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) 1480 1481 #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) 1482 + 1483 + #define XEHP_CCS_MODE _MMIO(0x14804) 1484 + #define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */ 1485 + #define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1) 1486 + #define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) 1481 1487 1482 1488 #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) 1483 1489 #define CHV_FGT_DISABLE_SS0 (1 << 10)
+28 -2
drivers/gpu/drm/i915/gt/intel_workarounds.c
··· 10 10 #include "intel_engine_regs.h" 11 11 #include "intel_gpu_commands.h" 12 12 #include "intel_gt.h" 13 + #include "intel_gt_ccs_mode.h" 13 14 #include "intel_gt_mcr.h" 14 15 #include "intel_gt_print.h" 15 16 #include "intel_gt_regs.h" ··· 52 51 * registers belonging to BCS, VCS or VECS should be implemented in 53 52 * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific 54 53 * engine's MMIO range but that are part of of the common RCS/CCS reset domain 55 - * should be implemented in general_render_compute_wa_init(). 54 + * should be implemented in general_render_compute_wa_init(). The settings 55 + * about the CCS load balancing should be added in ccs_engine_wa_mode(). 56 56 * 57 57 * - GT workarounds: the list of these WAs is applied whenever these registers 58 58 * revert to their default values: on GPU reset, suspend/resume [1]_, etc. ··· 2856 2854 wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); 2857 2855 } 2858 2856 2857 + static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) 2858 + { 2859 + struct intel_gt *gt = engine->gt; 2860 + 2861 + if (!IS_DG2(gt->i915)) 2862 + return; 2863 + 2864 + /* 2865 + * Wa_14019159160: This workaround, along with others, leads to 2866 + * significant challenges in utilizing load balancing among the 2867 + * CCS slices. Consequently, an architectural decision has been 2868 + * made to completely disable automatic CCS load balancing. 2869 + */ 2870 + wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); 2871 + 2872 + /* 2873 + * After having disabled automatic load balancing we need to 2874 + * assign all slices to a single CCS. 
We will call it CCS mode 1 2875 + */ 2876 + intel_gt_apply_ccs_mode(gt); 2877 + } 2878 + 2859 2879 /* 2860 2880 * The workarounds in this function apply to shared registers in 2861 2881 * the general render reset domain that aren't tied to a ··· 3028 3004 * to a single RCS/CCS engine's workaround list since 3029 3005 * they're reset as part of the general render domain reset. 3030 3006 */ 3031 - if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 3007 + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) { 3032 3008 general_render_compute_wa_init(engine, wal); 3009 + ccs_engine_wa_mode(engine, wal); 3010 + } 3033 3011 3034 3012 if (engine->class == COMPUTE_CLASS) 3035 3013 ccs_engine_wa_init(engine, wal);
+3 -3
drivers/gpu/drm/nouveau/nouveau_uvmm.c
··· 812 812 struct drm_gpuva_op_unmap *u = r->unmap; 813 813 struct nouveau_uvma *uvma = uvma_from_va(u->va); 814 814 u64 addr = uvma->va.va.addr; 815 - u64 range = uvma->va.va.range; 815 + u64 end = uvma->va.va.addr + uvma->va.va.range; 816 816 817 817 if (r->prev) 818 818 addr = r->prev->va.addr + r->prev->va.range; 819 819 820 820 if (r->next) 821 - range = r->next->va.addr - addr; 821 + end = r->next->va.addr; 822 822 823 - op_unmap_range(u, addr, range); 823 + op_unmap_range(u, addr, end - addr); 824 824 } 825 825 826 826 static int
+1 -1
drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
··· 420 420 return ret; 421 421 } else { 422 422 ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, 423 - &args, sizeof(args));; 423 + &args, sizeof(args)); 424 424 if (ret) 425 425 return ret; 426 426 }
+3 -3
drivers/gpu/drm/panfrost/panfrost_gpu.c
··· 441 441 442 442 gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); 443 443 ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, 444 - val, !val, 1, 1000); 444 + val, !val, 1, 2000); 445 445 if (ret) 446 446 dev_err(pfdev->dev, "shader power transition timeout"); 447 447 448 448 gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); 449 449 ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, 450 - val, !val, 1, 1000); 450 + val, !val, 1, 2000); 451 451 if (ret) 452 452 dev_err(pfdev->dev, "tiler power transition timeout"); 453 453 454 454 gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); 455 455 ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, 456 - val, !val, 0, 1000); 456 + val, !val, 0, 2000); 457 457 if (ret) 458 458 dev_err(pfdev->dev, "l2 power transition timeout"); 459 459 }
+10 -1
drivers/gpu/drm/xe/xe_device.c
··· 193 193 { 194 194 struct xe_device *xe = to_xe_device(dev); 195 195 196 + if (xe->preempt_fence_wq) 197 + destroy_workqueue(xe->preempt_fence_wq); 198 + 196 199 if (xe->ordered_wq) 197 200 destroy_workqueue(xe->ordered_wq); 198 201 ··· 261 258 INIT_LIST_HEAD(&xe->pinned.external_vram); 262 259 INIT_LIST_HEAD(&xe->pinned.evicted); 263 260 261 + xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); 264 262 xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); 265 263 xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); 266 - if (!xe->ordered_wq || !xe->unordered_wq) { 264 + if (!xe->ordered_wq || !xe->unordered_wq || 265 + !xe->preempt_fence_wq) { 266 + /* 267 + * Cleanup done in xe_device_destroy via 268 + * drmm_add_action_or_reset register above 269 + */ 267 270 drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); 268 271 err = -ENOMEM; 269 272 goto err;
+3
drivers/gpu/drm/xe/xe_device_types.h
··· 363 363 /** @ufence_wq: user fence wait queue */ 364 364 wait_queue_head_t ufence_wq; 365 365 366 + /** @preempt_fence_wq: used to serialize preempt fences */ 367 + struct workqueue_struct *preempt_fence_wq; 368 + 366 369 /** @ordered_wq: used to serialize compute mode resume */ 367 370 struct workqueue_struct *ordered_wq; 368 371
+7 -72
drivers/gpu/drm/xe/xe_exec.c
··· 94 94 * Unlock all 95 95 */ 96 96 97 + /* 98 + * Add validation and rebinding to the drm_exec locking loop, since both can 99 + * trigger eviction which may require sleeping dma_resv locks. 100 + */ 97 101 static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) 98 102 { 99 103 struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); 100 - struct drm_gem_object *obj; 101 - unsigned long index; 102 - int num_fences; 103 - int ret; 104 104 105 - ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); 106 - if (ret) 107 - return ret; 108 - 109 - /* 110 - * 1 fence slot for the final submit, and 1 more for every per-tile for 111 - * GPU bind and 1 extra for CPU bind. Note that there are potentially 112 - * many vma per object/dma-resv, however the fence slot will just be 113 - * re-used, since they are largely the same timeline and the seqno 114 - * should be in order. In the case of CPU bind there is dummy fence used 115 - * for all CPU binds, so no need to have a per-tile slot for that. 116 - */ 117 - num_fences = 1 + 1 + vm->xe->info.tile_count; 118 - 119 - /* 120 - * We don't know upfront exactly how many fence slots we will need at 121 - * the start of the exec, since the TTM bo_validate above can consume 122 - * numerous fence slots. Also due to how the dma_resv_reserve_fences() 123 - * works it only ensures that at least that many fence slots are 124 - * available i.e if there are already 10 slots available and we reserve 125 - * two more, it can just noop without reserving anything. With this it 126 - * is quite possible that TTM steals some of the fence slots and then 127 - * when it comes time to do the vma binding and final exec stage we are 128 - * lacking enough fence slots, leading to some nasty BUG_ON() when 129 - * adding the fences. Hence just add our own fences here, after the 130 - * validate stage. 
131 - */ 132 - drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) { 133 - ret = dma_resv_reserve_fences(obj->resv, num_fences); 134 - if (ret) 135 - return ret; 136 - } 137 - 138 - return 0; 105 + /* The fence slot added here is intended for the exec sched job. */ 106 + return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); 139 107 } 140 108 141 109 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ··· 120 152 struct drm_exec *exec = &vm_exec.exec; 121 153 u32 i, num_syncs = 0, num_ufence = 0; 122 154 struct xe_sched_job *job; 123 - struct dma_fence *rebind_fence; 124 155 struct xe_vm *vm; 125 156 bool write_locked, skip_retry = false; 126 157 ktime_t end = 0; ··· 257 290 goto err_exec; 258 291 } 259 292 260 - /* 261 - * Rebind any invalidated userptr or evicted BOs in the VM, non-compute 262 - * VM mode only. 263 - */ 264 - rebind_fence = xe_vm_rebind(vm, false); 265 - if (IS_ERR(rebind_fence)) { 266 - err = PTR_ERR(rebind_fence); 267 - goto err_put_job; 268 - } 269 - 270 - /* 271 - * We store the rebind_fence in the VM so subsequent execs don't get 272 - * scheduled before the rebinds of userptrs / evicted BOs is complete. 273 - */ 274 - if (rebind_fence) { 275 - dma_fence_put(vm->rebind_fence); 276 - vm->rebind_fence = rebind_fence; 277 - } 278 - if (vm->rebind_fence) { 279 - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 280 - &vm->rebind_fence->flags)) { 281 - dma_fence_put(vm->rebind_fence); 282 - vm->rebind_fence = NULL; 283 - } else { 284 - dma_fence_get(vm->rebind_fence); 285 - err = drm_sched_job_add_dependency(&job->drm, 286 - vm->rebind_fence); 287 - if (err) 288 - goto err_put_job; 289 - } 290 - } 291 - 292 - /* Wait behind munmap style rebinds */ 293 + /* Wait behind rebinds */ 293 294 if (!xe_vm_in_lr_mode(vm)) { 294 295 err = drm_sched_job_add_resv_dependencies(&job->drm, 295 296 xe_vm_resv(vm),
+5
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 148 148 const struct xe_ring_ops *ring_ops; 149 149 /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ 150 150 struct drm_sched_entity *entity; 151 + /** 152 + * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed 153 + * Protected by @vm's resv. Unused if @vm == NULL. 154 + */ 155 + u64 tlb_flush_seqno; 151 156 /** @lrc: logical ring context for this exec queue */ 152 157 struct xe_lrc lrc[]; 153 158 };
+1 -2
drivers/gpu/drm/xe/xe_gt_pagefault.c
··· 100 100 { 101 101 struct xe_bo *bo = xe_vma_bo(vma); 102 102 struct xe_vm *vm = xe_vma_vm(vma); 103 - unsigned int num_shared = 2; /* slots for bind + move */ 104 103 int err; 105 104 106 - err = xe_vm_prepare_vma(exec, vma, num_shared); 105 + err = xe_vm_lock_vma(exec, vma); 107 106 if (err) 108 107 return err; 109 108
-1
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
··· 61 61 INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences); 62 62 spin_lock_init(&gt->tlb_invalidation.pending_lock); 63 63 spin_lock_init(&gt->tlb_invalidation.lock); 64 - gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); 65 64 INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr, 66 65 xe_gt_tlb_fence_timeout); 67 66
-7
drivers/gpu/drm/xe/xe_gt_types.h
··· 177 177 * xe_gt_tlb_fence_timeout after the timeut interval is over. 178 178 */ 179 179 struct delayed_work fence_tdr; 180 - /** @tlb_invalidation.fence_context: context for TLB invalidation fences */ 181 - u64 fence_context; 182 - /** 183 - * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by 184 - * tlb_invalidation.lock 185 - */ 186 - u32 fence_seqno; 187 180 /** @tlb_invalidation.lock: protects TLB invalidation fences */ 188 181 spinlock_t lock; 189 182 } tlb_invalidation;
+1 -1
drivers/gpu/drm/xe/xe_preempt_fence.c
··· 49 49 struct xe_exec_queue *q = pfence->q; 50 50 51 51 pfence->error = q->ops->suspend(q); 52 - queue_work(system_unbound_wq, &pfence->preempt_work); 52 + queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work); 53 53 return true; 54 54 } 55 55
+20 -5
drivers/gpu/drm/xe/xe_pt.c
··· 1135 1135 spin_lock_irq(&gt->tlb_invalidation.lock); 1136 1136 dma_fence_init(&ifence->base.base, &invalidation_fence_ops, 1137 1137 &gt->tlb_invalidation.lock, 1138 - gt->tlb_invalidation.fence_context, 1139 - ++gt->tlb_invalidation.fence_seqno); 1138 + dma_fence_context_alloc(1), 1); 1140 1139 spin_unlock_irq(&gt->tlb_invalidation.lock); 1141 1140 1142 1141 INIT_LIST_HEAD(&ifence->base.link); ··· 1235 1236 err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); 1236 1237 if (err) 1237 1238 goto err; 1239 + 1240 + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); 1241 + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) 1242 + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); 1243 + if (err) 1244 + goto err; 1245 + 1238 1246 xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); 1239 1247 1240 1248 xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); ··· 1260 1254 * non-faulting LR, in particular on user-space batch buffer chaining, 1261 1255 * it needs to be done here. 1262 1256 */ 1263 - if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || 1264 - (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { 1257 + if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { 1265 1258 ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); 1266 1259 if (!ifence) 1267 1260 return ERR_PTR(-ENOMEM); 1261 + } else if (rebind && !xe_vm_in_lr_mode(vm)) { 1262 + /* We bump also if batch_invalidate_tlb is true */ 1263 + vm->tlb_flush_seqno++; 1268 1264 } 1269 1265 1270 1266 rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); ··· 1305 1297 } 1306 1298 1307 1299 /* add shared fence now for pagetable delayed destroy */ 1308 - dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && 1300 + dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || 1309 1301 last_munmap_rebind ? 
1310 1302 DMA_RESV_USAGE_KERNEL : 1311 1303 DMA_RESV_USAGE_BOOKKEEP); ··· 1584 1576 struct dma_fence *fence = NULL; 1585 1577 struct invalidation_fence *ifence; 1586 1578 struct xe_range_fence *rfence; 1579 + int err; 1587 1580 1588 1581 LLIST_HEAD(deferred); 1589 1582 ··· 1601 1592 xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); 1602 1593 xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, 1603 1594 num_entries); 1595 + 1596 + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); 1597 + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) 1598 + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); 1599 + if (err) 1600 + return ERR_PTR(err); 1604 1601 1605 1602 ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); 1606 1603 if (!ifence)
+4 -7
drivers/gpu/drm/xe/xe_ring_ops.c
··· 219 219 { 220 220 u32 dw[MAX_JOB_SIZE_DW], i = 0; 221 221 u32 ppgtt_flag = get_ppgtt_flag(job); 222 - struct xe_vm *vm = job->q->vm; 223 222 struct xe_gt *gt = job->q->gt; 224 223 225 - if (vm && vm->batch_invalidate_tlb) { 224 + if (job->ring_ops_flush_tlb) { 226 225 dw[i++] = preparser_disable(true); 227 226 i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 228 227 seqno, true, dw, i); ··· 269 270 struct xe_gt *gt = job->q->gt; 270 271 struct xe_device *xe = gt_to_xe(gt); 271 272 bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; 272 - struct xe_vm *vm = job->q->vm; 273 273 274 274 dw[i++] = preparser_disable(true); 275 275 ··· 280 282 i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); 281 283 } 282 284 283 - if (vm && vm->batch_invalidate_tlb) 285 + if (job->ring_ops_flush_tlb) 284 286 i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 285 287 seqno, true, dw, i); 286 288 287 289 dw[i++] = preparser_disable(false); 288 290 289 - if (!vm || !vm->batch_invalidate_tlb) 291 + if (!job->ring_ops_flush_tlb) 290 292 i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 291 293 seqno, dw, i); 292 294 ··· 315 317 struct xe_gt *gt = job->q->gt; 316 318 struct xe_device *xe = gt_to_xe(gt); 317 319 bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); 318 - struct xe_vm *vm = job->q->vm; 319 320 u32 mask_flags = 0; 320 321 321 322 dw[i++] = preparser_disable(true); ··· 324 327 mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; 325 328 326 329 /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ 327 - i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); 330 + i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i); 328 331 329 332 /* hsdes: 1809175790 */ 330 333 if (has_aux_ccs(xe))
+10
drivers/gpu/drm/xe/xe_sched_job.c
··· 250 250 251 251 void xe_sched_job_arm(struct xe_sched_job *job) 252 252 { 253 + struct xe_exec_queue *q = job->q; 254 + struct xe_vm *vm = q->vm; 255 + 256 + if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) && 257 + (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) { 258 + xe_vm_assert_held(vm); 259 + q->tlb_flush_seqno = vm->tlb_flush_seqno; 260 + job->ring_ops_flush_tlb = true; 261 + } 262 + 253 263 drm_sched_job_arm(&job->drm); 254 264 } 255 265
+2
drivers/gpu/drm/xe/xe_sched_job_types.h
··· 39 39 } user_fence; 40 40 /** @migrate_flush_flags: Additional flush flags for migration jobs */ 41 41 u32 migrate_flush_flags; 42 + /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ 43 + bool ring_ops_flush_tlb; 42 44 /** @batch_addr: batch buffer address of job */ 43 45 u64 batch_addr[]; 44 46 };
+67 -43
drivers/gpu/drm/xe/xe_vm.c
··· 482 482 return 0; 483 483 } 484 484 485 + /** 486 + * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas 487 + * @vm: The vm for which we are rebinding. 488 + * @exec: The struct drm_exec with the locked GEM objects. 489 + * @num_fences: The number of fences to reserve for the operation, not 490 + * including rebinds and validations. 491 + * 492 + * Validates all evicted gem objects and rebinds their vmas. Note that 493 + * rebindings may cause evictions and hence the validation-rebind 494 + * sequence is rerun until there are no more objects to validate. 495 + * 496 + * Return: 0 on success, negative error code on error. In particular, 497 + * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if 498 + * the drm_exec transaction needs to be restarted. 499 + */ 500 + int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 501 + unsigned int num_fences) 502 + { 503 + struct drm_gem_object *obj; 504 + unsigned long index; 505 + int ret; 506 + 507 + do { 508 + ret = drm_gpuvm_validate(&vm->gpuvm, exec); 509 + if (ret) 510 + return ret; 511 + 512 + ret = xe_vm_rebind(vm, false); 513 + if (ret) 514 + return ret; 515 + } while (!list_empty(&vm->gpuvm.evict.list)); 516 + 517 + drm_exec_for_each_locked_object(exec, index, obj) { 518 + ret = dma_resv_reserve_fences(obj->resv, num_fences); 519 + if (ret) 520 + return ret; 521 + } 522 + 523 + return 0; 524 + } 525 + 485 526 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, 486 527 bool *done) 487 528 { 488 529 int err; 489 530 490 - /* 491 - * 1 fence for each preempt fence plus a fence for each tile from a 492 - * possible rebind 493 - */ 494 - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + 495 - vm->xe->info.tile_count); 531 + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); 496 532 if (err) 497 533 return err; 498 534 ··· 543 507 return 0; 544 508 } 545 509 546 - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 
vm->preempt.num_exec_queues); 510 + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); 547 511 if (err) 548 512 return err; 549 513 ··· 551 515 if (err) 552 516 return err; 553 517 554 - return drm_gpuvm_validate(&vm->gpuvm, exec); 518 + /* 519 + * Add validation and rebinding to the locking loop since both can 520 + * cause evictions which may require blocking dma_resv locks. 521 + * The fence reservation here is intended for the new preempt fences 522 + * we attach at the end of the rebind work. 523 + */ 524 + return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); 555 525 } 556 526 557 527 static void preempt_rebind_work_func(struct work_struct *w) 558 528 { 559 529 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 560 530 struct drm_exec exec; 561 - struct dma_fence *rebind_fence; 562 531 unsigned int fence_count = 0; 563 532 LIST_HEAD(preempt_fences); 564 533 ktime_t end = 0; ··· 609 568 if (err) 610 569 goto out_unlock; 611 570 612 - rebind_fence = xe_vm_rebind(vm, true); 613 - if (IS_ERR(rebind_fence)) { 614 - err = PTR_ERR(rebind_fence); 571 + err = xe_vm_rebind(vm, true); 572 + if (err) 615 573 goto out_unlock; 616 - } 617 574 618 - if (rebind_fence) { 619 - dma_fence_wait(rebind_fence, false); 620 - dma_fence_put(rebind_fence); 621 - } 622 - 623 - /* Wait on munmap style VM unbinds */ 575 + /* Wait on rebinds and munmap style VM unbinds */ 624 576 wait = dma_resv_wait_timeout(xe_vm_resv(vm), 625 577 DMA_RESV_USAGE_KERNEL, 626 578 false, MAX_SCHEDULE_TIMEOUT); ··· 807 773 struct xe_sync_entry *syncs, u32 num_syncs, 808 774 bool first_op, bool last_op); 809 775 810 - struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 776 + int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) 811 777 { 812 - struct dma_fence *fence = NULL; 778 + struct dma_fence *fence; 813 779 struct xe_vma *vma, *next; 814 780 815 781 lockdep_assert_held(&vm->lock); 816 782 if (xe_vm_in_lr_mode(vm) && !rebind_worker) 817 - return 
NULL; 783 + return 0; 784 819 785 xe_vm_assert_held(vm); 820 786 list_for_each_entry_safe(vma, next, &vm->rebind_list, ··· 822 788 xe_assert(vm->xe, vma->tile_present); 823 789 824 790 list_del_init(&vma->combined_links.rebind); 825 - dma_fence_put(fence); 826 791 if (rebind_worker) 827 792 trace_xe_vma_rebind_worker(vma); 828 793 else 829 794 trace_xe_vma_rebind_exec(vma); 830 795 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); 831 796 if (IS_ERR(fence)) 832 - return fence; 797 + return PTR_ERR(fence); 798 + dma_fence_put(fence); 833 799 } 834 800 835 - return fence; 801 + return 0; 836 802 } 837 803 838 804 static void xe_vma_free(struct xe_vma *vma) ··· 1038 1004 } 1039 1005 1040 1006 /** 1041 - * xe_vm_prepare_vma() - drm_exec utility to lock a vma 1007 + * xe_vm_lock_vma() - drm_exec utility to lock a vma 1042 1008 * @exec: The drm_exec object we're currently locking for. 1043 1009 * @vma: The vma for which we want to lock the vm resv and any attached 1044 1010 * object's resv. 1045 - * @num_shared: The number of dma-fence slots to pre-allocate in the 1046 - * objects' reservation objects. 1047 1011 * 1048 1012 * Return: 0 on success, negative error code on error. In particular 1049 1013 * may return -EDEADLK on WW transaction contention and -EINTR if 1050 1014 * an interruptible wait is terminated by a signal. 
1051 1015 */ 1052 - int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, 1053 - unsigned int num_shared) 1016 + int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) 1054 1017 { 1055 1018 struct xe_vm *vm = xe_vma_vm(vma); 1056 1019 struct xe_bo *bo = xe_vma_bo(vma); 1057 1020 int err; 1058 1021 1059 1022 XE_WARN_ON(!vm); 1060 - if (num_shared) 1061 - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); 1062 - else 1063 - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1064 - if (!err && bo && !bo->vm) { 1065 - if (num_shared) 1066 - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); 1067 - else 1068 - err = drm_exec_lock_obj(exec, &bo->ttm.base); 1069 - } 1023 + 1024 + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); 1025 + if (!err && bo && !bo->vm) 1026 + err = drm_exec_lock_obj(exec, &bo->ttm.base); 1070 1027 1071 1028 return err; 1072 1029 } ··· 1069 1044 1070 1045 drm_exec_init(&exec, 0, 0); 1071 1046 drm_exec_until_all_locked(&exec) { 1072 - err = xe_vm_prepare_vma(&exec, vma, 0); 1047 + err = xe_vm_lock_vma(&exec, vma); 1073 1048 drm_exec_retry_on_contention(&exec); 1074 1049 if (XE_WARN_ON(err)) 1075 1050 break; ··· 1614 1589 XE_WARN_ON(vm->pt_root[id]); 1615 1590 1616 1591 trace_xe_vm_free(vm); 1617 - dma_fence_put(vm->rebind_fence); 1618 1592 kfree(vm); 1619 1593 } 1620 1594 ··· 2536 2512 2537 2513 lockdep_assert_held_write(&vm->lock); 2538 2514 2539 - err = xe_vm_prepare_vma(exec, vma, 1); 2515 + err = xe_vm_lock_vma(exec, vma); 2540 2516 if (err) 2541 2517 return err; 2542 2518
+5 -3
drivers/gpu/drm/xe/xe_vm.h
··· 207 207 208 208 int xe_vm_userptr_check_repin(struct xe_vm *vm); 209 209 210 - struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); 210 + int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); 211 211 212 212 int xe_vm_invalidate_vma(struct xe_vma *vma); 213 213 ··· 242 242 243 243 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); 244 244 245 - int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, 246 - unsigned int num_shared); 245 + int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); 246 + 247 + int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, 248 + unsigned int num_fences); 247 249 248 250 /** 249 251 * xe_vm_resv() - Returns the vm's reservation object
+5 -3
drivers/gpu/drm/xe/xe_vm_types.h
··· 177 177 */ 178 178 struct list_head rebind_list; 179 179 180 - /** @rebind_fence: rebind fence from execbuf */ 181 - struct dma_fence *rebind_fence; 182 - 183 180 /** 184 181 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling 185 182 * from an irq context can be last put and the destroy needs to be able ··· 261 264 bool capture_once; 262 265 } error_capture; 263 266 267 + /** 268 + * @tlb_flush_seqno: Required TLB flush seqno for the next exec. 269 + * protected by the vm resv. 270 + */ 271 + u64 tlb_flush_seqno; 264 272 /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ 265 273 bool batch_invalidate_tlb; 266 274 /** @xef: XE file handle for tracking this VM's drm client */