Merge tag 'drm-fixes-2023-08-04' of git://anongit.freedesktop.org/drm/drm

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'drm-fixes-2023-08-04' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
"Small set of fixes this week, i915 and a few misc ones. I didn't see
an amd pull so maybe next week it'll have a few more on that driver.

ttm:
- NULL ptr deref fix

panel:
- add missing MODULE_DEVICE_TABLE

imx/ipuv3:
- timing fix

i915:
- Fix bug in getting msg length in AUX CH registers handler
- Gen12 AUX invalidation fixes
- Fix premature release of request's reusable memory"

* tag 'drm-fixes-2023-08-04' of git://anongit.freedesktop.org/drm/drm:
drm/panel: samsung-s6d7aa0: Add MODULE_DEVICE_TABLE
drm/i915: Fix premature release of request's reusable memory
drm/i915/gt: Support aux invalidation on all engines
drm/i915/gt: Poll aux invalidation register bit on invalidation
drm/i915/gt: Enable the CCS_FLUSH bit in the pipe control and in the CS
drm/i915/gt: Rename flags with bit_group_X according to the datasheet
drm/i915/gt: Ensure memory quiesced before invalidation
drm/i915: Add the gen12_needs_ccs_aux_inv helper
drm/i915/gt: Cleanup aux invalidation registers
drm/i915/gvt: Fix bug in getting msg length in AUX CH registers handler
drm/imx/ipuv3: Fix front porch adjustment upon hactive aligning
drm/ttm: check null pointer before accessing when swapping

Linus Torvalds 2 years ago 4142fc67 4593f3c2

+203 -111

11 changed files

expand all collapse all

drivers

gpu

drm

i915

gen8_engine_cs.c

gen8_engine_cs.h

intel_gpu_commands.h

intel_gt_regs.h

intel_lrc.c

gvt

edid.c

i915_active.c

i915_request.c

imx

ipuv3

ipuv3-crtc.c

panel

panel-samsung-s6d7aa0.c

ttm

ttm_bo.c

+92 -48

drivers/gpu/drm/i915/gt/gen8_engine_cs.c

reviewed

··· 165 165 return MI_ARB_CHECK | 1 << 8 | state; 166 166 } 167 167 168 168 - u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg) 168 168 + static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine) 169 169 { 170 170 - u32 gsi_offset = gt->uncore->gsi_offset; 170 170 + switch (engine->id) { 171 171 + case RCS0: 172 172 + return GEN12_CCS_AUX_INV; 173 173 + case BCS0: 174 174 + return GEN12_BCS0_AUX_INV; 175 175 + case VCS0: 176 176 + return GEN12_VD0_AUX_INV; 177 177 + case VCS2: 178 178 + return GEN12_VD2_AUX_INV; 179 179 + case VECS0: 180 180 + return GEN12_VE0_AUX_INV; 181 181 + case CCS0: 182 182 + return GEN12_CCS0_AUX_INV; 183 183 + default: 184 184 + return INVALID_MMIO_REG; 185 185 + } 186 186 + } 187 187 + 188 188 + static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine) 189 189 + { 190 190 + i915_reg_t reg = gen12_get_aux_inv_reg(engine); 191 191 + 192 192 + if (IS_PONTEVECCHIO(engine->i915)) 193 193 + return false; 194 194 + 195 195 + /* 196 196 + * So far platforms supported by i915 having flat ccs do not require 197 197 + * AUX invalidation. Check also whether the engine requires it. 
198 198 + */ 199 199 + return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915); 200 200 + } 201 201 + 202 202 + u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) 203 203 + { 204 204 + i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine); 205 205 + u32 gsi_offset = engine->gt->uncore->gsi_offset; 206 206 + 207 207 + if (!gen12_needs_ccs_aux_inv(engine)) 208 208 + return cs; 171 209 172 210 *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; 173 211 *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; 174 212 *cs++ = AUX_INV; 175 175 - *cs++ = MI_NOOP; 213 213 + 214 214 + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | 215 215 + MI_SEMAPHORE_REGISTER_POLL | 216 216 + MI_SEMAPHORE_POLL | 217 217 + MI_SEMAPHORE_SAD_EQ_SDD; 218 218 + *cs++ = 0; 219 219 + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; 220 220 + *cs++ = 0; 221 221 + *cs++ = 0; 176 222 177 223 return cs; 178 224 } ··· 248 202 { 249 203 struct intel_engine_cs *engine = rq->engine; 250 204 251 251 - if (mode & EMIT_FLUSH) { 252 252 - u32 flags = 0; 205 205 + /* 206 206 + * On Aux CCS platforms the invalidation of the Aux 207 207 + * table requires quiescing memory traffic beforehand 208 208 + */ 209 209 + if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) { 210 210 + u32 bit_group_0 = 0; 211 211 + u32 bit_group_1 = 0; 253 212 int err; 254 213 u32 *cs; 255 214 ··· 262 211 if (err) 263 212 return err; 264 213 265 265 - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; 266 266 - flags |= PIPE_CONTROL_FLUSH_L3; 267 267 - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 268 268 - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 214 214 + bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH; 215 215 + 216 216 + /* 217 217 + * When required, in MTL and beyond platforms we 218 218 + * need to set the CCS_FLUSH bit in the pipe control 219 219 + */ 220 220 + if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) 221 221 + bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; 222 222 + 223 223 + bit_group_1 |= 
PIPE_CONTROL_TILE_CACHE_FLUSH; 224 224 + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; 225 225 + bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 226 226 + bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 269 227 /* Wa_1409600907:tgl,adl-p */ 270 270 - flags |= PIPE_CONTROL_DEPTH_STALL; 271 271 - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 272 272 - flags |= PIPE_CONTROL_FLUSH_ENABLE; 228 228 + bit_group_1 |= PIPE_CONTROL_DEPTH_STALL; 229 229 + bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE; 230 230 + bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE; 273 231 274 274 - flags |= PIPE_CONTROL_STORE_DATA_INDEX; 275 275 - flags |= PIPE_CONTROL_QW_WRITE; 232 232 + bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX; 233 233 + bit_group_1 |= PIPE_CONTROL_QW_WRITE; 276 234 277 277 - flags |= PIPE_CONTROL_CS_STALL; 235 235 + bit_group_1 |= PIPE_CONTROL_CS_STALL; 278 236 279 237 if (!HAS_3D_PIPELINE(engine->i915)) 280 280 - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; 238 238 + bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; 281 239 else if (engine->class == COMPUTE_CLASS) 282 282 - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; 240 240 + bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; 283 241 284 242 cs = intel_ring_begin(rq, 6); 285 243 if (IS_ERR(cs)) 286 244 return PTR_ERR(cs); 287 245 288 288 - cs = gen12_emit_pipe_control(cs, 289 289 - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, 290 290 - flags, LRC_PPHWSP_SCRATCH_ADDR); 246 246 + cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, 247 247 + LRC_PPHWSP_SCRATCH_ADDR); 291 248 intel_ring_advance(rq, cs); 292 249 } 293 250 ··· 326 267 else if (engine->class == COMPUTE_CLASS) 327 268 flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; 328 269 329 329 - if (!HAS_FLAT_CCS(rq->engine->i915)) 330 330 - count = 8 + 4; 331 331 - else 332 332 - count = 8; 270 270 + count = 8; 271 271 + if (gen12_needs_ccs_aux_inv(rq->engine)) 272 272 + count += 8; 333 273 334 274 cs = intel_ring_begin(rq, count); 335 275 if (IS_ERR(cs)) ··· 343 285 344 286 cs = gen8_emit_pipe_control(cs, flags, 
LRC_PPHWSP_SCRATCH_ADDR); 345 287 346 346 - if (!HAS_FLAT_CCS(rq->engine->i915)) { 347 347 - /* hsdes: 1809175790 */ 348 348 - cs = gen12_emit_aux_table_inv(rq->engine->gt, 349 349 - cs, GEN12_GFX_CCS_AUX_NV); 350 350 - } 288 288 + cs = gen12_emit_aux_table_inv(engine, cs); 351 289 352 290 *cs++ = preparser_disable(false); 353 291 intel_ring_advance(rq, cs); ··· 354 300 355 301 int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) 356 302 { 357 357 - intel_engine_mask_t aux_inv = 0; 358 358 - u32 cmd, *cs; 303 303 + u32 cmd = 4; 304 304 + u32 *cs; 359 305 360 360 - cmd = 4; 361 306 if (mode & EMIT_INVALIDATE) { 362 307 cmd += 2; 363 308 364 364 - if (!HAS_FLAT_CCS(rq->engine->i915) && 365 365 - (rq->engine->class == VIDEO_DECODE_CLASS || 366 366 - rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) { 367 367 - aux_inv = rq->engine->mask & 368 368 - ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0); 369 369 - if (aux_inv) 370 370 - cmd += 4; 371 371 - } 309 309 + if (gen12_needs_ccs_aux_inv(rq->engine)) 310 310 + cmd += 8; 372 311 } 373 312 374 313 cs = intel_ring_begin(rq, cmd); ··· 385 338 cmd |= MI_INVALIDATE_TLB; 386 339 if (rq->engine->class == VIDEO_DECODE_CLASS) 387 340 cmd |= MI_INVALIDATE_BSD; 341 341 + 342 342 + if (gen12_needs_ccs_aux_inv(rq->engine) && 343 343 + rq->engine->class == COPY_ENGINE_CLASS) 344 344 + cmd |= MI_FLUSH_DW_CCS; 388 345 } 389 346 390 347 *cs++ = cmd; ··· 396 345 *cs++ = 0; /* upper addr */ 397 346 *cs++ = 0; /* value */ 398 347 399 399 - if (aux_inv) { /* hsdes: 1809175790 */ 400 400 - if (rq->engine->class == VIDEO_DECODE_CLASS) 401 401 - cs = gen12_emit_aux_table_inv(rq->engine->gt, 402 402 - cs, GEN12_VD0_AUX_NV); 403 403 - else 404 404 - cs = gen12_emit_aux_table_inv(rq->engine->gt, 405 405 - cs, GEN12_VE0_AUX_NV); 406 406 - } 348 348 + cs = gen12_emit_aux_table_inv(rq->engine, cs); 407 349 408 350 if (mode & EMIT_INVALIDATE) 409 351 *cs++ = preparser_disable(false);

+13 -8

drivers/gpu/drm/i915/gt/gen8_engine_cs.h

reviewed

··· 13 13 #include "intel_gt_regs.h" 14 14 #include "intel_gpu_commands.h" 15 15 16 16 + struct intel_engine_cs; 16 17 struct intel_gt; 17 18 struct i915_request; 18 19 ··· 47 46 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); 48 47 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); 49 48 50 50 - u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg); 49 49 + u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs); 51 50 52 51 static inline u32 * 53 53 - __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) 52 52 + __gen8_emit_pipe_control(u32 *batch, u32 bit_group_0, 53 53 + u32 bit_group_1, u32 offset) 54 54 { 55 55 memset(batch, 0, 6 * sizeof(u32)); 56 56 57 57 - batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; 58 58 - batch[1] = flags1; 57 57 + batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0; 58 58 + batch[1] = bit_group_1; 59 59 batch[2] = offset; 60 60 61 61 return batch + 6; 62 62 } 63 63 64 64 - static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) 64 64 + static inline u32 *gen8_emit_pipe_control(u32 *batch, 65 65 + u32 bit_group_1, u32 offset) 65 66 { 66 66 - return __gen8_emit_pipe_control(batch, 0, flags, offset); 67 67 + return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset); 67 68 } 68 69 69 69 - static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) 70 70 + static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0, 71 71 + u32 bit_group_1, u32 offset) 70 72 { 71 71 - return __gen8_emit_pipe_control(batch, flags0, flags1, offset); 73 73 + return __gen8_emit_pipe_control(batch, bit_group_0, 74 74 + bit_group_1, offset); 72 75 } 73 76 74 77 static inline u32 *

drivers/gpu/drm/i915/gt/intel_gpu_commands.h

reviewed

··· 121 121 #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) 122 122 #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ 123 123 #define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ 124 124 + #define MI_SEMAPHORE_REGISTER_POLL (1 << 16) 124 125 #define MI_SEMAPHORE_POLL (1 << 15) 125 126 #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) 126 127 #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) ··· 300 299 #define PIPE_CONTROL_QW_WRITE (1<<14) 301 300 #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) 302 301 #define PIPE_CONTROL_DEPTH_STALL (1<<13) 302 302 + #define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */ 303 303 #define PIPE_CONTROL_WRITE_FLUSH (1<<12) 304 304 #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ 305 305 #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */

+8 -8

drivers/gpu/drm/i915/gt/intel_gt_regs.h

reviewed

··· 332 332 #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4) 333 333 #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) 334 334 #define BSD_HWS_PGA_GEN7 _MMIO(0x4180) 335 335 - #define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208) 336 336 - #define GEN12_VD0_AUX_NV _MMIO(0x4218) 337 337 - #define GEN12_VD1_AUX_NV _MMIO(0x4228) 335 335 + 336 336 + #define GEN12_CCS_AUX_INV _MMIO(0x4208) 337 337 + #define GEN12_VD0_AUX_INV _MMIO(0x4218) 338 338 + #define GEN12_VE0_AUX_INV _MMIO(0x4238) 339 339 + #define GEN12_BCS0_AUX_INV _MMIO(0x4248) 338 340 339 341 #define GEN8_RTCR _MMIO(0x4260) 340 342 #define GEN8_M1TCR _MMIO(0x4264) ··· 344 342 #define GEN8_BTCR _MMIO(0x426c) 345 343 #define GEN8_VTCR _MMIO(0x4270) 346 344 347 347 - #define GEN12_VD2_AUX_NV _MMIO(0x4298) 348 348 - #define GEN12_VD3_AUX_NV _MMIO(0x42a8) 349 349 - #define GEN12_VE0_AUX_NV _MMIO(0x4238) 350 350 - 351 345 #define BLT_HWS_PGA_GEN7 _MMIO(0x4280) 352 346 353 353 - #define GEN12_VE1_AUX_NV _MMIO(0x42b8) 347 347 + #define GEN12_VD2_AUX_INV _MMIO(0x4298) 348 348 + #define GEN12_CCS0_AUX_INV _MMIO(0x42c8) 354 349 #define AUX_INV REG_BIT(0) 350 350 + 355 351 #define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380) 356 352 357 353 #define GEN12_AUX_ERR_DBG _MMIO(0x43f4)

+2 -15

drivers/gpu/drm/i915/gt/intel_lrc.c

reviewed

··· 1364 1364 IS_DG2_G11(ce->engine->i915)) 1365 1365 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); 1366 1366 1367 1367 - /* hsdes: 1809175790 */ 1368 1368 - if (!HAS_FLAT_CCS(ce->engine->i915)) 1369 1369 - cs = gen12_emit_aux_table_inv(ce->engine->gt, 1370 1370 - cs, GEN12_GFX_CCS_AUX_NV); 1367 1367 + cs = gen12_emit_aux_table_inv(ce->engine, cs); 1371 1368 1372 1369 /* Wa_16014892111 */ 1373 1370 if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || ··· 1389 1392 PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 1390 1393 0); 1391 1394 1392 1392 - /* hsdes: 1809175790 */ 1393 1393 - if (!HAS_FLAT_CCS(ce->engine->i915)) { 1394 1394 - if (ce->engine->class == VIDEO_DECODE_CLASS) 1395 1395 - cs = gen12_emit_aux_table_inv(ce->engine->gt, 1396 1396 - cs, GEN12_VD0_AUX_NV); 1397 1397 - else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) 1398 1398 - cs = gen12_emit_aux_table_inv(ce->engine->gt, 1399 1399 - cs, GEN12_VE0_AUX_NV); 1400 1400 - } 1401 1401 - 1402 1402 - return cs; 1395 1395 + return gen12_emit_aux_table_inv(ce->engine, cs); 1403 1396 } 1404 1397 1405 1398 static void

+1 -1

drivers/gpu/drm/i915/gvt/edid.c

reviewed

··· 491 491 return; 492 492 } 493 493 494 494 - msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, reg); 494 494 + msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, value); 495 495 496 496 // check the msg in DATA register. 497 497 msg = vgpu_vreg(vgpu, offset + 4);

+70 -29

drivers/gpu/drm/i915/i915_active.c

reviewed

··· 449 449 } 450 450 } while (unlikely(is_barrier(active))); 451 451 452 452 - if (!__i915_active_fence_set(active, fence)) 452 452 + fence = __i915_active_fence_set(active, fence); 453 453 + if (!fence) 453 454 __i915_active_acquire(ref); 455 455 + else 456 456 + dma_fence_put(fence); 454 457 455 458 out: 456 459 i915_active_release(ref); ··· 472 469 return NULL; 473 470 } 474 471 475 475 - rcu_read_lock(); 476 472 prev = __i915_active_fence_set(active, fence); 477 477 - if (prev) 478 478 - prev = dma_fence_get_rcu(prev); 479 479 - else 473 473 + if (!prev) 480 474 __i915_active_acquire(ref); 481 481 - rcu_read_unlock(); 482 475 483 476 return prev; 484 477 } ··· 1018 1019 * 1019 1020 * Records the new @fence as the last active fence along its timeline in 1020 1021 * this active tracker, moving the tracking callbacks from the previous 1021 1021 - * fence onto this one. Returns the previous fence (if not already completed), 1022 1022 - * which the caller must ensure is executed before the new fence. To ensure 1023 1023 - * that the order of fences within the timeline of the i915_active_fence is 1024 1024 - * understood, it should be locked by the caller. 1022 1022 + * fence onto this one. Gets and returns a reference to the previous fence 1023 1023 + * (if not already completed), which the caller must put after making sure 1024 1024 + * that it is executed before the new fence. To ensure that the order of 1025 1025 + * fences within the timeline of the i915_active_fence is understood, it 1026 1026 + * should be locked by the caller. 
1025 1027 */ 1026 1028 struct dma_fence * 1027 1029 __i915_active_fence_set(struct i915_active_fence *active, ··· 1031 1031 struct dma_fence *prev; 1032 1032 unsigned long flags; 1033 1033 1034 1034 - if (fence == rcu_access_pointer(active->fence)) 1034 1034 + /* 1035 1035 + * In case of fences embedded in i915_requests, their memory is 1036 1036 + * SLAB_TYPESAFE_BY_RCU, then it can be reused right after release 1037 1037 + * by new requests. Then, there is a risk of passing back a pointer 1038 1038 + * to a new, completely unrelated fence that reuses the same memory 1039 1039 + * while tracked under a different active tracker. Combined with i915 1040 1040 + * perf open/close operations that build await dependencies between 1041 1041 + * engine kernel context requests and user requests from different 1042 1042 + * timelines, this can lead to dependency loops and infinite waits. 1043 1043 + * 1044 1044 + * As a countermeasure, we try to get a reference to the active->fence 1045 1045 + * first, so if we succeed and pass it back to our user then it is not 1046 1046 + * released and potentially reused by an unrelated request before the 1047 1047 + * user has a chance to set up an await dependency on it. 1048 1048 + */ 1049 1049 + prev = i915_active_fence_get(active); 1050 1050 + if (fence == prev) 1035 1051 return fence; 1036 1052 1037 1053 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); ··· 1056 1040 * Consider that we have two threads arriving (A and B), with 1057 1041 * C already resident as the active->fence. 1058 1042 * 1059 1059 - * A does the xchg first, and so it sees C or NULL depending 1060 1060 - * on the timing of the interrupt handler. If it is NULL, the 1061 1061 - * previous fence must have been signaled and we know that 1062 1062 - * we are first on the timeline. 
If it is still present, 1063 1063 - * we acquire the lock on that fence and serialise with the interrupt 1064 1064 - * handler, in the process removing it from any future interrupt 1065 1065 - * callback. A will then wait on C before executing (if present). 1066 1066 - * 1067 1067 - * As B is second, it sees A as the previous fence and so waits for 1068 1068 - * it to complete its transition and takes over the occupancy for 1069 1069 - * itself -- remembering that it needs to wait on A before executing. 1043 1043 + * Both A and B have got a reference to C or NULL, depending on the 1044 1044 + * timing of the interrupt handler. Let's assume that if A has got C 1045 1045 + * then it has locked C first (before B). 1070 1046 * 1071 1047 * Note the strong ordering of the timeline also provides consistent 1072 1048 * nesting rules for the fence->lock; the inner lock is always the 1073 1049 * older lock. 1074 1050 */ 1075 1051 spin_lock_irqsave(fence->lock, flags); 1076 1076 - prev = xchg(__active_fence_slot(active), fence); 1077 1077 - if (prev) { 1078 1078 - GEM_BUG_ON(prev == fence); 1052 1052 + if (prev) 1079 1053 spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); 1054 1054 + 1055 1055 + /* 1056 1056 + * A does the cmpxchg first, and so it sees C or NULL, as before, or 1057 1057 + * something else, depending on the timing of other threads and/or 1058 1058 + * interrupt handler. If not the same as before then A unlocks C if 1059 1059 + * applicable and retries, starting from an attempt to get a new 1060 1060 + * active->fence. Meanwhile, B follows the same path as A. 1061 1061 + * Once A succeeds with cmpxchg, B fails again, retries, gets A from 1062 1062 + * active->fence, locks it as soon as A completes, and possibly 1063 1063 + * succeeds with cmpxchg. 
1064 1064 + */ 1065 1065 + while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) { 1066 1066 + if (prev) { 1067 1067 + spin_unlock(prev->lock); 1068 1068 + dma_fence_put(prev); 1069 1069 + } 1070 1070 + spin_unlock_irqrestore(fence->lock, flags); 1071 1071 + 1072 1072 + prev = i915_active_fence_get(active); 1073 1073 + GEM_BUG_ON(prev == fence); 1074 1074 + 1075 1075 + spin_lock_irqsave(fence->lock, flags); 1076 1076 + if (prev) 1077 1077 + spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); 1078 1078 + } 1079 1079 + 1080 1080 + /* 1081 1081 + * If prev is NULL then the previous fence must have been signaled 1082 1082 + * and we know that we are first on the timeline. If it is still 1083 1083 + * present then, having the lock on that fence already acquired, we 1084 1084 + * serialise with the interrupt handler, in the process of removing it 1085 1085 + * from any future interrupt callback. A will then wait on C before 1086 1086 + * executing (if present). 1087 1087 + * 1088 1088 + * As B is second, it sees A as the previous fence and so waits for 1089 1089 + * it to complete its transition and takes over the occupancy for 1090 1090 + * itself -- remembering that it needs to wait on A before executing. 1091 1091 + */ 1092 1092 + if (prev) { 1080 1093 __list_del_entry(&active->cb.node); 1081 1094 spin_unlock(prev->lock); /* serialise with prev->cb_list */ 1082 1095 } ··· 1122 1077 int err = 0; 1123 1078 1124 1079 /* Must maintain timeline ordering wrt previous active requests */ 1125 1125 - rcu_read_lock(); 1126 1080 fence = __i915_active_fence_set(active, &rq->fence); 1127 1127 - if (fence) /* but the previous fence may not belong to that timeline! */ 1128 1128 - fence = dma_fence_get_rcu(fence); 1129 1129 - rcu_read_unlock(); 1130 1081 if (fence) { 1131 1082 err = i915_request_await_dma_fence(rq, fence); 1132 1083 dma_fence_put(fence);

+11

drivers/gpu/drm/i915/i915_request.c

reviewed

··· 1661 1661 1662 1662 request_to_parent(rq)->parallel.last_rq = i915_request_get(rq); 1663 1663 1664 1664 + /* 1665 1665 + * Users have to put a reference potentially got by 1666 1666 + * __i915_active_fence_set() to the returned request 1667 1667 + * when no longer needed 1668 1668 + */ 1664 1669 return to_request(__i915_active_fence_set(&timeline->last_request, 1665 1670 &rq->fence)); 1666 1671 } ··· 1712 1707 0); 1713 1708 } 1714 1709 1710 1710 + /* 1711 1711 + * Users have to put the reference to prev potentially got 1712 1712 + * by __i915_active_fence_set() when no longer needed 1713 1713 + */ 1715 1714 return prev; 1716 1715 } 1717 1716 ··· 1769 1760 prev = __i915_request_ensure_ordering(rq, timeline); 1770 1761 else 1771 1762 prev = __i915_request_ensure_parallel_ordering(rq, timeline); 1763 1763 + if (prev) 1764 1764 + i915_request_put(prev); 1772 1765 1773 1766 /* 1774 1767 * Make sure that no request gazumped us - if it was allocated after

+1 -1

drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c

reviewed

··· 310 310 dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n", 311 311 sig_cfg.mode.hactive, new_hactive); 312 312 313 313 - sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive; 313 313 + sig_cfg.mode.hfront_porch -= new_hactive - sig_cfg.mode.hactive; 314 314 sig_cfg.mode.hactive = new_hactive; 315 315 } 316 316

drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c

reviewed

··· 569 569 }, 570 570 { /* sentinel */ } 571 571 }; 572 572 + MODULE_DEVICE_TABLE(of, s6d7aa0_of_match); 572 573 573 574 static struct mipi_dsi_driver s6d7aa0_driver = { 574 575 .probe = s6d7aa0_probe,

+2 -1

drivers/gpu/drm/ttm/ttm_bo.c

reviewed

··· 519 519 520 520 if (bo->pin_count) { 521 521 *locked = false; 522 522 - *busy = false; 522 522 + if (busy) 523 523 + *busy = false; 523 524 return false; 524 525 } 525 526