Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-xe-fixes-2026-02-26' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

- W/a fix for multi-cast registers (Roper)
- Fix xe_sync initialization issues (Shuicheng)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/aaBGHy_0RLGGIBP5@intel.com

+81 -21
+6
drivers/gpu/drm/xe/regs/xe_engine_regs.h
··· 96 96 #define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13) 97 97 98 98 #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) 99 + 100 + #define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc) 101 + #define SELECTIVE_READ_ADDRESSING REG_BIT(30) 102 + #define SELECTIVE_READ_GROUP REG_GENMASK(29, 23) 103 + #define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16) 104 + 99 105 /* 100 106 * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. 101 107 * The lsb of each can be considered a separate enabling bit for encryption.
+54 -12
drivers/gpu/drm/xe/xe_gt.c
··· 210 210 return ret; 211 211 } 212 212 213 + /* Dwords required to emit a RMW of a register */ 214 + #define EMIT_RMW_DW 20 215 + 213 216 static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) 214 217 { 215 - struct xe_reg_sr *sr = &q->hwe->reg_lrc; 218 + struct xe_hw_engine *hwe = q->hwe; 219 + struct xe_reg_sr *sr = &hwe->reg_lrc; 216 220 struct xe_reg_sr_entry *entry; 217 - int count_rmw = 0, count = 0, ret; 221 + int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret; 218 222 unsigned long idx; 219 223 struct xe_bb *bb; 220 224 size_t bb_len = 0; ··· 228 224 xa_for_each(&sr->xa, idx, entry) { 229 225 if (entry->reg.masked || entry->clr_bits == ~0) 230 226 ++count; 227 + else if (entry->reg.mcr) 228 + ++count_rmw_mcr; 231 229 else 232 230 ++count_rmw; 233 231 } ··· 237 231 if (count) 238 232 bb_len += count * 2 + 1; 239 233 240 - if (count_rmw) 241 - bb_len += count_rmw * 20 + 7; 234 + /* 235 + * RMW of MCR registers is the same as a normal RMW, except an 236 + * additional LRI (3 dwords) is required per register to steer the read 237 + * to a non-terminated instance. 238 + * 239 + * We could probably shorten the batch slightly by eliding the 240 + * steering for consecutive MCR registers that have the same 241 + * group/instance target, but it's not worth the extra complexity to do 242 + * so. 243 + */ 244 + bb_len += count_rmw * EMIT_RMW_DW; 245 + bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3); 242 246 243 - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) 247 + /* 248 + * After doing all RMW, we need 7 trailing dwords to clean up, 249 + * plus an additional 3 dwords to reset steering if any of the 250 + * registers were MCR. 251 + */ 252 + if (count_rmw || count_rmw_mcr) 253 + bb_len += 7 + (count_rmw_mcr ? 3 : 0); 254 + 255 + if (hwe->class == XE_ENGINE_CLASS_RENDER) 244 256 /* 245 257 * Big enough to emit all of the context's 3DSTATE via 246 258 * xe_lrc_emit_hwe_state_instructions() 247 259 */ 248 - bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); 260 + bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32); 249 261 250 - xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); 262 + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len); 251 263 252 264 bb = xe_bb_new(gt, bb_len, false); 253 265 if (IS_ERR(bb)) ··· 300 276 } 301 277 } 302 278 303 - if (count_rmw) { 304 - /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ 305 - 279 + if (count_rmw || count_rmw_mcr) { 306 280 xa_for_each(&sr->xa, idx, entry) { 307 281 if (entry->reg.masked || entry->clr_bits == ~0) 308 282 continue; 283 + 284 + if (entry->reg.mcr) { 285 + struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw }; 286 + u8 group, instance; 287 + 288 + xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); 289 + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); 290 + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr; 291 + *cs++ = SELECTIVE_READ_ADDRESSING | 292 + REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) | 293 + REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance); 294 + } 309 295 310 296 *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; 311 297 *cs++ = entry->reg.addr; ··· 342 308 *cs++ = CS_GPR_REG(0, 0).addr; 343 309 *cs++ = entry->reg.addr; 344 310 345 - xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", 346 - entry->reg.addr, entry->clr_bits, entry->set_bits); 311 + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n", 312 + entry->reg.addr, entry->clr_bits, entry->set_bits, 313 + entry->reg.mcr ? " (MCR)" : ""); 347 314 } 348 315 349 316 /* reset used GPR */ ··· 356 321 *cs++ = 0; 357 322 *cs++ = CS_GPR_REG(0, 2).addr; 358 323 *cs++ = 0; 324 + 325 + /* reset steering */ 326 + if (count_rmw_mcr) { 327 + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); 328 + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr; 329 + *cs++ = 0; 330 + } 359 331 } 360 332 361 333 cs = xe_lrc_emit_hwe_state_instructions(q, cs);
+21 -9
drivers/gpu/drm/xe/xe_sync.c
··· 146 146 147 147 if (!signal) { 148 148 sync->fence = drm_syncobj_fence_get(sync->syncobj); 149 - if (XE_IOCTL_DBG(xe, !sync->fence)) 150 - return -EINVAL; 149 + if (XE_IOCTL_DBG(xe, !sync->fence)) { 150 + err = -EINVAL; 151 + goto free_sync; 152 + } 151 153 } 152 154 break; 153 155 ··· 169 167 170 168 if (signal) { 171 169 sync->chain_fence = dma_fence_chain_alloc(); 172 - if (!sync->chain_fence) 173 - return -ENOMEM; 170 + if (!sync->chain_fence) { 171 + err = -ENOMEM; 172 + goto free_sync; 173 + } 174 174 } else { 175 175 sync->fence = drm_syncobj_fence_get(sync->syncobj); 176 - if (XE_IOCTL_DBG(xe, !sync->fence)) 177 - return -EINVAL; 176 + if (XE_IOCTL_DBG(xe, !sync->fence)) { 177 + err = -EINVAL; 178 + goto free_sync; 179 + } 178 180 179 181 err = dma_fence_chain_find_seqno(&sync->fence, 180 182 sync_in.timeline_value); 181 183 if (err) 182 - return err; 184 + goto free_sync; 183 185 } 184 186 break; 185 187 ··· 206 200 if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) 207 201 return PTR_ERR(sync->ufence); 208 202 sync->ufence_chain_fence = dma_fence_chain_alloc(); 209 - if (!sync->ufence_chain_fence) 210 - return -ENOMEM; 203 + if (!sync->ufence_chain_fence) { 204 + err = -ENOMEM; 205 + goto free_sync; 206 + } 211 207 sync->ufence_syncobj = ufence_syncobj; 212 208 } 213 209 ··· 224 216 sync->timeline_value = sync_in.timeline_value; 225 217 226 218 return 0; 219 + 220 + free_sync: 221 + xe_sync_entry_cleanup(sync); 222 + return err; 227 223 } 228 224 ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO); 229 225