Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/xe: Wait on in-syncs when switching to dma-fence mode

If a dma-fence submission has in-fences and pagefault queues are running
work, there is little incentive to kick the pagefault queues off the
hardware until the dma-fence submission is ready to run. Therefore, wait
on the in-fences of the dma-fence submission before removing the
pagefault queues from the hardware.

v2:
- Fix kernel doc (CI)
- Don't wait under lock (Thomas)
- Make wait interruptible

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20251212182847.1683222-6-matthew.brost@intel.com

+87 -11
+7 -2
drivers/gpu/drm/xe/xe_exec.c
··· 121 121 u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; 122 122 struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; 123 123 struct drm_exec *exec = &vm_exec.exec; 124 - u32 i, num_syncs, num_ufence = 0; 124 + u32 i, num_syncs, num_in_sync = 0, num_ufence = 0; 125 125 struct xe_validation_ctx ctx; 126 126 struct xe_sched_job *job; 127 127 struct xe_vm *vm; ··· 183 183 184 184 if (xe_sync_is_ufence(&syncs[num_syncs])) 185 185 num_ufence++; 186 + 187 + if (!num_in_sync && xe_sync_needs_wait(&syncs[num_syncs])) 188 + num_in_sync++; 186 189 } 187 190 188 191 if (XE_IOCTL_DBG(xe, num_ufence > 1)) { ··· 206 203 mode = xe_hw_engine_group_find_exec_mode(q); 207 204 208 205 if (mode == EXEC_MODE_DMA_FENCE) { 209 - err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); 206 + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode, 207 + syncs, num_in_sync ? 208 + num_syncs : 0); 210 209 if (err) 211 210 goto err_syncs; 212 211 }
+47 -8
drivers/gpu/drm/xe/xe_hw_engine_group.c
··· 11 11 #include "xe_gt.h" 12 12 #include "xe_gt_stats.h" 13 13 #include "xe_hw_engine_group.h" 14 + #include "xe_sync.h" 14 15 #include "xe_vm.h" 15 16 16 17 static void ··· 22 21 int err; 23 22 enum xe_hw_engine_group_execution_mode previous_mode; 24 23 25 - err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); 24 + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode, 25 + NULL, 0); 26 26 if (err) 27 27 return; 28 28 ··· 191 189 /** 192 190 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group 193 191 * @group: The hw engine group 192 + * @has_deps: dma-fence job triggering suspend has dependencies 194 193 * 195 194 * Return: 0 on success, negative error code on error. 196 195 */ 197 - static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) 196 + static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group, 197 + bool has_deps) 198 198 { 199 199 int err; 200 200 struct xe_exec_queue *q; ··· 205 201 lockdep_assert_held_write(&group->mode_sem); 206 202 207 203 list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 204 + bool idle_skip_suspend; 205 + 208 206 if (!xe_vm_in_fault_mode(q->vm)) 209 207 continue; 210 208 209 + idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q); 210 + if (!idle_skip_suspend && has_deps) 211 + return -EAGAIN; 212 + 211 213 xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1); 212 - need_resume |= !xe_exec_queue_idle_skip_suspend(q); 214 + 215 + need_resume |= !idle_skip_suspend; 213 216 q->ops->suspend(q); 214 217 } 215 218 ··· 269 258 return 0; 270 259 } 271 260 272 - static int switch_mode(struct xe_hw_engine_group *group) 261 + static int switch_mode(struct xe_hw_engine_group *group, bool has_deps) 273 262 { 274 263 int err = 0; 275 264 enum xe_hw_engine_group_execution_mode new_mode; ··· 279 268 switch (group->cur_mode) { 280 269 case EXEC_MODE_LR: 281 
270 new_mode = EXEC_MODE_DMA_FENCE; 282 - err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); 271 + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group, 272 + has_deps); 283 273 break; 284 274 case EXEC_MODE_DMA_FENCE: 285 275 new_mode = EXEC_MODE_LR; ··· 296 284 return 0; 297 285 } 298 286 287 + static int wait_syncs(struct xe_sync_entry *syncs, int num_syncs) 288 + { 289 + int err, i; 290 + 291 + for (i = 0; i < num_syncs; ++i) { 292 + err = xe_sync_entry_wait(syncs + i); 293 + if (err) 294 + return err; 295 + } 296 + 297 + return 0; 298 + } 299 + 299 300 /** 300 301 * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode 301 302 * @group: The hw engine group 302 303 * @new_mode: The new execution mode 303 304 * @previous_mode: Pointer to the previous mode provided for use by caller 305 + * @syncs: Syncs from exec IOCTL 306 + * @num_syncs: Number of syncs from exec IOCTL 304 307 * 305 308 * Return: 0 if successful, -EINTR if locking failed. 306 309 */ 307 310 int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, 308 311 enum xe_hw_engine_group_execution_mode new_mode, 309 - enum xe_hw_engine_group_execution_mode *previous_mode) 312 + enum xe_hw_engine_group_execution_mode *previous_mode, 313 + struct xe_sync_entry *syncs, int num_syncs) 310 314 __acquires(&group->mode_sem) 311 315 { 316 + bool has_deps = !!num_syncs; 312 317 int err = down_read_interruptible(&group->mode_sem); 313 318 314 319 if (err) ··· 335 306 336 307 if (new_mode != group->cur_mode) { 337 308 up_read(&group->mode_sem); 309 + retry: 338 310 err = down_write_killable(&group->mode_sem); 339 311 if (err) 340 312 return err; 341 313 342 314 if (new_mode != group->cur_mode) { 343 - err = switch_mode(group); 315 + err = switch_mode(group, has_deps); 344 316 if (err) { 345 317 up_write(&group->mode_sem); 346 - return err; 318 + 319 + if (err != -EAGAIN) 320 + return err; 321 + 322 + err = wait_syncs(syncs, num_syncs); 323 + if (err) 324 + return err; 325 + 326 
+ has_deps = false; 327 + goto retry; 347 328 } 348 329 } 349 330 downgrade_write(&group->mode_sem);
+3 -1
drivers/gpu/drm/xe/xe_hw_engine_group.h
··· 11 11 struct drm_device; 12 12 struct xe_exec_queue; 13 13 struct xe_gt; 14 + struct xe_sync_entry; 14 15 15 16 int xe_hw_engine_setup_groups(struct xe_gt *gt); 16 17 ··· 20 19 21 20 int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, 22 21 enum xe_hw_engine_group_execution_mode new_mode, 23 - enum xe_hw_engine_group_execution_mode *previous_mode); 22 + enum xe_hw_engine_group_execution_mode *previous_mode, 23 + struct xe_sync_entry *syncs, int num_syncs); 24 24 void xe_hw_engine_group_put(struct xe_hw_engine_group *group); 25 25 26 26 enum xe_hw_engine_group_execution_mode
+28
drivers/gpu/drm/xe/xe_sync.c
··· 228 228 return 0; 229 229 } 230 230 231 + /** 232 + * xe_sync_entry_wait() - Wait on in-sync 233 + * @sync: Sync object 234 + * 235 + * If the sync is in an in-sync, wait on the sync to signal. 236 + * 237 + * Return: 0 on success, -ERESTARTSYS on failure (interruption) 238 + */ 239 + int xe_sync_entry_wait(struct xe_sync_entry *sync) 240 + { 241 + if (sync->flags & DRM_XE_SYNC_FLAG_SIGNAL) 242 + return 0; 243 + 244 + return dma_fence_wait(sync->fence, true); 245 + } 246 + 247 + /** 248 + * xe_sync_needs_wait() - Sync needs a wait (input dma-fence not signaled) 249 + * @sync: Sync object 250 + * 251 + * Return: True if sync needs a wait, False otherwise 252 + */ 253 + bool xe_sync_needs_wait(struct xe_sync_entry *sync) 254 + { 255 + return !(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL) && 256 + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &sync->fence->flags); 257 + } 258 + 231 259 void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) 232 260 { 233 261 if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
+2
drivers/gpu/drm/xe/xe_sync.h
··· 29 29 struct xe_sched_job *job); 30 30 void xe_sync_entry_signal(struct xe_sync_entry *sync, 31 31 struct dma_fence *fence); 32 + int xe_sync_entry_wait(struct xe_sync_entry *sync); 33 + bool xe_sync_needs_wait(struct xe_sync_entry *sync); 32 34 void xe_sync_entry_cleanup(struct xe_sync_entry *sync); 33 35 struct dma_fence * 34 36 xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,