Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/xe: Append page reclamation action to tlb inval

Add page reclamation action to tlb inval backend. The page reclamation
action is paired with range tlb invalidations so both are issued at the
same time.

When page reclamation is used, the TLB invalidation is issued with an
invalid seqno, followed by an H2G page reclamation action carrying the
fence's seqno; the fence is then handled in the page reclaim action done handler.

If page reclamation fails, the TLB timeout handler is responsible for
signalling the fence and cleaning up.

v2:
- add send_page_reclaim to patch.
- Remove flush_cache and use the prl_sa pointer to determine PPC flush
instead of an explicit bool. Pass NULL as the fallback from other callers. (Matthew B)

v3:
- Add comments for flush_cache with media.

Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251212213225.3564537-20-brian3.nguyen@intel.com

authored by

Brian Nguyen and committed by
Matthew Brost
684965d9 9945e6a5

+36 -13
+25 -5
drivers/gpu/drm/xe/xe_guc_tlb_inval.c
··· 13 13 #include "xe_guc_tlb_inval.h" 14 14 #include "xe_force_wake.h" 15 15 #include "xe_mmio.h" 16 + #include "xe_sa.h" 16 17 #include "xe_tlb_inval.h" 17 18 18 19 #include "regs/xe_guc_regs.h" ··· 94 93 return -ECANCELED; 95 94 } 96 95 96 + static int send_page_reclaim(struct xe_guc *guc, u32 seqno, 97 + u64 gpu_addr) 98 + { 99 + u32 action[] = { 100 + XE_GUC_ACTION_PAGE_RECLAMATION, 101 + seqno, 102 + lower_32_bits(gpu_addr), 103 + upper_32_bits(gpu_addr), 104 + }; 105 + 106 + return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 107 + G2H_LEN_DW_PAGE_RECLAMATION, 1); 108 + } 109 + 97 110 /* 98 111 * Ensure that roundup_pow_of_two(length) doesn't overflow. 99 112 * Note that roundup_pow_of_two() operates on unsigned long, ··· 116 101 #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) 117 102 118 103 static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, 119 - u64 start, u64 end, u32 asid) 104 + u64 start, u64 end, u32 asid, 105 + struct drm_suballoc *prl_sa) 120 106 { 121 107 #define MAX_TLB_INVALIDATION_LEN 7 122 108 struct xe_guc *guc = tlb_inval->private; 123 109 struct xe_gt *gt = guc_to_gt(guc); 124 110 u32 action[MAX_TLB_INVALIDATION_LEN]; 125 111 u64 length = end - start; 126 - int len = 0; 112 + int len = 0, err; 127 113 128 114 if (guc_to_xe(guc)->info.force_execlist) 129 115 return -ECANCELED; 130 116 131 117 action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; 132 - action[len++] = seqno; 118 + action[len++] = !prl_sa ? 
seqno : TLB_INVALIDATION_SEQNO_INVALID; 133 119 if (!gt_to_xe(gt)->info.has_range_tlb_inval || 134 120 length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { 135 121 action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); ··· 171 155 ilog2(SZ_2M) + 1))); 172 156 xe_gt_assert(gt, IS_ALIGNED(start, length)); 173 157 174 - action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, true); 158 + /* Flush on NULL case, Media is not required to modify flush due to no PPC so NOP */ 159 + action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, !prl_sa); 175 160 action[len++] = asid; 176 161 action[len++] = lower_32_bits(start); 177 162 action[len++] = upper_32_bits(start); ··· 181 164 182 165 xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); 183 166 184 - return send_tlb_inval(guc, action, len); 167 + err = send_tlb_inval(guc, action, len); 168 + if (!err && prl_sa) 169 + err = send_page_reclaim(guc, seqno, xe_sa_bo_gpu_addr(prl_sa)); 170 + return err; 185 171 } 186 172 187 173 static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
+4 -3
drivers/gpu/drm/xe/xe_tlb_inval.c
··· 313 313 * @start: start address 314 314 * @end: end address 315 315 * @asid: address space id 316 + * @prl_sa: suballocation of page reclaim list if used, NULL indicates PPC flush 316 317 * 317 318 * Issue a range based TLB invalidation if supported, if not fallback to a full 318 319 * TLB invalidation. Completion of TLB is asynchronous and caller can use ··· 323 322 */ 324 323 int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, 325 324 struct xe_tlb_inval_fence *fence, u64 start, u64 end, 326 - u32 asid) 325 + u32 asid, struct drm_suballoc *prl_sa) 327 326 { 328 327 return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt, 329 - start, end, asid); 328 + start, end, asid, prl_sa); 330 329 } 331 330 332 331 /** ··· 342 341 u64 range = 1ull << vm->xe->info.va_bits; 343 342 344 343 xe_tlb_inval_fence_init(tlb_inval, &fence, true); 345 - xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid); 344 + xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid, NULL); 346 345 xe_tlb_inval_fence_wait(&fence); 347 346 } 348 347
+1 -1
drivers/gpu/drm/xe/xe_tlb_inval.h
··· 23 23 void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm); 24 24 int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, 25 25 struct xe_tlb_inval_fence *fence, 26 - u64 start, u64 end, u32 asid); 26 + u64 start, u64 end, u32 asid, struct drm_suballoc *prl_sa); 27 27 28 28 void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, 29 29 struct xe_tlb_inval_fence *fence,
+1 -1
drivers/gpu/drm/xe/xe_tlb_inval_job.c
··· 60 60 } 61 61 62 62 xe_tlb_inval_range(job->tlb_inval, ifence, job->start, 63 - job->end, job->vm->usm.asid); 63 + job->end, job->vm->usm.asid, prl_sa); 64 64 65 65 return job->fence; 66 66 }
+3 -1
drivers/gpu/drm/xe/xe_tlb_inval_types.h
··· 9 9 #include <linux/workqueue.h> 10 10 #include <linux/dma-fence.h> 11 11 12 + struct drm_suballoc; 12 13 struct xe_tlb_inval; 13 14 14 15 /** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */ ··· 41 40 * @start: Start address 42 41 * @end: End address 43 42 * @asid: Address space ID 43 + * @prl_sa: Suballocation for page reclaim list 44 44 * 45 45 * Return 0 on success, -ECANCELED if backend is mid-reset, error on 46 46 * failure 47 47 */ 48 48 int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start, 49 - u64 end, u32 asid); 49 + u64 end, u32 asid, struct drm_suballoc *prl_sa); 50 50 51 51 /** 52 52 * @initialized: Backend is initialized
+2 -2
drivers/gpu/drm/xe/xe_vm.c
··· 3928 3928 3929 3929 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, 3930 3930 &fence[fence_id], start, end, 3931 - vm->usm.asid); 3931 + vm->usm.asid, NULL); 3932 3932 if (err) 3933 3933 goto wait; 3934 3934 ++fence_id; ··· 3941 3941 3942 3942 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, 3943 3943 &fence[fence_id], start, end, 3944 - vm->usm.asid); 3944 + vm->usm.asid, NULL); 3945 3945 if (err) 3946 3946 goto wait; 3947 3947 ++fence_id;