Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/xe: Optimize flushing of L2$ by skipping unnecessary page reclaim

Some L2$ flushing is already managed by hardware, such as for
transient display. In those scenarios, page reclamation is
unnecessary and only results in redundant cacheline flushes, so skip
over the corresponding ranges.

v2:
- Elaborated on reasoning for page reclamation skip based on
Tejas's discussion. (Matthew A, Tejas)

v3:
- Removed MEDIA_IS_ON due to a racy condition, which also resulted in
the removal of the relevant registers and values. (Matthew A)
- Moved l3 policy access to xe_pat. (Matthew A)

v4:
- Updated comments based on previous change. (Tejas)
- Moved PAT index macros back to xe_pat.c.

Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251212213225.3564537-21-brian3.nguyen@intel.com

authored by

Brian Nguyen and committed by
Matthew Brost
7c52f13b 684965d9

+55 -1
+32
drivers/gpu/drm/xe/xe_page_reclaim.c
··· 13 13 #include "regs/xe_gt_regs.h" 14 14 #include "xe_assert.h" 15 15 #include "xe_macros.h" 16 + #include "xe_mmio.h" 17 + #include "xe_pat.h" 16 18 #include "xe_sa.h" 17 19 #include "xe_tlb_inval_types.h" 20 + #include "xe_vm.h" 21 + 22 + /** 23 + * xe_page_reclaim_skip() - Decide whether PRL should be skipped for a VMA 24 + * @tile: Tile owning the VMA 25 + * @vma: VMA under consideration 26 + * 27 + * PPC flushing may be handled by HW for specific PAT encodings. 28 + * Skip PPC flushing/Page Reclaim for scenarios below due to redundant 29 + * flushes. 30 + * - pat_index is transient display (1) 31 + * 32 + * Return: true when page reclamation is unnecessary, false otherwise. 33 + */ 34 + bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma) 35 + { 36 + u8 l3_policy; 37 + 38 + l3_policy = xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index); 39 + 40 + /* 41 + * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC 42 + * Transient display flushes is taken care by HW, l3_policy = 1. 43 + * 44 + * HW will sequence these transient flushes at various sync points so 45 + * any event of page reclamation will hit these sync points before 46 + * page reclamation could execute. 47 + */ 48 + return (l3_policy == XE_L3_POLICY_XD); 49 + } 18 50 19 51 /** 20 52 * xe_page_reclaim_create_prl_bo() - Back a PRL with a suballocated GGTT BO
+3
drivers/gpu/drm/xe/xe_page_reclaim.h
··· 18 18 19 19 struct xe_tlb_inval; 20 20 struct xe_tlb_inval_fence; 21 + struct xe_tile; 22 + struct xe_vma; 21 23 22 24 struct xe_guc_page_reclaim_entry { 23 25 u64 qw; ··· 70 68 prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST; 71 69 } 72 70 71 + bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma); 73 72 struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval, 74 73 struct xe_page_reclaim_list *prl, 75 74 struct xe_tlb_inval_fence *fence);
+8
drivers/gpu/drm/xe/xe_pat.c
··· 9 9 10 10 #include <generated/xe_wa_oob.h> 11 11 12 + #include "regs/xe_gt_regs.h" 12 13 #include "regs/xe_reg_defs.h" 13 14 #include "xe_assert.h" 14 15 #include "xe_device.h" ··· 230 229 { 231 230 WARN_ON(pat_index >= xe->pat.n_entries); 232 231 return !!(xe->pat.table[pat_index].value & XE2_COMP_EN); 232 + } 233 + 234 + u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index) 235 + { 236 + WARN_ON(pat_index >= xe->pat.n_entries); 237 + 238 + return REG_FIELD_GET(XE2_L3_POLICY, xe->pat.table[pat_index].value); 233 239 } 234 240 235 241 static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+10
drivers/gpu/drm/xe/xe_pat.h
··· 69 69 */ 70 70 bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index); 71 71 72 + #define XE_L3_POLICY_WB 0 /* Write-back */ 73 + #define XE_L3_POLICY_XD 1 /* WB - Transient Display */ 74 + #define XE_L3_POLICY_UC 3 /* Uncached */ 75 + /** 76 + * xe_pat_index_get_l3_policy - Extract the L3 policy for the given pat_index. 77 + * @xe: xe device 78 + * @pat_index: The pat_index to query 79 + */ 80 + u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index); 81 + 72 82 #endif
+2 -1
drivers/gpu/drm/xe/xe_pt.c
··· 2030 2030 xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl); 2031 2031 2032 2032 /* Page reclaim may not be needed due to other features, so skip the corresponding VMA */ 2033 - pt_op->prl = (xe_page_reclaim_list_valid(&pt_update_ops->prl)) ? &pt_update_ops->prl : NULL; 2033 + pt_op->prl = (xe_page_reclaim_list_valid(&pt_update_ops->prl) && 2034 + !xe_page_reclaim_skip(tile, vma)) ? &pt_update_ops->prl : NULL; 2034 2035 2035 2036 err = vma_reserve_fences(tile_to_xe(tile), vma); 2036 2037 if (err)