Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2022 Intel Corporation
4 */
5
6#include "xe_tuning.h"
7
8#include <kunit/visibility.h>
9
10#include <drm/drm_managed.h>
11#include <drm/drm_print.h>
12
13#include "regs/xe_engine_regs.h"
14#include "regs/xe_gt_regs.h"
15#include "xe_gt_types.h"
16#include "xe_platform_types.h"
17#include "xe_rtp.h"
18#include "xe_sriov.h"
19
/*
 * Locally override XE_REG_MCR() so that, in this file, it expands to a plain
 * XE_REG() initializer with the .mcr flag set.
 * NOTE(review): presumably needed so MCR register definitions can be used
 * directly in the RTP tables below - confirm against xe_rtp.h.
 */
#undef XE_REG_MCR
#define XE_REG_MCR(...)	XE_REG(__VA_ARGS__, .mcr = 1)
22
23static const struct xe_rtp_entry_sr gt_tunings[] = {
24 { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
25 XE_RTP_RULES(PLATFORM(DG2)),
26 XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
27 },
28 { XE_RTP_NAME("Tuning: 32B Access Enable"),
29 XE_RTP_RULES(PLATFORM(DG2)),
30 XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
31 },
32
33 /* Xe2 */
34
35 { XE_RTP_NAME("Tuning: L3 cache"),
36 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)),
37 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
38 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
39 },
40 { XE_RTP_NAME("Tuning: L3 cache - media"),
41 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, 3499)),
42 XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
43 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
44 },
45 { XE_RTP_NAME("Tuning: Compression Overfetch"),
46 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
47 FUNC(xe_rtp_match_has_flat_ccs)),
48 XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
49 SET(CCCHKNREG1, L3CMPCTRL))
50 },
51 { XE_RTP_NAME("Tuning: Compression Overfetch - media"),
52 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
53 XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
54 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
55 },
56 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
57 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)),
58 XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
59 },
60 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
61 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
62 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
63 },
64 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
65 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
66 FUNC(xe_rtp_match_has_flat_ccs)),
67 XE_RTP_ACTIONS(SET(L3SQCREG2,
68 COMPMEMRD256BOVRFETCHEN))
69 },
70 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
71 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
72 FUNC(xe_rtp_match_has_flat_ccs)),
73 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
74 COMPMEMRD256BOVRFETCHEN))
75 },
76 { XE_RTP_NAME("Tuning: Stateless compression control"),
77 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
78 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
79 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
80 },
81 { XE_RTP_NAME("Tuning: Stateless compression control - media"),
82 XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
83 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
84 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
85 },
86 { XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
87 XE_RTP_RULES(GRAPHICS_VERSION(2004)),
88 XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
89 },
90 { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
91 XE_RTP_RULES(MEDIA_VERSION(2000)),
92 XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
93 },
94
95 /* Xe3p */
96
97 { XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
98 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
99 IS_INTEGRATED),
100 XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
101 BANK_HASH_4KB_MODE))
102 },
103};
104
105static const struct xe_rtp_entry_sr engine_tunings[] = {
106 { XE_RTP_NAME("Tuning: L3 Hashing Mask"),
107 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210),
108 FUNC(xe_rtp_match_first_render_or_compute)),
109 XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC))
110 },
111 { XE_RTP_NAME("Tuning: Set Indirect State Override"),
112 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
113 ENGINE_CLASS(RENDER)),
114 XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
115 },
116 { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"),
117 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
118 FUNC(xe_rtp_match_first_render_or_compute)),
119 XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
120 },
121 { XE_RTP_NAME("Tuning: disable HW reporting of ctx switch to GHWSP"),
122 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3500, XE_RTP_END_VERSION_UNDEFINED)),
123 XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
124 GHWSP_CSB_REPORT_DIS,
125 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
126 },
127};
128
129static const struct xe_rtp_entry_sr lrc_tunings[] = {
130 { XE_RTP_NAME("Tuning: Windower HW Filtering"),
131 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)),
132 XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, HW_FILTERING))
133 },
134
135 /* DG2 */
136
137 { XE_RTP_NAME("Tuning: L3 cache"),
138 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
139 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
140 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
141 },
142 { XE_RTP_NAME("Tuning: TDS gang timer"),
143 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
144 /* read verification is ignored as in i915 - need to check enabling */
145 XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
146 FF_MODE2_TDS_TIMER_MASK,
147 FF_MODE2_TDS_TIMER_128))
148 },
149 { XE_RTP_NAME("Tuning: TBIMR fast clip"),
150 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
151 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
152 },
153
154 /* Xe_LPG */
155
156 { XE_RTP_NAME("Tuning: L3 cache"),
157 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
158 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
159 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
160 },
161
162 /* Xe2_HPG */
163
164 { XE_RTP_NAME("Tuning: vs hit max value"),
165 XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
166 XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
167 REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
168 },
169};
170
171/**
172 * xe_tuning_init - initialize gt with tunings bookkeeping
173 * @gt: GT instance to initialize
174 *
175 * Returns 0 for success, negative error code otherwise.
176 */
177int xe_tuning_init(struct xe_gt *gt)
178{
179 struct xe_device *xe = gt_to_xe(gt);
180 size_t n_lrc, n_engine, n_gt, total;
181 unsigned long *p;
182
183 n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings));
184 n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings));
185 n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings));
186 total = n_gt + n_engine + n_lrc;
187
188 p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
189 if (!p)
190 return -ENOMEM;
191
192 gt->tuning_active.gt = p;
193 p += n_gt;
194 gt->tuning_active.engine = p;
195 p += n_engine;
196 gt->tuning_active.lrc = p;
197
198 return 0;
199}
200ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */
201
202void xe_tuning_process_gt(struct xe_gt *gt)
203{
204 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
205
206 xe_rtp_process_ctx_enable_active_tracking(&ctx,
207 gt->tuning_active.gt,
208 ARRAY_SIZE(gt_tunings));
209 xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings),
210 >->reg_sr, false);
211}
212EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
213
214void xe_tuning_process_engine(struct xe_hw_engine *hwe)
215{
216 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
217
218 xe_rtp_process_ctx_enable_active_tracking(&ctx,
219 hwe->gt->tuning_active.engine,
220 ARRAY_SIZE(engine_tunings));
221 xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
222 &hwe->reg_sr, false);
223}
224EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);
225
226/**
227 * xe_tuning_process_lrc - process lrc tunings
228 * @hwe: engine instance to process tunings for
229 *
230 * Process LRC table for this platform, saving in @hwe all the tunings that need
231 * to be applied on context restore. These are tunings touching registers that
232 * are part of the HW context image.
233 */
234void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
235{
236 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
237
238 xe_rtp_process_ctx_enable_active_tracking(&ctx,
239 hwe->gt->tuning_active.lrc,
240 ARRAY_SIZE(lrc_tunings));
241 xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings),
242 &hwe->reg_lrc, true);
243}
244
245/**
246 * xe_tuning_dump() - Dump GT tuning info into a drm printer.
247 * @gt: the &xe_gt
248 * @p: the &drm_printer
249 *
250 * Return: always 0.
251 */
252int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
253{
254 size_t idx;
255
256 drm_printf(p, "GT Tunings\n");
257 for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings))
258 drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name);
259
260 drm_puts(p, "\n");
261 drm_printf(p, "Engine Tunings\n");
262 for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings))
263 drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);
264
265 drm_puts(p, "\n");
266 drm_printf(p, "LRC Tunings\n");
267 for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings))
268 drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name);
269
270 return 0;
271}