Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 *
5 */
6
7/* Support for NVIDIA specific attributes. */
8
9#include <linux/io.h>
10#include <linux/module.h>
11#include <linux/property.h>
12#include <linux/topology.h>
13
14#include "arm_cspmu.h"
15
/* PCIE (MCF) PMU: one filter bit per root port (see "root_port" format). */
#define NV_PCIE_PORT_COUNT 10ULL
#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)

/* NVLink-C2C PMU: one filter bit per port (see "port" format). */
#define NV_NVL_C2C_PORT_COUNT 2ULL
#define NV_NVL_C2C_FILTER_ID_MASK GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)

/* CNVLink PMU: one filter bit per remote socket (see "rem_socket" format). */
#define NV_CNVL_PORT_COUNT 4ULL
#define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)

/*
 * UCF PMU filter layout: source select in bits 2:0, destination select in
 * bits 11:8 (bits 7:3 are unused; see ucf_pmu_format_attrs).
 */
#define NV_UCF_SRC_COUNT 3ULL
#define NV_UCF_DST_COUNT 4ULL
#define NV_UCF_FILTER_ID_MASK GENMASK_ULL(11, 0)
#define NV_UCF_FILTER_SRC GENMASK_ULL(2, 0)
#define NV_UCF_FILTER_DST GENMASK_ULL(11, 8)
#define NV_UCF_FILTER_DEFAULT (NV_UCF_FILTER_SRC | NV_UCF_FILTER_DST)

/*
 * PCIE v2 PMU filter layout: root-port mask in bits 7:0, BDF value in bits
 * 23:8, BDF-enable in bit 24 (see pcie_v2_pmu_format_attrs).
 */
#define NV_PCIE_V2_PORT_COUNT 8ULL
#define NV_PCIE_V2_FILTER_ID_MASK GENMASK_ULL(24, 0)
#define NV_PCIE_V2_FILTER_PORT GENMASK_ULL(NV_PCIE_V2_PORT_COUNT - 1, 0)
#define NV_PCIE_V2_FILTER_BDF_VAL GENMASK_ULL(23, NV_PCIE_V2_PORT_COUNT)
#define NV_PCIE_V2_FILTER_BDF_EN BIT(24)
#define NV_PCIE_V2_FILTER_BDF_VAL_EN GENMASK_ULL(24, NV_PCIE_V2_PORT_COUNT)
#define NV_PCIE_V2_FILTER_DEFAULT NV_PCIE_V2_FILTER_PORT

/* PCIE v2 PMU second filter: destination select, one bit per destination. */
#define NV_PCIE_V2_DST_COUNT 5ULL
#define NV_PCIE_V2_FILTER2_ID_MASK GENMASK_ULL(4, 0)
#define NV_PCIE_V2_FILTER2_DST GENMASK_ULL(NV_PCIE_V2_DST_COUNT - 1, 0)
#define NV_PCIE_V2_FILTER2_DEFAULT NV_PCIE_V2_FILTER2_DST

/*
 * PCIE-TGT PMU: event type in config bits 2:0, then the filter2 fields
 * (port mask, address-filter enable/slot select) packed above it.
 */
#define NV_PCIE_TGT_PORT_COUNT 8ULL
#define NV_PCIE_TGT_EV_TYPE_CC 0x4
#define NV_PCIE_TGT_EV_TYPE_COUNT 3ULL
#define NV_PCIE_TGT_EV_TYPE_MASK GENMASK_ULL(NV_PCIE_TGT_EV_TYPE_COUNT - 1, 0)
#define NV_PCIE_TGT_FILTER2_MASK GENMASK_ULL(NV_PCIE_TGT_PORT_COUNT, 0)
#define NV_PCIE_TGT_FILTER2_PORT GENMASK_ULL(NV_PCIE_TGT_PORT_COUNT - 1, 0)
#define NV_PCIE_TGT_FILTER2_ADDR_EN BIT(NV_PCIE_TGT_PORT_COUNT)
#define NV_PCIE_TGT_FILTER2_ADDR GENMASK_ULL(15, NV_PCIE_TGT_PORT_COUNT)
#define NV_PCIE_TGT_FILTER2_DEFAULT NV_PCIE_TGT_FILTER2_PORT

/* PCIE-TGT address-range filter register block: 8 slots, 20 bytes apart. */
#define NV_PCIE_TGT_ADDR_COUNT 8ULL
#define NV_PCIE_TGT_ADDR_STRIDE 20
#define NV_PCIE_TGT_ADDR_CTRL 0xD38
#define NV_PCIE_TGT_ADDR_BASE_LO 0xD3C
#define NV_PCIE_TGT_ADDR_BASE_HI 0xD40
#define NV_PCIE_TGT_ADDR_MASK_LO 0xD44
#define NV_PCIE_TGT_ADDR_MASK_HI 0xD48

/* Catch-all filter mask for unrecognized NVIDIA PMUs. */
#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)

/* PMIIDR bits compared when matching a PMU against the template table. */
#define NV_PRODID_MASK (PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION)

#define NV_FORMAT_NAME_GENERIC 0

/* Fetch the NVIDIA-specific context attached to a CSPMU instance. */
#define to_nv_cspmu_ctx(cspmu) ((struct nv_cspmu_ctx *)(cspmu->impl.ctx))

#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config) \
	ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, _config)

/* Expand to four consecutive per-instance events: <pref>_0_<suff> .. _3_. */
#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config) \
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, _config), \
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, _config + 1), \
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, _config + 2), \
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, _config + 3)
/* Per-PMU-instance context, copied at init time from the matched template. */
struct nv_cspmu_ctx {
	const char *name;		/* PMU device name */

	struct attribute **event_attr;	/* sysfs event attributes */
	struct attribute **format_attr;	/* sysfs format attributes */

	u32 filter_mask;		/* valid bits of the filter value */
	u32 filter_default_val;		/* used when no filter bit is set */
	u32 filter2_mask;		/* valid bits of the filter2 value */
	u32 filter2_default_val;	/* used when no filter2 bit is set */

	/* Derive filter register values from an event; either may be NULL. */
	u32 (*get_filter)(const struct perf_event *event);
	u32 (*get_filter2)(const struct perf_event *event);

	/* Flavor-private data (e.g. struct pcie_tgt_data for PCIE-TGT). */
	void *data;

	/* Optional extra init hook run at the end of nv_cspmu_init_ops(). */
	int (*init_data)(struct arm_cspmu *cspmu);
};
98
/* Events exposed by the SCF PMU (hardware event IDs in the second arg). */
static struct attribute *scf_pmu_event_attrs[] = {
	ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1d),

	ARM_CSPMU_EVENT_ATTR(scf_cache_allocate, 0xF0),
	ARM_CSPMU_EVENT_ATTR(scf_cache_refill, 0xF1),
	ARM_CSPMU_EVENT_ATTR(scf_cache, 0xF2),
	ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3),

	NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101),
	NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109),

	NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115),

	NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d),
	NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135),
	NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139),

	ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d),
	ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e),
	ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f),
	ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173),
	ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174),
	ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179),
	ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b),

	NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c),

	ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0),
	ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1),
	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2),
	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding, 0x1a3),
	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access, 0x1a4),

	ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5),
	ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6),
	ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7),
	ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab),
	ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac),
	ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1),

	ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca),

	ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db),

	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
	NULL,
};

/* Events shared by the MCF-based PMUs (PCIE, NVLink-C2C, CNVLink). */
static struct attribute *mcf_pmu_event_attrs[] = {
	ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0),
	ARM_CSPMU_EVENT_ATTR(rd_bytes_rem, 0x1),
	ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2),
	ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3),
	ARM_CSPMU_EVENT_ATTR(total_bytes_loc, 0x4),
	ARM_CSPMU_EVENT_ATTR(total_bytes_rem, 0x5),
	ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6),
	ARM_CSPMU_EVENT_ATTR(rd_req_rem, 0x7),
	ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8),
	ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9),
	ARM_CSPMU_EVENT_ATTR(total_req_loc, 0xa),
	ARM_CSPMU_EVENT_ATTR(total_req_rem, 0xb),
	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc),
	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem, 0xd),
	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
	NULL,
};

/* Events exposed by the UCF PMU. */
static struct attribute *ucf_pmu_event_attrs[] = {
	ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1D),

	ARM_CSPMU_EVENT_ATTR(slc_allocate, 0xF0),
	ARM_CSPMU_EVENT_ATTR(slc_wb, 0xF3),
	ARM_CSPMU_EVENT_ATTR(slc_refill_rd, 0x109),
	ARM_CSPMU_EVENT_ATTR(slc_refill_wr, 0x10A),
	ARM_CSPMU_EVENT_ATTR(slc_hit_rd, 0x119),

	ARM_CSPMU_EVENT_ATTR(slc_access_dataless, 0x183),
	ARM_CSPMU_EVENT_ATTR(slc_access_atomic, 0x184),

	ARM_CSPMU_EVENT_ATTR(slc_access_rd, 0x111),
	ARM_CSPMU_EVENT_ATTR(slc_access_wr, 0x112),
	ARM_CSPMU_EVENT_ATTR(slc_bytes_rd, 0x113),
	ARM_CSPMU_EVENT_ATTR(slc_bytes_wr, 0x114),

	ARM_CSPMU_EVENT_ATTR(mem_access_rd, 0x121),
	ARM_CSPMU_EVENT_ATTR(mem_access_wr, 0x122),
	ARM_CSPMU_EVENT_ATTR(mem_bytes_rd, 0x123),
	ARM_CSPMU_EVENT_ATTR(mem_bytes_wr, 0x124),

	ARM_CSPMU_EVENT_ATTR(local_snoop, 0x180),
	ARM_CSPMU_EVENT_ATTR(ext_snp_access, 0x181),
	ARM_CSPMU_EVENT_ATTR(ext_snp_evict, 0x182),

	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
	NULL
};

/* Events exposed by the PCIE v2 (PCIE-SRC) PMU. */
static struct attribute *pcie_v2_pmu_event_attrs[] = {
	ARM_CSPMU_EVENT_ATTR(rd_bytes, 0x0),
	ARM_CSPMU_EVENT_ATTR(wr_bytes, 0x1),
	ARM_CSPMU_EVENT_ATTR(rd_req, 0x2),
	ARM_CSPMU_EVENT_ATTR(wr_req, 0x3),
	ARM_CSPMU_EVENT_ATTR(rd_cum_outs, 0x4),
	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
	NULL
};

/*
 * Events exposed by the PCIE-TGT PMU.  Its cycle counter uses a dedicated
 * event-type encoding rather than the generic CSPMU default.
 */
static struct attribute *pcie_tgt_pmu_event_attrs[] = {
	ARM_CSPMU_EVENT_ATTR(rd_bytes, 0x0),
	ARM_CSPMU_EVENT_ATTR(wr_bytes, 0x1),
	ARM_CSPMU_EVENT_ATTR(rd_req, 0x2),
	ARM_CSPMU_EVENT_ATTR(wr_req, 0x3),
	ARM_CSPMU_EVENT_ATTR(cycles, NV_PCIE_TGT_EV_TYPE_CC),
	NULL
};

/* Minimal event list for unrecognized NVIDIA PMUs. */
static struct attribute *generic_pmu_event_attrs[] = {
	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
	NULL,
};
219
/* SCF PMU: event selection only, no filtering. */
static struct attribute *scf_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	NULL,
};

/* PCIE (MCF) PMU: root-port selection bitmask in config1. */
static struct attribute *pcie_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"),
	NULL,
};

/* NVLink-C2C PMUs: port selection bitmask in config1. */
static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"),
	NULL,
};

/* CNVLink PMU: remote-socket selection bitmask in config1. */
static struct attribute *cnvlink_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"),
	NULL,
};

/* UCF PMU: per-source and per-destination selection bits in config1. */
static struct attribute *ucf_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_ATTR(src_loc_noncpu, "config1:0"),
	ARM_CSPMU_FORMAT_ATTR(src_loc_cpu, "config1:1"),
	ARM_CSPMU_FORMAT_ATTR(src_rem, "config1:2"),
	ARM_CSPMU_FORMAT_ATTR(dst_loc_cmem, "config1:8"),
	ARM_CSPMU_FORMAT_ATTR(dst_loc_gmem, "config1:9"),
	ARM_CSPMU_FORMAT_ATTR(dst_loc_other, "config1:10"),
	ARM_CSPMU_FORMAT_ATTR(dst_rem, "config1:11"),
	NULL
};

/* PCIE v2 PMU: source filter in config1, destination filter in config2. */
static struct attribute *pcie_v2_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_ATTR(src_rp_mask, "config1:0-7"),
	ARM_CSPMU_FORMAT_ATTR(src_bdf, "config1:8-23"),
	ARM_CSPMU_FORMAT_ATTR(src_bdf_en, "config1:24"),
	ARM_CSPMU_FORMAT_ATTR(dst_loc_cmem, "config2:0"),
	ARM_CSPMU_FORMAT_ATTR(dst_loc_gmem, "config2:1"),
	ARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_p2p, "config2:2"),
	ARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_cxl, "config2:3"),
	ARM_CSPMU_FORMAT_ATTR(dst_rem, "config2:4"),
	NULL
};

/*
 * PCIE-TGT PMU: event type and filter bits share config; the address-range
 * base and mask occupy config1/config2 in full.
 */
static struct attribute *pcie_tgt_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_ATTR(event, "config:0-2"),
	ARM_CSPMU_FORMAT_ATTR(dst_rp_mask, "config:3-10"),
	ARM_CSPMU_FORMAT_ATTR(dst_addr_en, "config:11"),
	ARM_CSPMU_FORMAT_ATTR(dst_addr_base, "config1:0-63"),
	ARM_CSPMU_FORMAT_ATTR(dst_addr_mask, "config2:0-63"),
	NULL
};

/* Catch-all formats for unrecognized NVIDIA PMUs. */
static struct attribute *generic_pmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_FILTER_ATTR,
	ARM_CSPMU_FORMAT_FILTER2_ATTR,
	NULL,
};
283
284static struct attribute **
285nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
286{
287 const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
288
289 return ctx->event_attr;
290}
291
292static struct attribute **
293nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
294{
295 const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
296
297 return ctx->format_attr;
298}
299
300static const char *
301nv_cspmu_get_name(const struct arm_cspmu *cspmu)
302{
303 const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
304
305 return ctx->name;
306}
307
#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
/*
 * Read the "instance_id" property of the PMU's associated ACPI device into
 * @id.  Returns 0 on success or a negative errno.
 */
static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)
{
	struct fwnode_handle *fwnode;
	struct acpi_device *adev;
	int ret;

	adev = arm_cspmu_acpi_dev_get(cspmu);
	if (!adev)
		return -ENODEV;

	fwnode = acpi_fwnode_handle(adev);
	ret = fwnode_property_read_u32(fwnode, "instance_id", id);
	if (ret)
		dev_err(cspmu->dev, "Failed to get instance ID\n");

	/* Drop the device reference taken by arm_cspmu_acpi_dev_get(). */
	acpi_dev_put(adev);
	return ret;
}
#else
/* Instance IDs are only discoverable via ACPI on arm64. */
static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)
{
	return -EINVAL;
}
#endif
333
334static u32 nv_cspmu_event_filter(const struct perf_event *event)
335{
336 const struct nv_cspmu_ctx *ctx =
337 to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
338
339 const u32 filter_val = event->attr.config1 & ctx->filter_mask;
340
341 if (filter_val == 0)
342 return ctx->filter_default_val;
343
344 return filter_val;
345}
346
347static u32 nv_cspmu_event_filter2(const struct perf_event *event)
348{
349 const struct nv_cspmu_ctx *ctx =
350 to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
351
352 const u32 filter_val = event->attr.config2 & ctx->filter2_mask;
353
354 if (filter_val == 0)
355 return ctx->filter2_default_val;
356
357 return filter_val;
358}
359
360static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
361 const struct perf_event *event)
362{
363 u32 filter, offset;
364 const struct nv_cspmu_ctx *ctx =
365 to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
366 offset = 4 * event->hw.idx;
367
368 if (ctx->get_filter) {
369 filter = ctx->get_filter(event);
370 writel(filter, cspmu->base0 + PMEVFILTR + offset);
371 }
372
373 if (ctx->get_filter2) {
374 filter = ctx->get_filter2(event);
375 writel(filter, cspmu->base0 + PMEVFILT2R + offset);
376 }
377}
378
379static void nv_cspmu_reset_ev_filter(struct arm_cspmu *cspmu,
380 const struct perf_event *event)
381{
382 const struct nv_cspmu_ctx *ctx =
383 to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
384 const u32 offset = 4 * event->hw.idx;
385
386 if (ctx->get_filter)
387 writel(0, cspmu->base0 + PMEVFILTR + offset);
388
389 if (ctx->get_filter2)
390 writel(0, cspmu->base0 + PMEVFILT2R + offset);
391}
392
393static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
394 const struct perf_event *event)
395{
396 u32 filter = nv_cspmu_event_filter(event);
397
398 writel(filter, cspmu->base0 + PMCCFILTR);
399}
400
401static u32 ucf_pmu_event_filter(const struct perf_event *event)
402{
403 u32 ret, filter, src, dst;
404
405 filter = nv_cspmu_event_filter(event);
406
407 /* Monitor all sources if none is selected. */
408 src = FIELD_GET(NV_UCF_FILTER_SRC, filter);
409 if (src == 0)
410 src = GENMASK_ULL(NV_UCF_SRC_COUNT - 1, 0);
411
412 /* Monitor all destinations if none is selected. */
413 dst = FIELD_GET(NV_UCF_FILTER_DST, filter);
414 if (dst == 0)
415 dst = GENMASK_ULL(NV_UCF_DST_COUNT - 1, 0);
416
417 ret = FIELD_PREP(NV_UCF_FILTER_SRC, src);
418 ret |= FIELD_PREP(NV_UCF_FILTER_DST, dst);
419
420 return ret;
421}
422
423static u32 pcie_v2_pmu_bdf_val_en(u32 filter)
424{
425 const u32 bdf_en = FIELD_GET(NV_PCIE_V2_FILTER_BDF_EN, filter);
426
427 /* Returns both BDF value and enable bit if BDF filtering is enabled. */
428 if (bdf_en)
429 return FIELD_GET(NV_PCIE_V2_FILTER_BDF_VAL_EN, filter);
430
431 /* Ignore the BDF value if BDF filter is not enabled. */
432 return 0;
433}
434
/*
 * Compute the PCIE v2 filter value for @event.  An explicit config1
 * selection wins; otherwise a sibling inherits its group leader's filter
 * when the leader enabled BDF filtering (the hardware supports only one
 * BDF setting shared by all counters); otherwise the flavor default.
 */
static u32 pcie_v2_pmu_event_filter(const struct perf_event *event)
{
	u32 filter, lead_filter, lead_bdf;
	struct perf_event *leader;
	const struct nv_cspmu_ctx *ctx =
		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));

	filter = event->attr.config1 & ctx->filter_mask;
	if (filter != 0)
		return filter;

	leader = event->group_leader;

	/* Use leader's filter value if its BDF filtering is enabled. */
	if (event != leader) {
		/* Recursion is bounded: the leader is its own group leader. */
		lead_filter = pcie_v2_pmu_event_filter(leader);
		lead_bdf = pcie_v2_pmu_bdf_val_en(lead_filter);
		if (lead_bdf != 0)
			return lead_filter;
	}

	/* Otherwise, return default filter value. */
	return ctx->filter_default_val;
}
459
/*
 * Validate a new PCIE v2 event against hardware filter constraints.
 * Returns 0 if the event may be scheduled, -EINVAL otherwise.
 */
static int pcie_v2_pmu_validate_event(struct arm_cspmu *cspmu,
				      struct perf_event *new_ev)
{
	/*
	 * Make sure the events are using same BDF filter since the PCIE-SRC PMU
	 * only supports one common BDF filter setting for all of the counters.
	 */

	int idx;
	u32 new_filter, new_rp, new_bdf, new_lead_filter, new_lead_bdf;
	struct perf_event *new_leader;

	/* The cycle counter has no programmable filter to conflict on. */
	if (cspmu->impl.ops.is_cycle_counter_event(new_ev))
		return 0;

	new_leader = new_ev->group_leader;

	new_filter = pcie_v2_pmu_event_filter(new_ev);
	new_lead_filter = pcie_v2_pmu_event_filter(new_leader);

	new_bdf = pcie_v2_pmu_bdf_val_en(new_filter);
	new_lead_bdf = pcie_v2_pmu_bdf_val_en(new_lead_filter);

	new_rp = FIELD_GET(NV_PCIE_V2_FILTER_PORT, new_filter);

	/* Root-port and BDF filtering cannot be combined in one event. */
	if (new_rp != 0 && new_bdf != 0) {
		dev_err(cspmu->dev,
			"RP and BDF filtering are mutually exclusive\n");
		return -EINVAL;
	}

	if (new_bdf != new_lead_bdf) {
		dev_err(cspmu->dev,
			"sibling and leader BDF value should be equal\n");
		return -EINVAL;
	}

	/* Compare BDF filter on existing events. */
	idx = find_first_bit(cspmu->hw_events.used_ctrs,
			cspmu->cycle_counter_logical_idx);

	/* idx == cycle_counter_logical_idx means no regular counter in use. */
	if (idx != cspmu->cycle_counter_logical_idx) {
		struct perf_event *leader = cspmu->hw_events.events[idx]->group_leader;

		const u32 lead_filter = pcie_v2_pmu_event_filter(leader);
		const u32 lead_bdf = pcie_v2_pmu_bdf_val_en(lead_filter);

		if (new_lead_bdf != lead_bdf) {
			dev_err(cspmu->dev, "only one BDF value is supported\n");
			return -EINVAL;
		}
	}

	return 0;
}
515
/* One shared PCIE-TGT address-range filter slot. */
struct pcie_tgt_addr_filter {
	u32 refcount;	/* number of active events using this slot */
	u64 base;	/* programmed address-range base */
	u64 mask;	/* programmed address-range mask */
};

/* Flavor-private data for the PCIE-TGT PMU (hung off nv_cspmu_ctx::data). */
struct pcie_tgt_data {
	struct pcie_tgt_addr_filter addr_filter[NV_PCIE_TGT_ADDR_COUNT];
	/* Mapped base of the address filter register block. */
	void __iomem *addr_filter_reg;
};
526
527#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
528static int pcie_tgt_init_data(struct arm_cspmu *cspmu)
529{
530 int ret;
531 struct acpi_device *adev;
532 struct pcie_tgt_data *data;
533 struct list_head resource_list;
534 struct resource_entry *rentry;
535 struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
536 struct device *dev = cspmu->dev;
537
538 data = devm_kzalloc(dev, sizeof(struct pcie_tgt_data), GFP_KERNEL);
539 if (!data)
540 return -ENOMEM;
541
542 adev = arm_cspmu_acpi_dev_get(cspmu);
543 if (!adev) {
544 dev_err(dev, "failed to get associated PCIE-TGT device\n");
545 return -ENODEV;
546 }
547
548 INIT_LIST_HEAD(&resource_list);
549 ret = acpi_dev_get_memory_resources(adev, &resource_list);
550 if (ret < 0) {
551 dev_err(dev, "failed to get PCIE-TGT device memory resources\n");
552 acpi_dev_put(adev);
553 return ret;
554 }
555
556 rentry = list_first_entry_or_null(
557 &resource_list, struct resource_entry, node);
558 if (rentry) {
559 data->addr_filter_reg = devm_ioremap_resource(dev, rentry->res);
560 ret = 0;
561 }
562
563 if (IS_ERR(data->addr_filter_reg)) {
564 dev_err(dev, "failed to get address filter resource\n");
565 ret = PTR_ERR(data->addr_filter_reg);
566 }
567
568 acpi_dev_free_resource_list(&resource_list);
569 acpi_dev_put(adev);
570
571 ctx->data = data;
572
573 return ret;
574}
575#else
576static int pcie_tgt_init_data(struct arm_cspmu *cspmu)
577{
578 return -ENODEV;
579}
580#endif
581
582static struct pcie_tgt_data *pcie_tgt_get_data(struct arm_cspmu *cspmu)
583{
584 struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
585
586 return ctx->data;
587}
588
589/* Find the first available address filter slot. */
590static int pcie_tgt_find_addr_idx(struct arm_cspmu *cspmu, u64 base, u64 mask,
591 bool is_reset)
592{
593 int i;
594 struct pcie_tgt_data *data = pcie_tgt_get_data(cspmu);
595
596 for (i = 0; i < NV_PCIE_TGT_ADDR_COUNT; i++) {
597 if (!is_reset && data->addr_filter[i].refcount == 0)
598 return i;
599
600 if (data->addr_filter[i].base == base &&
601 data->addr_filter[i].mask == mask)
602 return i;
603 }
604
605 return -ENODEV;
606}
607
608static u32 pcie_tgt_pmu_event_filter(const struct perf_event *event)
609{
610 u32 filter;
611
612 filter = (event->attr.config >> NV_PCIE_TGT_EV_TYPE_COUNT) &
613 NV_PCIE_TGT_FILTER2_MASK;
614
615 return filter;
616}
617
618static bool pcie_tgt_pmu_addr_en(const struct perf_event *event)
619{
620 u32 filter = pcie_tgt_pmu_event_filter(event);
621
622 return FIELD_GET(NV_PCIE_TGT_FILTER2_ADDR_EN, filter) != 0;
623}
624
625static u32 pcie_tgt_pmu_port_filter(const struct perf_event *event)
626{
627 u32 filter = pcie_tgt_pmu_event_filter(event);
628
629 return FIELD_GET(NV_PCIE_TGT_FILTER2_PORT, filter);
630}
631
/* config1 carries the full 64-bit destination address-range base. */
static u64 pcie_tgt_pmu_dst_addr_base(const struct perf_event *event)
{
	return event->attr.config1;
}

/* config2 carries the full 64-bit destination address-range mask. */
static u64 pcie_tgt_pmu_dst_addr_mask(const struct perf_event *event)
{
	return event->attr.config2;
}
641
642static int pcie_tgt_pmu_validate_event(struct arm_cspmu *cspmu,
643 struct perf_event *new_ev)
644{
645 u64 base, mask;
646 int idx;
647
648 if (!pcie_tgt_pmu_addr_en(new_ev))
649 return 0;
650
651 /* Make sure there is a slot available for the address filter. */
652 base = pcie_tgt_pmu_dst_addr_base(new_ev);
653 mask = pcie_tgt_pmu_dst_addr_mask(new_ev);
654 idx = pcie_tgt_find_addr_idx(cspmu, base, mask, false);
655 if (idx < 0)
656 return -EINVAL;
657
658 return 0;
659}
660
/*
 * Acquire (@en == true) or release a reference on address filter slot @idx.
 * The hardware registers are only written on the first acquire and the
 * last release; intermediate references just adjust the refcount.
 */
static void pcie_tgt_pmu_config_addr_filter(struct arm_cspmu *cspmu,
					bool en, u64 base, u64 mask, int idx)
{
	struct pcie_tgt_data *data;
	struct pcie_tgt_addr_filter *filter;
	void __iomem *filter_reg;

	data = pcie_tgt_get_data(cspmu);
	filter = &data->addr_filter[idx];
	/* Each slot owns a NV_PCIE_TGT_ADDR_STRIDE-byte register window. */
	filter_reg = data->addr_filter_reg + (idx * NV_PCIE_TGT_ADDR_STRIDE);

	if (en) {
		filter->refcount++;
		if (filter->refcount == 1) {
			/* First user: program the range, then enable it. */
			filter->base = base;
			filter->mask = mask;

			writel(lower_32_bits(base), filter_reg + NV_PCIE_TGT_ADDR_BASE_LO);
			writel(upper_32_bits(base), filter_reg + NV_PCIE_TGT_ADDR_BASE_HI);
			writel(lower_32_bits(mask), filter_reg + NV_PCIE_TGT_ADDR_MASK_LO);
			writel(upper_32_bits(mask), filter_reg + NV_PCIE_TGT_ADDR_MASK_HI);
			writel(1, filter_reg + NV_PCIE_TGT_ADDR_CTRL);
		}
	} else {
		filter->refcount--;
		if (filter->refcount == 0) {
			/* Last user gone: disable first, then clear range. */
			writel(0, filter_reg + NV_PCIE_TGT_ADDR_CTRL);
			writel(0, filter_reg + NV_PCIE_TGT_ADDR_BASE_LO);
			writel(0, filter_reg + NV_PCIE_TGT_ADDR_BASE_HI);
			writel(0, filter_reg + NV_PCIE_TGT_ADDR_MASK_LO);
			writel(0, filter_reg + NV_PCIE_TGT_ADDR_MASK_HI);

			filter->base = 0;
			filter->mask = 0;
		}
	}
}
698
/*
 * Program the per-counter PMEVFILT2R value for @event: claim an address
 * filter slot when address filtering is requested, then set the root-port
 * selection bits.
 */
static void pcie_tgt_pmu_set_ev_filter(struct arm_cspmu *cspmu,
				       const struct perf_event *event)
{
	bool addr_filter_en;
	int idx;
	u32 filter2_val, filter2_offset, port_filter;
	u64 base, mask;

	filter2_val = 0;
	filter2_offset = PMEVFILT2R + (4 * event->hw.idx);

	addr_filter_en = pcie_tgt_pmu_addr_en(event);
	if (addr_filter_en) {
		base = pcie_tgt_pmu_dst_addr_base(event);
		mask = pcie_tgt_pmu_dst_addr_mask(event);
		idx = pcie_tgt_find_addr_idx(cspmu, base, mask, false);

		/* validate_event checked availability, but re-check here. */
		if (idx < 0) {
			dev_err(cspmu->dev,
				"Unable to find a slot for address filtering\n");
			writel(0, cspmu->base0 + filter2_offset);
			return;
		}

		/* Configure address range filter registers.*/
		pcie_tgt_pmu_config_addr_filter(cspmu, true, base, mask, idx);

		/* Config the counter to use the selected address filter slot. */
		filter2_val |= FIELD_PREP(NV_PCIE_TGT_FILTER2_ADDR, 1U << idx);
	}

	port_filter = pcie_tgt_pmu_port_filter(event);

	/* Monitor all ports if no filter is selected. */
	if (!addr_filter_en && port_filter == 0)
		port_filter = NV_PCIE_TGT_FILTER2_PORT;

	filter2_val |= FIELD_PREP(NV_PCIE_TGT_FILTER2_PORT, port_filter);

	writel(filter2_val, cspmu->base0 + filter2_offset);
}
740
741static void pcie_tgt_pmu_reset_ev_filter(struct arm_cspmu *cspmu,
742 const struct perf_event *event)
743{
744 bool addr_filter_en;
745 u64 base, mask;
746 int idx;
747
748 addr_filter_en = pcie_tgt_pmu_addr_en(event);
749 if (!addr_filter_en)
750 return;
751
752 base = pcie_tgt_pmu_dst_addr_base(event);
753 mask = pcie_tgt_pmu_dst_addr_mask(event);
754 idx = pcie_tgt_find_addr_idx(cspmu, base, mask, true);
755
756 if (idx < 0) {
757 dev_err(cspmu->dev,
758 "Unable to find the address filter slot to reset\n");
759 return;
760 }
761
762 pcie_tgt_pmu_config_addr_filter(cspmu, false, base, mask, idx);
763}
764
/* Extract the 3-bit event type from the low bits of the event config. */
static u32 pcie_tgt_pmu_event_type(const struct perf_event *event)
{
	return event->attr.config & NV_PCIE_TGT_EV_TYPE_MASK;
}
769
770static bool pcie_tgt_pmu_is_cycle_counter_event(const struct perf_event *event)
771{
772 u32 event_type = pcie_tgt_pmu_event_type(event);
773
774 return event_type == NV_PCIE_TGT_EV_TYPE_CC;
775}
776
/* How a PMU instance name is composed (see nv_cspmu_format_name()). */
enum nv_cspmu_name_fmt {
	NAME_FMT_GENERIC,	/* pattern takes a running generic index */
	NAME_FMT_SOCKET,	/* pattern takes the socket (NUMA node) id */
	NAME_FMT_SOCKET_INST,	/* pattern takes socket id + ACPI instance id */
};
782
/* One entry of the PMIIDR-keyed template table below. */
struct nv_cspmu_match {
	u32 prodid;			/* expected PMIIDR product bits */
	u32 prodid_mask;		/* PMIIDR bits compared */
	const char *name_pattern;	/* printf pattern for the PMU name */
	enum nv_cspmu_name_fmt name_fmt; /* arguments the pattern expects */
	struct nv_cspmu_ctx template_ctx; /* copied into the instance ctx */
	struct arm_cspmu_impl_ops ops;	/* per-flavor callback overrides */
};
791
/*
 * Template table matched against PMIIDR in nv_cspmu_init_ops().  Entries
 * are tried in order; the terminating prodid == 0 entry is a catch-all
 * for otherwise-unrecognized NVIDIA PMUs.
 */
static const struct nv_cspmu_match nv_cspmu_match[] = {
	{
		/* PCIE PMU (MCF events, per-root-port filtering). */
		.prodid = 0x10300000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_pcie_pmu_%u",
		.name_fmt = NAME_FMT_SOCKET,
		.template_ctx = {
			.event_attr = mcf_pmu_event_attrs,
			.format_attr = pcie_pmu_format_attrs,
			.filter_mask = NV_PCIE_FILTER_ID_MASK,
			.filter_default_val = NV_PCIE_FILTER_ID_MASK,
			.filter2_mask = 0x0,
			.filter2_default_val = 0x0,
			.get_filter = nv_cspmu_event_filter,
			.get_filter2 = NULL,
			.data = NULL,
			.init_data = NULL
		},
	},
	{
		/* NVLink-C2C1 PMU. */
		.prodid = 0x10400000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
		.name_fmt = NAME_FMT_SOCKET,
		.template_ctx = {
			.event_attr = mcf_pmu_event_attrs,
			.format_attr = nvlink_c2c_pmu_format_attrs,
			.filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
			.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
			.filter2_mask = 0x0,
			.filter2_default_val = 0x0,
			.get_filter = nv_cspmu_event_filter,
			.get_filter2 = NULL,
			.data = NULL,
			.init_data = NULL
		},
	},
	{
		/* NVLink-C2C0 PMU. */
		.prodid = 0x10500000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
		.name_fmt = NAME_FMT_SOCKET,
		.template_ctx = {
			.event_attr = mcf_pmu_event_attrs,
			.format_attr = nvlink_c2c_pmu_format_attrs,
			.filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
			.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
			.filter2_mask = 0x0,
			.filter2_default_val = 0x0,
			.get_filter = nv_cspmu_event_filter,
			.get_filter2 = NULL,
			.data = NULL,
			.init_data = NULL
		},
	},
	{
		/* CNVLink PMU (per-remote-socket filtering). */
		.prodid = 0x10600000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_cnvlink_pmu_%u",
		.name_fmt = NAME_FMT_SOCKET,
		.template_ctx = {
			.event_attr = mcf_pmu_event_attrs,
			.format_attr = cnvlink_pmu_format_attrs,
			.filter_mask = NV_CNVL_FILTER_ID_MASK,
			.filter_default_val = NV_CNVL_FILTER_ID_MASK,
			.filter2_mask = 0x0,
			.filter2_default_val = 0x0,
			.get_filter = nv_cspmu_event_filter,
			.get_filter2 = NULL,
			.data = NULL,
			.init_data = NULL
		},
	},
	{
		/* SCF PMU (no filtering). */
		.prodid = 0x2CF00000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_scf_pmu_%u",
		.name_fmt = NAME_FMT_SOCKET,
		.template_ctx = {
			.event_attr = scf_pmu_event_attrs,
			.format_attr = scf_pmu_format_attrs,
			.filter_mask = 0x0,
			.filter_default_val = 0x0,
			.filter2_mask = 0x0,
			.filter2_default_val = 0x0,
			.get_filter = nv_cspmu_event_filter,
			.get_filter2 = NULL,
			.data = NULL,
			.init_data = NULL
		},
	},
	{
		/* UCF PMU (source/destination filtering). */
		.prodid = 0x2CF20000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_ucf_pmu_%u",
		.name_fmt = NAME_FMT_SOCKET,
		.template_ctx = {
			.event_attr = ucf_pmu_event_attrs,
			.format_attr = ucf_pmu_format_attrs,
			.filter_mask = NV_UCF_FILTER_ID_MASK,
			.filter_default_val = NV_UCF_FILTER_DEFAULT,
			.filter2_mask = 0x0,
			.filter2_default_val = 0x0,
			.get_filter = ucf_pmu_event_filter,
			/* Remaining members are implicitly zeroed. */
		},
	},
	{
		/* PCIE v2 (PCIE-SRC) PMU: RP/BDF + destination filtering. */
		.prodid = 0x10301000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_pcie_pmu_%u_rc_%u",
		.name_fmt = NAME_FMT_SOCKET_INST,
		.template_ctx = {
			.event_attr = pcie_v2_pmu_event_attrs,
			.format_attr = pcie_v2_pmu_format_attrs,
			.filter_mask = NV_PCIE_V2_FILTER_ID_MASK,
			.filter_default_val = NV_PCIE_V2_FILTER_DEFAULT,
			.filter2_mask = NV_PCIE_V2_FILTER2_ID_MASK,
			.filter2_default_val = NV_PCIE_V2_FILTER2_DEFAULT,
			.get_filter = pcie_v2_pmu_event_filter,
			.get_filter2 = nv_cspmu_event_filter2,
		},
		.ops = {
			.validate_event = pcie_v2_pmu_validate_event,
			.reset_ev_filter = nv_cspmu_reset_ev_filter,
		}
	},
	{
		/* PCIE-TGT PMU: port + address-range filtering. */
		.prodid = 0x10700000,
		.prodid_mask = NV_PRODID_MASK,
		.name_pattern = "nvidia_pcie_tgt_pmu_%u_rc_%u",
		.name_fmt = NAME_FMT_SOCKET_INST,
		.template_ctx = {
			.event_attr = pcie_tgt_pmu_event_attrs,
			.format_attr = pcie_tgt_pmu_format_attrs,
			.filter_mask = 0x0,
			.filter_default_val = 0x0,
			.filter2_mask = NV_PCIE_TGT_FILTER2_MASK,
			.filter2_default_val = NV_PCIE_TGT_FILTER2_DEFAULT,
			.init_data = pcie_tgt_init_data
		},
		.ops = {
			.is_cycle_counter_event = pcie_tgt_pmu_is_cycle_counter_event,
			.event_type = pcie_tgt_pmu_event_type,
			.validate_event = pcie_tgt_pmu_validate_event,
			.set_ev_filter = pcie_tgt_pmu_set_ev_filter,
			.reset_ev_filter = pcie_tgt_pmu_reset_ev_filter,
		}
	},
	{
		/* Catch-all: any other NVIDIA PMU (terminates the scan). */
		.prodid = 0,
		.prodid_mask = 0,
		.name_pattern = "nvidia_uncore_pmu_%u",
		.name_fmt = NAME_FMT_GENERIC,
		.template_ctx = {
			.event_attr = generic_pmu_event_attrs,
			.format_attr = generic_pmu_format_attrs,
			.filter_mask = NV_GENERIC_FILTER_ID_MASK,
			.filter_default_val = NV_GENERIC_FILTER_ID_MASK,
			.filter2_mask = NV_GENERIC_FILTER_ID_MASK,
			.filter2_default_val = NV_GENERIC_FILTER_ID_MASK,
			.get_filter = nv_cspmu_event_filter,
			.get_filter2 = nv_cspmu_event_filter2,
			.data = NULL,
			.init_data = NULL
		},
	},
};
959
/*
 * Build the PMU device name from the match entry's printf-style pattern.
 * Returns a devm-allocated string, or NULL on allocation failure or when
 * a required ACPI instance id is unavailable.
 */
static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
				  const struct nv_cspmu_match *match)
{
	char *name = NULL;
	struct device *dev = cspmu->dev;

	/* Running index shared by all generically-named instances. */
	static atomic_t pmu_generic_idx = {0};

	switch (match->name_fmt) {
	case NAME_FMT_SOCKET: {
		/* The socket id is the NUMA node of an associated CPU. */
		const int cpu = cpumask_first(&cspmu->associated_cpus);
		const int socket = cpu_to_node(cpu);

		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
				       socket);
		break;
	}
	case NAME_FMT_SOCKET_INST: {
		const int cpu = cpumask_first(&cspmu->associated_cpus);
		const int socket = cpu_to_node(cpu);
		u32 inst_id;

		/* Name stays NULL when the ACPI instance id can't be read. */
		if (!nv_cspmu_get_inst_id(cspmu, &inst_id))
			name = devm_kasprintf(dev, GFP_KERNEL,
					match->name_pattern, socket, inst_id);
		break;
	}
	case NAME_FMT_GENERIC:
		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
				       atomic_fetch_inc(&pmu_generic_idx));
		break;
	}

	return name;
}
995
/*
 * Install callback @name: prefer the match entry's override, otherwise
 * fall back to @default_op when one is given; ops left NULL keep the
 * arm_cspmu core's default behavior.
 */
#define SET_OP(name, impl, match, default_op) \
	do { \
		if (match->ops.name) \
			impl->name = match->ops.name; \
		else if (default_op != NULL) \
			impl->name = default_op; \
	} while (false)
1003
/*
 * Implementor init hook: match the PMU's PMIIDR against the template
 * table, attach a per-instance context and install the NVIDIA-specific
 * callbacks.  Returns 0 on success or a negative errno.
 */
static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
{
	struct nv_cspmu_ctx *ctx;
	struct device *dev = cspmu->dev;
	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
	const struct nv_cspmu_match *match = nv_cspmu_match;

	ctx = devm_kzalloc(dev, sizeof(struct nv_cspmu_ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/* Find matching PMU. */
	for (; match->prodid; match++) {
		const u32 prodid_mask = match->prodid_mask;

		if ((match->prodid & prodid_mask) ==
		    (cspmu->impl.pmiidr & prodid_mask))
			break;
	}
	/* No hit leaves match on the terminating catch-all entry. */

	/* Initialize the context with the matched template. */
	memcpy(ctx, &match->template_ctx, sizeof(struct nv_cspmu_ctx));
	ctx->name = nv_cspmu_format_name(cspmu, match);

	cspmu->impl.ctx = ctx;

	/* NVIDIA specific callbacks. */
	SET_OP(validate_event, impl_ops, match, NULL);
	SET_OP(event_type, impl_ops, match, NULL);
	SET_OP(is_cycle_counter_event, impl_ops, match, NULL);
	SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter);
	SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter);
	SET_OP(reset_ev_filter, impl_ops, match, NULL);
	SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs);
	SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs);
	SET_OP(get_name, impl_ops, match, nv_cspmu_get_name);

	/* Run the flavor's extra init (e.g. PCIE-TGT resource mapping). */
	if (ctx->init_data)
		return ctx->init_data(cspmu);

	return 0;
}
1046
/* Match all NVIDIA Coresight PMU devices */
static const struct arm_cspmu_impl_match nv_cspmu_param = {
	.pmiidr_val	= ARM_CSPMU_IMPL_ID_NVIDIA,
	.module		= THIS_MODULE,
	.impl_init_ops	= nv_cspmu_init_ops
};
1053
1054static int __init nvidia_cspmu_init(void)
1055{
1056 int ret;
1057
1058 ret = arm_cspmu_impl_register(&nv_cspmu_param);
1059 if (ret)
1060 pr_err("nvidia_cspmu backend registration error: %d\n", ret);
1061
1062 return ret;
1063}
1064
/* Unregister the backend from the ARM CoreSight PMU core on unload. */
static void __exit nvidia_cspmu_exit(void)
{
	arm_cspmu_impl_unregister(&nv_cspmu_param);
}

module_init(nvidia_cspmu_init);
module_exit(nvidia_cspmu_exit);

MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");