Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
4 */
5
6#ifndef _AIE2_PCI_H_
7#define _AIE2_PCI_H_
8
9#include <drm/amdxdna_accel.h>
10#include <linux/limits.h>
11#include <linux/semaphore.h>
12
13#include "aie2_msg_priv.h"
14#include "amdxdna_mailbox.h"
15
/* Poll interval/timeout pair, in microseconds (used by wait/poll helpers) */
#define AIE2_INTERVAL 20000 /* us */
#define AIE2_TIMEOUT 1000000 /* us */

/* Firmware determines device memory base address and size */
#define AIE2_DEVM_BASE 0x4000000
#define AIE2_DEVM_SIZE SZ_64M

/* Map a device handle to its underlying struct pci_dev */
#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))

/* Convert a device-side address into an offset within the SRAM/mailbox BAR */
#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)

/* Look up BAR index / BAR offset for PSP and SRAM registers from priv tables */
#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
31
/*
 * Resolve the __iomem address of SMU register @idx: mapped SMU base plus
 * the per-device offset table entry.
 *
 * Fix: parenthesize the @ndev macro argument when binding it to the local
 * (a comma expression passed as @ndev would otherwise miscompile). This
 * also matches the MBOX_SIZE() macro below. The statement expression
 * evaluates @ndev exactly once.
 */
#define SMU_REG(ndev, idx) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
})
/* Resolve the __iomem address of SRAM location @idx (same hygiene fix). */
#define SRAM_GET_ADDR(ndev, idx) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
})
42
/* Fixed size of one mailbox channel slot */
#define CHAN_SLOT_SZ SZ_8K
/*
 * Total mailbox size: use the per-device override from priv if non-zero,
 * otherwise fall back to the full length of the mailbox BAR.
 * Statement expression evaluates @ndev exactly once.
 */
#define MBOX_SIZE(ndev) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
	pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})
50
51#if IS_ENABLED(CONFIG_AMD_PMF)
52#define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics)
53#define AIE2_GET_PMF_NPU_DATA(field, val) \
54({ \
55 struct amd_pmf_npu_metrics _npu_metrics; \
56 int _ret; \
57 \
58 _ret = amd_pmf_get_npu_data(&_npu_metrics); \
59 val = _ret ? U32_MAX : _npu_metrics.field; \
60 (_ret); \
61})
62#else
63#define AIE2_GET_PMF_NPU_METRICS(metrics) \
64({ \
65 typeof(metrics) _m = metrics; \
66 memset(_m, 0xff, sizeof(*_m)); \
67 (-EOPNOTSUPP); \
68})
69
70#define SENSOR_DEFAULT_npu_power U32_MAX
71#define AIE2_GET_PMF_NPU_DATA(field, val) \
72({ \
73 val = SENSOR_DEFAULT_##field; \
74 (-EOPNOTSUPP); \
75})
76#endif
77
/* Indices into amdxdna_dev_priv::smu_regs_off (see SMU_REG()) */
enum aie2_smu_reg_idx {
	SMU_CMD_REG = 0,
	SMU_ARG_REG = 1,
	SMU_INTR_REG = 2,
	SMU_RESP_REG = 3,
	SMU_OUT_REG = 4,
	SMU_MAX_REGS = 5, /* Keep this at the end */
};
86
/* Indices into amdxdna_dev_priv::sram_offs (see SRAM_GET_ADDR()) */
enum aie2_sram_reg_idx {
	MBOX_CHANN_OFF = 0,
	FW_ALIVE_OFF = 1,
	SRAM_MAX_INDEX = 2, /* Keep this at the end */
};
92
/* Indices into amdxdna_dev_priv::psp_regs_off / psp_config::psp_regs */
enum psp_reg_idx {
	PSP_CMD_REG = 0,
	PSP_ARG0_REG = 1,
	PSP_ARG1_REG = 2,
	PSP_ARG2_REG = 3,
	PSP_NUM_IN_REGS = 4, /* number of input registers */
	PSP_INTR_REG = 4,
	PSP_STATUS_REG = 5,
	PSP_RESP_REG = 6,
	PSP_PWAITMODE_REG = 7,
	PSP_MAX_REGS = 8, /* Keep this at the end */
};
105
/* Forward declarations for types only referenced by pointer in this header */
struct amdxdna_client;
struct amdxdna_fw_ver;
struct amdxdna_hwctx;
struct amdxdna_sched_job;

/* Configuration handed to aie2m_psp_create(): firmware image + PSP registers */
struct psp_config {
	const void *fw_buf;	/* firmware image buffer */
	u32 fw_size;		/* firmware image size in bytes */
	void __iomem *psp_regs[PSP_MAX_REGS];	/* mapped PSP register addresses */
};
116
/* AIE hardware version (see aie2_query_aie_version()) */
struct aie_version {
	u16 major;
	u16 minor;
};

/* Per tile-type resource counts within the AIE array */
struct aie_tile_metadata {
	u16 row_count;
	u16 row_start;		/* first row of this tile type */
	u16 dma_channel_count;
	u16 lock_count;
	u16 event_reg_count;
};

/* AIE array geometry and per tile-type data (see aie2_query_aie_metadata()) */
struct aie_metadata {
	u32 size;
	u16 cols;
	u16 rows;
	struct aie_version version;
	struct aie_tile_metadata core;	/* core (compute) tiles */
	struct aie_tile_metadata mem;	/* memory tiles */
	struct aie_tile_metadata shim;	/* shim (interface) tiles */
};
139
/* Categories that group runtime configuration entries (struct rt_config) */
enum rt_config_category {
	AIE2_RT_CFG_INIT = 0,
	AIE2_RT_CFG_CLK_GATING = 1,
	AIE2_RT_CFG_FORCE_PREEMPT = 2,
	AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT = 3,
};
146
/* One runtime configuration entry (see npu*_default_rt_cfg tables) */
struct rt_config {
	u32 type;		/* config type id, passed to aie2_set_runtime_cfg() */
	u32 value;
	u32 category;		/* enum rt_config_category */
	unsigned long feature_mask;	/* enum aie2_fw_feature bits gating this entry */
};

/* Clock pair for one DPM level (see npu*_dpm_clk_table) */
struct dpm_clk_freq {
	u32 npuclk;	/* NPU clock; presumably MHz -- confirm in aie2_smu.c */
	u32 hclk;	/* H-clock; presumably MHz -- confirm in aie2_smu.c */
};
158
/*
 * Define the maximum number of pending commands in a hardware context.
 * Must be power of 2!
 */
#define HWCTX_MAX_CMDS 4
/* Map a monotonically increasing sequence number to a cmd_buf[] slot */
#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
/* Driver-private state of one hardware context */
struct amdxdna_hwctx_priv {
	struct amdxdna_gem_obj *heap;	/* heap BO backing this context */
	void *mbox_chann;		/* mailbox channel handle */

	struct drm_gpu_scheduler sched;
	struct drm_sched_entity entity;

	struct mutex io_lock; /* protect seq and cmd order */
	struct wait_queue_head job_free_wq;
	u32 num_pending;	/* presumably jobs in flight -- confirm in aie2_hwctx.c */
	u64 seq;		/* next sequence number; slot = get_job_idx(seq) */
	struct semaphore job_sem;
	bool job_done;

	/* Completed job counter */
	u64 completed;

	struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];	/* one BO per slot */
	struct drm_syncobj *syncobj;
};
185
/* Coarse device bring-up state */
enum aie2_dev_status {
	AIE2_DEV_UNINIT = 0,
	AIE2_DEV_INIT = 1,
	AIE2_DEV_START = 2,
};
191
/*
 * Ops table abstracting the execution-message formats; filled into
 * amdxdna_dev_hdl::exec_msg_ops.
 */
struct aie2_exec_msg_ops {
	/* Build a request from a command BO; returns message size and opcode */
	int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			   size_t *size, u32 *msg_op);
	int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			    size_t *size, u32 *msg_op);
	/* Build a command-chain request covering @cmd_cnt slots at @slot_addr */
	void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
	/* Fill one command-list slot from a command BO */
	int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	/* Map a command opcode to the matching chain message opcode */
	u32 (*get_chain_msg_op)(u32 cmd_op);
};
204
/* Per-device runtime handle for an AIE2 NPU */
struct amdxdna_dev_hdl {
	struct amdxdna_dev *xdna;
	const struct amdxdna_dev_priv *priv;	/* static per-chip configuration */
	void __iomem *sram_base;	/* mapped SRAM BAR */
	void __iomem *smu_base;		/* mapped SMU registers */
	void __iomem *mbox_base;	/* mapped mailbox BAR */
	struct psp_device *psp_hdl;

	/* Management channel ring resources and protocol version */
	struct xdna_mailbox_chann_res mgmt_x2i;
	struct xdna_mailbox_chann_res mgmt_i2x;
	u32 mgmt_chan_idx;
	u32 mgmt_prot_major;
	u32 mgmt_prot_minor;

	u32 total_col;
	struct aie_version version;
	struct aie_metadata metadata;
	unsigned long feature_mask;	/* enum aie2_fw_feature bits; see AIE2_FEATURE_ON() */
	struct aie2_exec_msg_ops *exec_msg_ops;

	/* power management and clock */
	enum amdxdna_power_mode_type pw_mode;
	u32 dpm_level;
	u32 dft_dpm_level;	/* default DPM level */
	u32 max_dpm_level;
	u32 clk_gating;
	u32 npuclk_freq;
	u32 hclk_freq;
	u32 max_tops;
	u32 curr_tops;
	u32 force_preempt_enabled;
	u32 frame_boundary_preempt;

	/* Mailbox and the management channel */
	struct mailbox *mbox;
	struct mailbox_channel *mgmt_chann;
	struct async_events *async_events;

	enum aie2_dev_status dev_status;
	u32 hwctx_num;	/* number of hardware contexts; see priv->hwctx_limit */

	struct amdxdna_async_error last_async_err;
};
248
/*
 * Build an aie2_bar_off_pair designated initializer: @bar is token-pasted
 * into <bar>_BAR_INDEX / <bar>_BAR_BASE, and the stored offset is
 * @reg_addr relative to that BAR's base.
 */
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
	[reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}

/* A register location: PCI BAR index plus byte offset within that BAR */
struct aie2_bar_off_pair {
	int bar_idx;
	u32 offset;
};
256
/* Per-generation hardware ops (e.g. npu1_set_dpm / npu4_set_dpm) */
struct aie2_hw_ops {
	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};
260
/* Bit positions for the firmware feature mask (see AIE2_FEATURE_ON()) */
enum aie2_fw_feature {
	AIE2_NPU_COMMAND = 0,
	AIE2_PREEMPT = 1,
	AIE2_TEMPORAL_ONLY = 2,
	AIE2_APP_HEALTH = 3,
	AIE2_FEATURE_MAX = 4, /* Keep this at the end */
};
268
/*
 * One firmware feature table entry; presumably maps a firmware version
 * range (major, [min_minor, max_minor]) to supported feature bits --
 * confirm against the npu4_fw_feature_table definition.
 */
struct aie2_fw_feature_tbl {
	u64 features;	/* bitmask of enum aie2_fw_feature bits */
	u32 major;
	u32 max_minor;
	u32 min_minor;
};

/* Mask with all feature bits AIE2_NPU_COMMAND..AIE2_FEATURE_MAX-1 set */
#define AIE2_ALL_FEATURES GENMASK_ULL(AIE2_FEATURE_MAX - 1, AIE2_NPU_COMMAND)
/* Test whether the device firmware advertises @feature */
#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
278
/* Static per-chip configuration referenced by amdxdna_dev_hdl::priv */
struct amdxdna_dev_priv {
	const char *fw_path;				/* firmware image path */
	const struct rt_config *rt_config;		/* runtime config table */
	const struct dpm_clk_freq *dpm_clk_tbl;		/* DPM level -> clocks */
	const struct aie2_fw_feature_tbl *fw_feature_tbl;

#define COL_ALIGN_NONE 0
#define COL_ALIGN_NATURE 1
	u32 col_align;		/* column alignment rule: COL_ALIGN_* */
	u32 mbox_dev_addr;	/* mailbox device-side base; see AIE2_MBOX_OFF() */
	/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
	u32 mbox_size;
	u32 hwctx_limit;	/* max hardware contexts for this chip */
	u32 sram_dev_addr;	/* SRAM device-side base; see AIE2_SRAM_OFF() */
	struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
	struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
	struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
	struct aie2_hw_ops hw_ops;
};
298
/* Device ops implemented by the aie2 backend */
extern const struct amdxdna_dev_ops aie2_ops;

/* Apply the runtime configuration entries of @category to firmware */
int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
		     enum rt_config_category category, u32 *val);

/* aie2 npu hw config */
extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
310
/* aie2_smu.c -- SMU setup/teardown and DPM control */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
/* Per-generation set_dpm implementations (wired via aie2_hw_ops.set_dpm) */
int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);

/* aie2_pm.c -- power mode handling */
int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);

/* aie2_psp.c -- PSP device lifecycle (firmware loading) */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
int aie2_psp_start(struct psp_device *psp);
void aie2_psp_stop(struct psp_device *psp);
int aie2_psp_waitmode_poll(struct psp_device *psp);
327
/* aie2_error.c -- asynchronous error event handling */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
/* Async error message handling loop; kthread-style (void *data) signature */
int aie2_error_async_msg_thread(void *data);
int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
			       struct amdxdna_drm_get_array *args);
334
/* aie2_message.c -- management-channel messaging with firmware */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
/* Get/set a firmware runtime configuration value by @type */
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
/* Query device/firmware information over the management channel */
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
				struct amdxdna_fw_ver *fw_ver);
int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
			  struct app_health_report *report);
/* Hardware context create/destroy and host buffer mapping messages */
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
			 char __user *buf, u32 size,
			 struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, void __iomem *, size_t));
/*
 * Command submission paths; @notify_cb is presumably invoked when the
 * firmware responds -- confirm in aie2_message.c.
 */
int aie2_config_cu(struct amdxdna_hwctx *hwctx,
		   int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
				struct amdxdna_sched_job *job,
				int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
			       struct amdxdna_sched_job *job,
			       int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
			 int (*notify_cb)(void *, void __iomem *, size_t));
/* Allocate/free a DMA-addressable message buffer (size returned via @size) */
void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
			    dma_addr_t *dma_addr);
void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
			  void *cpu_addr, dma_addr_t dma_addr);
376
/* aie2_hwctx.c -- hardware context lifecycle and job submission */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
/* Suspend/resume hardware contexts of @client */
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
/* Submit @job; the assigned sequence number is returned through @seq */
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
386
387#endif /* _AIE2_PCI_H_ */