// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#ifdef CONFIG_ARM_ARCH_TIMER
#include <asm/arch_timer.h>
#endif

#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_hw.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"
#include "panthor_trace.h"

#define CSF_FW_NAME "mali_csffw.bin"

#define PING_INTERVAL_MS 12000
#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT 10
#define IDLE_HYSTERESIS_US 800
#define PWROFF_HYSTERESIS_US 10000
#define MCU_HALT_TIMEOUT_US (1ULL * USEC_PER_SEC)

/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/** @major: Major FW version. */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/** @size: FW binary size. */
	u32 size;
};

/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};

#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_RD BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_WR BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_EX BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE (0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED (1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT (3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_PROT BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_SHARED BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_ZERO BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS \
	(CSF_FW_BINARY_IFACE_ENTRY_RD | \
	 CSF_FW_BINARY_IFACE_ENTRY_WR | \
	 CSF_FW_BINARY_IFACE_ENTRY_EX | \
	 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK | \
	 CSF_FW_BINARY_IFACE_ENTRY_PROT | \
	 CSF_FW_BINARY_IFACE_ENTRY_SHARED | \
	 CSF_FW_BINARY_IFACE_ENTRY_ZERO)
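
/*
 * Illustrative sketch (hypothetical helper, not used by the driver): how an
 * entry header word is decoded with the helpers above. The entry type lives
 * in bits [7:0], the entry size (header included) in bits [15:8], and bits
 * 30/31 carry the UPDATE/OPTIONAL attributes.
 */
static inline void __maybe_unused demo_decode_entry_hdr(u32 ehdr, u32 *type,
							u32 *size, bool *optional)
{
	*type = CSF_FW_BINARY_ENTRY_TYPE(ehdr);
	*size = CSF_FW_BINARY_ENTRY_SIZE(ehdr);
	*optional = !!(ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL);
}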

/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/** @va: MCU virtual range to map this binary section to. */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};

struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary */
	u32 meta_start;
	/** @meta_size: Size of the build info data in the FW binary */
	u32 meta_size;
};

/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset. */
	size_t offset;
};

/**
 * struct panthor_fw_section - FW section
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/** @mem: Section memory. */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};

#define CSF_MCU_SHARED_REGION_START 0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL

#define MIN_CS_PER_CSG 8
#define MIN_CSGS 3

#define CSF_IFACE_VERSION(major, minor, patch) \
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff)
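
/*
 * Illustrative sketch (hypothetical helper, not used by the driver):
 * CSF_IFACE_VERSION() packs major in bits [31:24], minor in [23:16] and
 * patch in [15:0], so version words compare correctly as plain integers.
 * This mirrors the checks done by panthor_fw_has_glb_state() below.
 */
static inline bool __maybe_unused demo_iface_at_least(u32 version, u8 major,
						      u8 minor, u16 patch)
{
	return version >= CSF_IFACE_VERSION(major, minor, patch);
}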

#define CSF_GROUP_CONTROL_OFFSET 0x1000
#define CSF_STREAM_CONTROL_OFFSET 0x40
#define CSF_UNPRESERVED_REG_COUNT 4

/**
 * struct panthor_fw_iface - FW interfaces
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};

/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/** @shared_section: The section containing the FW interfaces. */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the
	 * global interface, the caller first busy waits for the request to
	 * be acknowledged, and then falls back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/** @booted: True if the FW is booted. */
	bool booted;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};

struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
{
	return ptdev->fw->vm;
}

/**
 * panthor_fw_get_glb_iface() - Get the global interface
 * @ptdev: Device.
 *
 * Return: The global interface.
 */
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev)
{
	return &ptdev->fw->iface.global;
}

/**
 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 *
 * Return: The command stream group slot interface.
 */
struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
		return NULL;

	return &ptdev->fw->iface.groups[csg_slot];
}

/**
 * panthor_fw_get_cs_iface() - Get a command stream slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 * @cs_slot: Index of the command stream slot.
 *
 * Return: The command stream slot interface.
 */
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
		return NULL;

	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
}

static bool panthor_fw_has_glb_state(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0);
}

static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0);
}

u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
				    struct panthor_fw_csg_iface *csg_iface)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		return csg_iface->input->endpoint_req2;
	else
		return csg_iface->input->endpoint_req;
}

void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
				     struct panthor_fw_csg_iface *csg_iface, u64 value)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		csg_iface->input->endpoint_req2 = value;
	else
		csg_iface->input->endpoint_req = lower_32_bits(value);
}

void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
					struct panthor_fw_csg_iface *csg_iface, u64 value,
					u64 mask)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask);
	else
		panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value),
				       lower_32_bits(mask));
}

/**
 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
 * @ptdev: Device.
 * @timeout_us: Timeout expressed in microseconds.
 *
 * The FW has two timer sources: the GPU counter or the arch-timer. We
 * need to express timeouts in terms of a number of cycles and specify
 * which timer source should be used.
 *
 * Return: A value suitable for timeout fields in the global interface.
 */
static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
{
	bool use_cycle_counter = false;
	u32 timer_rate = 0;
	u64 mod_cycles;

#ifdef CONFIG_ARM_ARCH_TIMER
	timer_rate = arch_timer_get_cntfrq();
#endif

	if (!timer_rate) {
		use_cycle_counter = true;
		timer_rate = clk_get_rate(ptdev->clks.core);
	}

	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
		/* We couldn't get a valid clock rate, let's just pick the
		 * maximum value so the FW still handles the core
		 * power on/off requests.
		 */
		return GLB_TIMER_VAL(~0) |
		       GLB_TIMER_SOURCE_GPU_COUNTER;
	}

	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
				      1000000ull << 10);
	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
		mod_cycles = GLB_TIMER_VAL(~0);

	return GLB_TIMER_VAL(mod_cycles) |
	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
}
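
/*
 * Worked example for the conversion above, assuming a 24 MHz arch-timer
 * (the rate is platform-specific): PWROFF_HYSTERESIS_US = 10000 gives
 * DIV_ROUND_UP(10000 * 24000000, 1000000 << 10) = 235, i.e. the timeout
 * value is expressed in units of 1024 timer cycles, rounded up.
 */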

static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + iter->offset, size);
	iter->offset = new_offset;
	return 0;
}
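
/*
 * The "new_offset < iter->offset" test above catches arithmetic wraparound:
 * with offset = 16 and a corrupted size of SIZE_MAX - 8, new_offset wraps
 * to 7, which could otherwise slip past the "new_offset > iter->size"
 * bounds check.
 */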

static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->offset = 0;
	sub_iter->data = iter->data + iter->offset;
	sub_iter->size = size;
	iter->offset = new_offset;
	return 0;
}

static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
					struct panthor_fw_section *section)
{
	bool was_mapped = !!section->mem->kmap;
	int ret;

	if (!section->data.size &&
	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
		return;

	ret = panthor_kernel_bo_vmap(section->mem);
	if (drm_WARN_ON(&ptdev->base, ret))
		return;

	memcpy(section->mem->kmap, section->data.buf, section->data.size);
	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
		memset(section->mem->kmap + section->data.size, 0,
		       panthor_kernel_bo_size(section->mem) - section->data.size);
	}

	if (!was_mapped)
		panthor_kernel_bo_vunmap(section->mem);
}

/**
 * panthor_fw_alloc_queue_iface_mem() - Allocate ring-buffer interfaces.
 * @ptdev: Device.
 * @input: Pointer holding the input interface on success.
 * Should be ignored on failure.
 * @output: Pointer holding the output interface on success.
 * Should be ignored on failure.
 * @input_fw_va: Pointer holding the input interface FW VA on success.
 * Should be ignored on failure.
 * @output_fw_va: Pointer holding the output interface FW VA on success.
 * Should be ignored on failure.
 *
 * Allocates panthor_fw_ringbuf_{input,output}_iface interfaces. The input
 * interface is at offset 0, and the output interface at offset 4096.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va)
{
	struct panthor_kernel_bo *mem;
	int ret;

	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
				       DRM_PANTHOR_BO_NO_MMAP,
				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
				       PANTHOR_VM_KERNEL_AUTO_VA,
				       "Queue FW interface");
	if (IS_ERR(mem))
		return mem;

	ret = panthor_kernel_bo_vmap(mem);
	if (ret) {
		panthor_kernel_bo_destroy(mem);
		return ERR_PTR(ret);
	}

	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
	*input = mem->kmap;
	*output = mem->kmap + SZ_4K;
	*input_fw_va = panthor_kernel_bo_gpuva(mem);
	*output_fw_va = *input_fw_va + SZ_4K;

	return mem;
}
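
/*
 * Minimal usage sketch (hypothetical, not called by the driver): allocate a
 * queue interface pair and rely on the documented layout, with the input
 * page at offset 0 and the output page at offset SZ_4K.
 */
static int __maybe_unused demo_alloc_queue_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_ringbuf_input_iface *input;
	const struct panthor_fw_ringbuf_output_iface *output;
	u32 input_fw_va, output_fw_va;
	struct panthor_kernel_bo *mem;

	mem = panthor_fw_alloc_queue_iface_mem(ptdev, &input, &output,
					       &input_fw_va, &output_fw_va);
	if (IS_ERR(mem))
		return PTR_ERR(mem);

	/* Both interfaces live in the same 8k BO, one page apart. */
	WARN_ON(output_fw_va != input_fw_va + SZ_4K);

	panthor_kernel_bo_destroy(mem);
	return 0;
}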

/**
 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
 * @ptdev: Device.
 * @size: Size of the suspend buffer.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
{
	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
					DRM_PANTHOR_BO_NO_MMAP,
					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					PANTHOR_VM_KERNEL_AUTO_VA,
					"FW suspend buffer");
}

static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	u32 section_size;
	u32 name_len;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry is not supported, ignoring");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = hdr.data.end - hdr.data.start;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	section_size = hdr.va.end - hdr.va.start;
	if (section_size) {
		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
		struct panthor_gem_object *bo;
		u32 vm_map_flags = 0;
		struct sg_table *sgt;
		u64 va = hdr.va.start;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
		 * non-cacheable for now. We might want to introduce a new
		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
		 * memory and is currently not used by our driver) for
		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take advantage
		 * of IO-coherent systems.
		 */
		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
							section_size,
							DRM_PANTHOR_BO_NO_MMAP,
							vm_map_flags, va, "FW section");
		if (IS_ERR(section->mem))
			return PTR_ERR(section->mem);

		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
			return -EINVAL;

		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
			ret = panthor_kernel_bo_vmap(section->mem);
			if (ret)
				return ret;
		}

		panthor_fw_init_section_mem(ptdev, section);

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}

static int panthor_fw_read_build_info(struct panthor_device *ptdev,
				      const struct firmware *fw,
				      struct panthor_fw_binary_iter *iter,
				      u32 ehdr)
{
	struct panthor_fw_build_info_hdr hdr;
	static const char git_sha_header[] = "git_sha: ";
	const int header_len = sizeof(git_sha_header) - 1;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.meta_start > fw->size ||
	    hdr.meta_start + hdr.meta_size > fw->size) {
		drm_err(&ptdev->base, "Firmware build info corrupt\n");
		/* We don't need the build info, so continue */
		return 0;
	}

	if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) {
		/* Not the expected header, this isn't metadata we understand */
		return 0;
	}

	/* Check that the git SHA is NULL terminated as expected */
	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
		/* Don't treat as fatal */
		return 0;
	}

	drm_info(&ptdev->base, "Firmware git sha: %s\n",
		 fw->data + hdr.meta_start + header_len);

	return 0;
}

static void
panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
{
	struct panthor_fw_section *section;

	list_for_each_entry(section, &ptdev->fw->sections, node) {
		struct sg_table *sgt;

		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
			continue;

		panthor_fw_init_section_mem(ptdev, section);
		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}
}

static int panthor_fw_load_entry(struct panthor_device *ptdev,
				 const struct firmware *fw,
				 struct panthor_fw_binary_iter *iter)
{
	struct panthor_fw_binary_iter eiter;
	u32 ehdr;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
	if (ret)
		return ret;

	if ((iter->offset % sizeof(u32)) ||
	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
		drm_err(&ptdev->base, "Firmware entry is not 32-bit aligned, offset=0x%x size=0x%x\n",
			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
		return -EINVAL;
	}

	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
		return -EINVAL;

	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);

	/* FIXME: handle those entry types? */
	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
		return 0;
	default:
		break;
	}

	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
		return 0;

	drm_err(&ptdev->base,
		"Unsupported non-optional entry type %u in firmware\n",
		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
	return -EINVAL;
}

static int panthor_fw_load(struct panthor_device *ptdev)
{
	const struct firmware *fw = NULL;
	struct panthor_fw_binary_iter iter = {};
	struct panthor_fw_binary_hdr hdr;
	char fw_path[128];
	int ret;

	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
		 CSF_FW_NAME);

	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
	if (ret) {
		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
			CSF_FW_NAME);
		return ret;
	}

	iter.data = fw->data;
	iter.size = fw->size;
	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
	if (ret)
		goto out;

	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Invalid firmware magic\n");
		goto out;
	}

	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
		goto out;
	}
	if (hdr.size > iter.size) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Firmware image is truncated\n");
		goto out;
	}

	iter.size = hdr.size;

	while (iter.offset < hdr.size) {
		ret = panthor_fw_load_entry(ptdev, fw, &iter);
		if (ret)
			goto out;
	}

	if (!ptdev->fw->shared_section) {
		drm_err(&ptdev->base, "Shared interface region not found\n");
		ret = -EINVAL;
		goto out;
	}

out:
	release_firmware(fw);
	return ret;
}

/**
 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
 * @ptdev: Device.
 * @mcu_va: MCU address.
 *
 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
 */
static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
{
	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
	u64 shared_mem_end = shared_mem_start +
			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);

	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
		return NULL;

	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
}
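
/*
 * Worked example: the shared section starts at CSF_MCU_SHARED_REGION_START
 * (0x04000000), so an MCU VA of 0x04000100 translates to
 * shared_section->mem->kmap + 0x100; any VA outside the shared section
 * yields NULL.
 */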

static int panthor_init_cs_iface(struct panthor_device *ptdev,
				 unsigned int csg_idx, unsigned int cs_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
			   (csg_idx * glb_iface->control->group_stride) +
			   CSF_STREAM_CONTROL_OFFSET +
			   (cs_idx * csg_iface->control->stream_stride);
	struct panthor_fw_cs_iface *first_cs_iface =
		panthor_fw_get_cs_iface(ptdev, 0, 0);

	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&cs_iface->lock);
	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);

	if (!cs_iface->input || !cs_iface->output) {
		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
		return -EINVAL;
	}

	if (cs_iface != first_cs_iface) {
		if (cs_iface->control->features != first_cs_iface->control->features) {
			drm_err(&ptdev->base, "Expecting identical CS slots");
			return -EINVAL;
		}
	} else {
		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);

		ptdev->csif_info.cs_reg_count = reg_count;
		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
	}

	return 0;
}

static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
			const struct panthor_fw_csg_control_iface *b)
{
	if (a->features != b->features)
		return false;
	if (a->suspend_size != b->suspend_size)
		return false;
	if (a->protm_suspend_size != b->protm_suspend_size)
		return false;
	if (a->stream_num != b->stream_num)
		return false;
	return true;
}

static int panthor_init_csg_iface(struct panthor_device *ptdev,
				  unsigned int csg_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
	unsigned int i;

	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&csg_iface->lock);
	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);

	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
		return -EINVAL;

	if (!csg_iface->input || !csg_iface->output) {
		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
		return -EINVAL;
	}

	if (csg_idx > 0) {
		struct panthor_fw_csg_iface *first_csg_iface =
			panthor_fw_get_csg_iface(ptdev, 0);

		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
			drm_err(&ptdev->base, "Expecting identical CSG slots");
			return -EINVAL;
		}
	}

	for (i = 0; i < csg_iface->control->stream_num; i++) {
		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);

		if (ret)
			return ret;
	}

	return 0;
}

static u32 panthor_get_instr_features(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
		return 0;

	return glb_iface->control->instr_features;
}

static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
	unsigned int i;

	if (!ptdev->fw->shared_section->mem->kmap)
		return -EINVAL;

	spin_lock_init(&glb_iface->lock);
	glb_iface->control = ptdev->fw->shared_section->mem->kmap;

	if (!glb_iface->control->version) {
		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
		return -EINVAL;
	}

	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
	if (!glb_iface->input || !glb_iface->output) {
		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
		return -EINVAL;
	}

	if (glb_iface->control->group_num > MAX_CSGS ||
	    glb_iface->control->group_num < MIN_CSGS) {
		drm_err(&ptdev->base, "Invalid number of control groups");
		return -EINVAL;
	}

	for (i = 0; i < glb_iface->control->group_num; i++) {
		int ret = panthor_init_csg_iface(ptdev, i);

		if (ret)
			return ret;
	}

	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
		 glb_iface->control->features,
		 panthor_get_instr_features(ptdev));
	return 0;
}

static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	if (panthor_fw_has_glb_state(ptdev))
		glb_iface->input->ack_irq_mask |= GLB_STATE_MASK;

	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN | GLB_COUNTER_EN,
			       GLB_IDLE_EN | GLB_COUNTER_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}

static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	u32 duration;
	u64 start = 0;

	if (tracepoint_enabled(gpu_job_irq))
		start = ktime_get_ns();

	gpu_write(ptdev, JOB_INT_CLEAR, status);

	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
		ptdev->fw->booted = true;

	wake_up_all(&ptdev->fw->req_waitqueue);

	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
	if (!ptdev->fw->booted)
		return;

	panthor_sched_report_fw_events(ptdev, status);

	if (tracepoint_enabled(gpu_job_irq) && start) {
		if (check_sub_overflow(ktime_get_ns(), start, &duration))
			duration = U32_MAX;
		trace_gpu_job_irq(ptdev->base.dev, status, duration);
	}
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);

static int panthor_fw_start(struct panthor_device *ptdev)
{
	bool timedout = false;

	ptdev->fw->booted = false;
	panthor_job_irq_enable_events(&ptdev->fw->irq, ~0);
	panthor_job_irq_resume(&ptdev->fw->irq);
	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);

	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
				ptdev->fw->booted,
				msecs_to_jiffies(1000))) {
		if (!ptdev->fw->booted &&
		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
			timedout = true;
	}

	if (timedout) {
		static const char * const status_str[] = {
			[MCU_STATUS_DISABLED] = "disabled",
			[MCU_STATUS_ENABLED] = "enabled",
			[MCU_STATUS_HALT] = "halt",
			[MCU_STATUS_FATAL] = "fatal",
		};
		u32 status = gpu_read(ptdev, MCU_STATUS);

		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
		return -ETIMEDOUT;
	}

	return 0;
}

static void panthor_fw_stop(struct panthor_device *ptdev)
{
	u32 status;

	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
	if (gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
				  status == MCU_STATUS_DISABLED, 10, 100000))
		drm_err(&ptdev->base, "Failed to stop MCU");
}

static bool panthor_fw_mcu_halted(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	bool halted;

	halted = gpu_read(ptdev, MCU_STATUS) == MCU_STATUS_HALT;

	if (panthor_fw_has_glb_state(ptdev))
		halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT);

	return halted;
}

static void panthor_fw_halt_mcu(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (panthor_fw_has_glb_state(ptdev))
		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK);
	else
		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}

static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev)
{
	bool halted = false;

	if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10,
				     MCU_HALT_TIMEOUT_US, 0, ptdev)) {
		drm_warn(&ptdev->base, "Timed out waiting for MCU to halt");
		return false;
	}

	return true;
}

static void panthor_fw_mcu_set_active(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (panthor_fw_has_glb_state(ptdev))
		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK);
	else
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
}

/**
 * panthor_fw_pre_reset() - Call before a reset.
 * @ptdev: Device.
 * @on_hang: true if the reset was triggered on a GPU hang.
 *
 * If the reset is not triggered on a hang, we try to gracefully halt the
 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
 */
void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
{
	/* Make sure we won't be woken up by a ping. */
	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	ptdev->reset.fast = false;

	if (!on_hang) {
		panthor_fw_halt_mcu(ptdev);
		if (!panthor_fw_wait_mcu_halted(ptdev))
			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
		else
			ptdev->reset.fast = true;
	}

	panthor_job_irq_suspend(&ptdev->fw->irq);
	panthor_fw_stop(ptdev);
}

/**
 * panthor_fw_post_reset() - Call after a reset.
 * @ptdev: Device.
 *
 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
 * make sure we can recover from a memory corruption.
 */
int panthor_fw_post_reset(struct panthor_device *ptdev)
{
	int ret;

	/* Make the MCU VM active. */
	ret = panthor_vm_active(ptdev->fw->vm);
	if (ret)
		return ret;

	if (!ptdev->reset.fast) {
		/* On a slow reset, reload all sections, including RO ones.
		 * We're not supposed to end up here anyway, let's just assume
		 * the overhead of reloading everything is acceptable.
		 */
		panthor_reload_fw_sections(ptdev, true);
	} else {
		/*
		 * If the FW was previously successfully halted in the pre-reset
		 * operation, we need to transition it to active again before
		 * the FW is rebooted.
		 * This is not needed on a slow reset because FW sections are
		 * re-initialized.
		 */
		panthor_fw_mcu_set_active(ptdev);
	}

	ret = panthor_fw_start(ptdev);
	if (ret) {
		drm_err(&ptdev->base, "FW %s reset failed",
			ptdev->reset.fast ? "fast" : "slow");
		return ret;
	}

	/* We must re-initialize the global interface even on fast-reset. */
	panthor_fw_init_global_iface(ptdev);
	return 0;
}

/**
 * panthor_fw_unplug() - Called when the device is unplugged.
 * @ptdev: Device.
 *
 * This function must make sure all pending operations are flushed before
 * it releases device resources, thus preventing any interaction with
 * the HW.
 *
 * If any FW-related work is still running after this function returns, it
 * must use drm_dev_{enter,exit}() and skip any HW access when
 * drm_dev_enter() returns false.
 */
void panthor_fw_unplug(struct panthor_device *ptdev)
{
	struct panthor_fw_section *section;

	disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
		/* Make sure the IRQ handler cannot be called after that point. */
		if (ptdev->fw->irq.irq)
			panthor_job_irq_suspend(&ptdev->fw->irq);

		panthor_fw_stop(ptdev);
	}

	list_for_each_entry(section, &ptdev->fw->sections, node)
		panthor_kernel_bo_destroy(section->mem);

	/* We intentionally don't call panthor_vm_idle() and let
	 * panthor_mmu_unplug() release the AS we acquired with
	 * panthor_vm_active() so we don't have to track the VM active/idle
	 * state to keep the active_refcnt balanced.
	 */
	panthor_vm_put(ptdev->fw->vm);
	ptdev->fw->vm = NULL;

	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
		panthor_hw_l2_power_off(ptdev);
}

/**
 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
 * @req_ptr: Pointer to the req register.
 * @ack_ptr: Pointer to the ack register.
 * @wq: Wait queue to use for the sleeping wait.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
				wait_queue_head_t *wq,
				u32 req_mask, u32 *acked,
				u32 timeout_ms)
{
	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
	int ret;

	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
	*acked = req_mask;
	ret = read_poll_timeout_atomic(READ_ONCE, ack,
				       (ack & req_mask) == req,
				       0, 10, 0,
				       *ack_ptr);
	if (!ret)
		return 0;

	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
			       msecs_to_jiffies(timeout_ms)))
		return 0;

	/* Check one last time, in case we were not woken up for some reason. */
	ack = READ_ONCE(*ack_ptr);
	if ((ack & req_mask) == req)
		return 0;

	*acked = ~(req ^ ack) & req_mask;
	return -ETIMEDOUT;
}
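
/*
 * Worked example for the *acked computation above: with req_mask = 0b0110,
 * req = 0b0100 and a final ack of 0b0000, ~(req ^ ack) & req_mask =
 * ~0b0100 & 0b0110 = 0b0010, i.e. only the request bit whose ack value
 * already matches req is reported back as acknowledged.
 */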

/**
 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
 * @ptdev: Device.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
			     u32 req_mask, u32 *acked,
			     u32 timeout_ms)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* GLB_HALT doesn't get acked through the FW interface. */
	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
		return -EINVAL;

	return panthor_fw_wait_acks(&glb_iface->input->req,
				    &glb_iface->output->ack,
				    &ptdev->fw->req_waitqueue,
				    req_mask, acked, timeout_ms);
}

/**
 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
 * @ptdev: Device.
 * @csg_slot: CSG slot ID.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
			     u32 req_mask, u32 *acked, u32 timeout_ms)
{
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
	int ret;

	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
		return -EINVAL;

	ret = panthor_fw_wait_acks(&csg_iface->input->req,
				   &csg_iface->output->ack,
				   &ptdev->fw->req_waitqueue,
				   req_mask, acked, timeout_ms);

	/*
	 * Check that all bits in the state field were updated, if any mismatch
	 * then clear all bits in the state field. This allows code to do
	 * (acked & CSG_STATE_MASK) and get the right value.
	 */
	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
		*acked &= ~CSG_STATE_MASK;

	return ret;
}

/**
 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
 * @ptdev: Device.
 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
 *
 * This function toggles bits in doorbell_req and rings the global
 * doorbell. It doesn't require a user doorbell to be attached to
 * the group.
 */
void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}
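
/*
 * Worked example of the toggle convention (assuming the usual CSF req/ack
 * scheme): if doorbell_req and doorbell_ack both read 0b0101 (idle) and
 * csg_mask is 0b0011, the call above flips doorbell_req to 0b0110. Every
 * bit where req != ack is then treated by the FW as a pending doorbell,
 * and acknowledged by copying the req bit back into ack.
 */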

static void panthor_fw_ping_work(struct work_struct *work)
{
	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
	struct panthor_device *ptdev = fw->irq.ptdev;
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	u32 acked;
	int ret;

	if (panthor_device_reset_is_pending(ptdev))
		return;

	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
	if (ret) {
		panthor_device_schedule_reset(ptdev);
		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
	} else {
		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
				 msecs_to_jiffies(PING_INTERVAL_MS));
	}
}

/**
 * panthor_fw_init() - Initialize FW related data.
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_fw_init(struct panthor_device *ptdev)
{
	struct panthor_fw *fw;
	int ret, irq;

	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
	if (!fw)
		return -ENOMEM;

	ptdev->fw = fw;
	init_waitqueue_head(&fw->req_waitqueue);
	INIT_LIST_HEAD(&fw->sections);
	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);

	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
	if (irq <= 0)
		return -ENODEV;

	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
	if (ret) {
		drm_err(&ptdev->base, "failed to request job irq");
		return ret;
	}

	ret = panthor_hw_l2_power_on(ptdev);
	if (ret)
		return ret;

	fw->vm = panthor_vm_create(ptdev, true,
				   0, SZ_4G,
				   CSF_MCU_SHARED_REGION_START,
				   CSF_MCU_SHARED_REGION_SIZE);
	if (IS_ERR(fw->vm)) {
		ret = PTR_ERR(fw->vm);
		fw->vm = NULL;
		goto err_unplug_fw;
	}

	ret = panthor_fw_load(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_vm_active(fw->vm);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_start(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_init_ifaces(ptdev);
	if (ret)
		goto err_unplug_fw;

	panthor_fw_init_global_iface(ptdev);
	return 0;

err_unplug_fw:
	panthor_fw_unplug(ptdev);
	return ret;
}

MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin");