Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 1516 lines 44 kB view raw
1// SPDX-License-Identifier: GPL-2.0 or MIT 2/* Copyright 2023 Collabora ltd. */ 3 4#ifdef CONFIG_ARM_ARCH_TIMER 5#include <asm/arch_timer.h> 6#endif 7 8#include <linux/clk.h> 9#include <linux/dma-mapping.h> 10#include <linux/firmware.h> 11#include <linux/iopoll.h> 12#include <linux/iosys-map.h> 13#include <linux/mutex.h> 14#include <linux/platform_device.h> 15#include <linux/pm_runtime.h> 16 17#include <drm/drm_drv.h> 18#include <drm/drm_managed.h> 19#include <drm/drm_print.h> 20 21#include "panthor_device.h" 22#include "panthor_fw.h" 23#include "panthor_gem.h" 24#include "panthor_gpu.h" 25#include "panthor_hw.h" 26#include "panthor_mmu.h" 27#include "panthor_regs.h" 28#include "panthor_sched.h" 29#include "panthor_trace.h" 30 31#define CSF_FW_NAME "mali_csffw.bin" 32 33#define PING_INTERVAL_MS 12000 34#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024) 35#define PROGRESS_TIMEOUT_SCALE_SHIFT 10 36#define IDLE_HYSTERESIS_US 800 37#define PWROFF_HYSTERESIS_US 10000 38#define MCU_HALT_TIMEOUT_US (1ULL * USEC_PER_SEC) 39 40/** 41 * struct panthor_fw_binary_hdr - Firmware binary header. 42 */ 43struct panthor_fw_binary_hdr { 44 /** @magic: Magic value to check binary validity. */ 45 u32 magic; 46#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e 47 48 /** @minor: Minor FW version. */ 49 u8 minor; 50 51 /** @major: Major FW version. */ 52 u8 major; 53#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0 54 55 /** @padding1: MBZ. */ 56 u16 padding1; 57 58 /** @version_hash: FW version hash. */ 59 u32 version_hash; 60 61 /** @padding2: MBZ. */ 62 u32 padding2; 63 64 /** @size: FW binary size. */ 65 u32 size; 66}; 67 68/** 69 * enum panthor_fw_binary_entry_type - Firmware binary entry type 70 */ 71enum panthor_fw_binary_entry_type { 72 /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */ 73 CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0, 74 75 /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */ 76 CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1, 77 78 /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */ 79 CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2, 80 81 /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */ 82 CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3, 83 84 /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */ 85 CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4, 86 87 /** 88 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how 89 * the FW binary was built. 90 */ 91 CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6 92}; 93 94#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff) 95#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff) 96#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30) 97#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31) 98 99#define CSF_FW_BINARY_IFACE_ENTRY_RD BIT(0) 100#define CSF_FW_BINARY_IFACE_ENTRY_WR BIT(1) 101#define CSF_FW_BINARY_IFACE_ENTRY_EX BIT(2) 102#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE (0 << 3) 103#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED (1 << 3) 104#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT (2 << 3) 105#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT (3 << 3) 106#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK GENMASK(4, 3) 107#define CSF_FW_BINARY_IFACE_ENTRY_PROT BIT(5) 108#define CSF_FW_BINARY_IFACE_ENTRY_SHARED BIT(30) 109#define CSF_FW_BINARY_IFACE_ENTRY_ZERO BIT(31) 110 111#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS \ 112 (CSF_FW_BINARY_IFACE_ENTRY_RD | \ 113 CSF_FW_BINARY_IFACE_ENTRY_WR | \ 114 CSF_FW_BINARY_IFACE_ENTRY_EX | \ 115 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK | \ 116 CSF_FW_BINARY_IFACE_ENTRY_PROT | \ 117 CSF_FW_BINARY_IFACE_ENTRY_SHARED | \ 118 CSF_FW_BINARY_IFACE_ENTRY_ZERO) 119 120/** 121 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary 122 */ 123struct panthor_fw_binary_section_entry_hdr { 124 /** @flags: Section flags. */ 125 u32 flags; 126 127 /** @va: MCU virtual range to map this binary section to. */ 128 struct { 129 /** @start: Start address. */ 130 u32 start; 131 132 /** @end: End address. */ 133 u32 end; 134 } va; 135 136 /** @data: Data to initialize the FW section with. */ 137 struct { 138 /** @start: Start offset in the FW binary. */ 139 u32 start; 140 141 /** @end: End offset in the FW binary. */ 142 u32 end; 143 } data; 144}; 145 146struct panthor_fw_build_info_hdr { 147 /** @meta_start: Offset of the build info data in the FW binary */ 148 u32 meta_start; 149 /** @meta_size: Size of the build info data in the FW binary */ 150 u32 meta_size; 151}; 152 153/** 154 * struct panthor_fw_binary_iter - Firmware binary iterator 155 * 156 * Used to parse a firmware binary. 157 */ 158struct panthor_fw_binary_iter { 159 /** @data: FW binary data. */ 160 const void *data; 161 162 /** @size: FW binary size. */ 163 size_t size; 164 165 /** @offset: Iterator offset. */ 166 size_t offset; 167}; 168 169/** 170 * struct panthor_fw_section - FW section 171 */ 172struct panthor_fw_section { 173 /** @node: Used to keep track of FW sections. */ 174 struct list_head node; 175 176 /** @flags: Section flags, as encoded in the FW binary. */ 177 u32 flags; 178 179 /** @mem: Section memory. */ 180 struct panthor_kernel_bo *mem; 181 182 /** 183 * @name: Name of the section, as specified in the binary. 184 * 185 * Can be NULL. 186 */ 187 const char *name; 188 189 /** 190 * @data: Initial data copied to the FW memory. 191 * 192 * We keep data around so we can reload sections after a reset. 193 */ 194 struct { 195 /** @buf: Buffed used to store init data. */ 196 const void *buf; 197 198 /** @size: Size of @buf in bytes. */ 199 size_t size; 200 } data; 201}; 202 203#define CSF_MCU_SHARED_REGION_START 0x04000000ULL 204#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL 205 206#define MIN_CS_PER_CSG 8 207#define MIN_CSGS 3 208 209#define CSF_IFACE_VERSION(major, minor, patch) \ 210 (((major) << 24) | ((minor) << 16) | (patch)) 211#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24) 212#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff) 213#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff) 214 215#define CSF_GROUP_CONTROL_OFFSET 0x1000 216#define CSF_STREAM_CONTROL_OFFSET 0x40 217#define CSF_UNPRESERVED_REG_COUNT 4 218 219/** 220 * struct panthor_fw_iface - FW interfaces 221 */ 222struct panthor_fw_iface { 223 /** @global: Global interface. */ 224 struct panthor_fw_global_iface global; 225 226 /** @groups: Group slot interfaces. */ 227 struct panthor_fw_csg_iface groups[MAX_CSGS]; 228 229 /** @streams: Command stream slot interfaces. */ 230 struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG]; 231}; 232 233/** 234 * struct panthor_fw - Firmware management 235 */ 236struct panthor_fw { 237 /** @vm: MCU VM. */ 238 struct panthor_vm *vm; 239 240 /** @sections: List of FW sections. */ 241 struct list_head sections; 242 243 /** @shared_section: The section containing the FW interfaces. */ 244 struct panthor_fw_section *shared_section; 245 246 /** @iface: FW interfaces. */ 247 struct panthor_fw_iface iface; 248 249 /** @watchdog: Collection of fields relating to the FW watchdog. */ 250 struct { 251 /** @ping_work: Delayed work used to ping the FW. */ 252 struct delayed_work ping_work; 253 } watchdog; 254 255 /** 256 * @req_waitqueue: FW request waitqueue. 257 * 258 * Everytime a request is sent to a command stream group or the global 259 * interface, the caller will first busy wait for the request to be 260 * acknowledged, and then fallback to a sleeping wait. 261 * 262 * This wait queue is here to support the sleeping wait flavor. 263 */ 264 wait_queue_head_t req_waitqueue; 265 266 /** @booted: True is the FW is booted */ 267 bool booted; 268 269 /** @irq: Job irq data. */ 270 struct panthor_irq irq; 271}; 272 273struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev) 274{ 275 return ptdev->fw->vm; 276} 277 278/** 279 * panthor_fw_get_glb_iface() - Get the global interface 280 * @ptdev: Device. 281 * 282 * Return: The global interface. 283 */ 284struct panthor_fw_global_iface * 285panthor_fw_get_glb_iface(struct panthor_device *ptdev) 286{ 287 return &ptdev->fw->iface.global; 288} 289 290/** 291 * panthor_fw_get_csg_iface() - Get a command stream group slot interface 292 * @ptdev: Device. 293 * @csg_slot: Index of the command stream group slot. 294 * 295 * Return: The command stream group slot interface. 296 */ 297struct panthor_fw_csg_iface * 298panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot) 299{ 300 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS)) 301 return NULL; 302 303 return &ptdev->fw->iface.groups[csg_slot]; 304} 305 306/** 307 * panthor_fw_get_cs_iface() - Get a command stream slot interface 308 * @ptdev: Device. 309 * @csg_slot: Index of the command stream group slot. 310 * @cs_slot: Index of the command stream slot. 311 * 312 * Return: The command stream slot interface. 313 */ 314struct panthor_fw_cs_iface * 315panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) 316{ 317 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG)) 318 return NULL; 319 320 return &ptdev->fw->iface.streams[csg_slot][cs_slot]; 321} 322 323static bool panthor_fw_has_glb_state(struct panthor_device *ptdev) 324{ 325 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 326 327 return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0); 328} 329 330static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev) 331{ 332 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 333 334 return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0); 335} 336 337u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev, 338 struct panthor_fw_csg_iface *csg_iface) 339{ 340 if (panthor_fw_has_64bit_ep_req(ptdev)) 341 return csg_iface->input->endpoint_req2; 342 else 343 return csg_iface->input->endpoint_req; 344} 345 346void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev, 347 struct panthor_fw_csg_iface *csg_iface, u64 value) 348{ 349 if (panthor_fw_has_64bit_ep_req(ptdev)) 350 csg_iface->input->endpoint_req2 = value; 351 else 352 csg_iface->input->endpoint_req = lower_32_bits(value); 353} 354 355void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev, 356 struct panthor_fw_csg_iface *csg_iface, u64 value, 357 u64 mask) 358{ 359 if (panthor_fw_has_64bit_ep_req(ptdev)) 360 panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask); 361 else 362 panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value), 363 lower_32_bits(mask)); 364} 365 366/** 367 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count 368 * @ptdev: Device. 369 * @timeout_us: Timeout expressed in micro-seconds. 370 * 371 * The FW has two timer sources: the GPU counter or arch-timer. We need 372 * to express timeouts in term of number of cycles and specify which 373 * timer source should be used. 374 * 375 * Return: A value suitable for timeout fields in the global interface. 376 */ 377static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us) 378{ 379 bool use_cycle_counter = false; 380 u32 timer_rate = 0; 381 u64 mod_cycles; 382 383#ifdef CONFIG_ARM_ARCH_TIMER 384 timer_rate = arch_timer_get_cntfrq(); 385#endif 386 387 if (!timer_rate) { 388 use_cycle_counter = true; 389 timer_rate = clk_get_rate(ptdev->clks.core); 390 } 391 392 if (drm_WARN_ON(&ptdev->base, !timer_rate)) { 393 /* We couldn't get a valid clock rate, let's just pick the 394 * maximum value so the FW still handles the core 395 * power on/off requests. 396 */ 397 return GLB_TIMER_VAL(~0) | 398 GLB_TIMER_SOURCE_GPU_COUNTER; 399 } 400 401 mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate, 402 1000000ull << 10); 403 if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0))) 404 mod_cycles = GLB_TIMER_VAL(~0); 405 406 return GLB_TIMER_VAL(mod_cycles) | 407 (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0); 408} 409 410static int panthor_fw_binary_iter_read(struct panthor_device *ptdev, 411 struct panthor_fw_binary_iter *iter, 412 void *out, size_t size) 413{ 414 size_t new_offset = iter->offset + size; 415 416 if (new_offset > iter->size || new_offset < iter->offset) { 417 drm_err(&ptdev->base, "Firmware too small\n"); 418 return -EINVAL; 419 } 420 421 memcpy(out, iter->data + iter->offset, size); 422 iter->offset = new_offset; 423 return 0; 424} 425 426static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev, 427 struct panthor_fw_binary_iter *iter, 428 struct panthor_fw_binary_iter *sub_iter, 429 size_t size) 430{ 431 size_t new_offset = iter->offset + size; 432 433 if (new_offset > iter->size || new_offset < iter->offset) { 434 drm_err(&ptdev->base, "Firmware entry too long\n"); 435 return -EINVAL; 436 } 437 438 sub_iter->offset = 0; 439 sub_iter->data = iter->data + iter->offset; 440 sub_iter->size = size; 441 iter->offset = new_offset; 442 return 0; 443} 444 445static void panthor_fw_init_section_mem(struct panthor_device *ptdev, 446 struct panthor_fw_section *section) 447{ 448 bool was_mapped = !!section->mem->kmap; 449 int ret; 450 451 if (!section->data.size && 452 !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO)) 453 return; 454 455 ret = panthor_kernel_bo_vmap(section->mem); 456 if (drm_WARN_ON(&ptdev->base, ret)) 457 return; 458 459 memcpy(section->mem->kmap, section->data.buf, section->data.size); 460 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) { 461 memset(section->mem->kmap + section->data.size, 0, 462 panthor_kernel_bo_size(section->mem) - section->data.size); 463 } 464 465 if (!was_mapped) 466 panthor_kernel_bo_vunmap(section->mem); 467} 468 469/** 470 * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces. 471 * @ptdev: Device. 472 * @input: Pointer holding the input interface on success. 473 * Should be ignored on failure. 474 * @output: Pointer holding the output interface on success. 475 * Should be ignored on failure. 476 * @input_fw_va: Pointer holding the input interface FW VA on success. 477 * Should be ignored on failure. 478 * @output_fw_va: Pointer holding the output interface FW VA on success. 479 * Should be ignored on failure. 480 * 481 * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input 482 * interface is at offset 0, and the output interface at offset 4096. 483 * 484 * Return: A valid pointer in case of success, an ERR_PTR() otherwise. 485 */ 486struct panthor_kernel_bo * 487panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, 488 struct panthor_fw_ringbuf_input_iface **input, 489 const struct panthor_fw_ringbuf_output_iface **output, 490 u32 *input_fw_va, u32 *output_fw_va) 491{ 492 struct panthor_kernel_bo *mem; 493 int ret; 494 495 mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K, 496 DRM_PANTHOR_BO_NO_MMAP, 497 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | 498 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, 499 PANTHOR_VM_KERNEL_AUTO_VA, 500 "Queue FW interface"); 501 if (IS_ERR(mem)) 502 return mem; 503 504 ret = panthor_kernel_bo_vmap(mem); 505 if (ret) { 506 panthor_kernel_bo_destroy(mem); 507 return ERR_PTR(ret); 508 } 509 510 memset(mem->kmap, 0, panthor_kernel_bo_size(mem)); 511 *input = mem->kmap; 512 *output = mem->kmap + SZ_4K; 513 *input_fw_va = panthor_kernel_bo_gpuva(mem); 514 *output_fw_va = *input_fw_va + SZ_4K; 515 516 return mem; 517} 518 519/** 520 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group. 521 * @ptdev: Device. 522 * @size: Size of the suspend buffer. 523 * 524 * Return: A valid pointer in case of success, an ERR_PTR() otherwise. 525 */ 526struct panthor_kernel_bo * 527panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) 528{ 529 return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, 530 DRM_PANTHOR_BO_NO_MMAP, 531 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, 532 PANTHOR_VM_KERNEL_AUTO_VA, 533 "FW suspend buffer"); 534} 535 536static int panthor_fw_load_section_entry(struct panthor_device *ptdev, 537 const struct firmware *fw, 538 struct panthor_fw_binary_iter *iter, 539 u32 ehdr) 540{ 541 ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm); 542 struct panthor_fw_binary_section_entry_hdr hdr; 543 struct panthor_fw_section *section; 544 u32 section_size; 545 u32 name_len; 546 int ret; 547 548 ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); 549 if (ret) 550 return ret; 551 552 if (hdr.data.end < hdr.data.start) { 553 drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", 554 hdr.data.end, hdr.data.start); 555 return -EINVAL; 556 } 557 558 if (hdr.va.end < hdr.va.start) { 559 drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", 560 hdr.va.end, hdr.va.start); 561 return -EINVAL; 562 } 563 564 if (hdr.data.end > fw->size) { 565 drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n", 566 hdr.data.end, fw->size); 567 return -EINVAL; 568 } 569 570 if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) { 571 drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", 572 hdr.va.start, hdr.va.end); 573 return -EINVAL; 574 } 575 576 if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) { 577 drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", 578 hdr.flags); 579 return -EINVAL; 580 } 581 582 if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) { 583 drm_warn(&ptdev->base, 584 "Firmware protected mode entry is not supported, ignoring"); 585 return 0; 586 } 587 588 if (hdr.va.start == CSF_MCU_SHARED_REGION_START && 589 !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) { 590 drm_err(&ptdev->base, 591 "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); 592 return -EINVAL; 593 } 594 595 name_len = iter->size - iter->offset; 596 597 section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); 598 if (!section) 599 return -ENOMEM; 600 601 list_add_tail(&section->node, &ptdev->fw->sections); 602 section->flags = hdr.flags; 603 section->data.size = hdr.data.end - hdr.data.start; 604 605 if (section->data.size > 0) { 606 void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); 607 608 if (!data) 609 return -ENOMEM; 610 611 memcpy(data, fw->data + hdr.data.start, section->data.size); 612 section->data.buf = data; 613 } 614 615 if (name_len > 0) { 616 char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); 617 618 if (!name) 619 return -ENOMEM; 620 621 memcpy(name, iter->data + iter->offset, name_len); 622 name[name_len] = '\0'; 623 section->name = name; 624 } 625 626 section_size = hdr.va.end - hdr.va.start; 627 if (section_size) { 628 u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK; 629 struct panthor_gem_object *bo; 630 u32 vm_map_flags = 0; 631 struct sg_table *sgt; 632 u64 va = hdr.va.start; 633 634 if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) 635 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; 636 637 if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX)) 638 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; 639 640 /* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to 641 * non-cacheable for now. We might want to introduce a new 642 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device 643 * memory and is currently not used by our driver) for 644 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit 645 * of IO-coherent systems. 646 */ 647 if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED) 648 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; 649 650 section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), 651 section_size, 652 DRM_PANTHOR_BO_NO_MMAP, 653 vm_map_flags, va, "FW section"); 654 if (IS_ERR(section->mem)) 655 return PTR_ERR(section->mem); 656 657 if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) 658 return -EINVAL; 659 660 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) { 661 ret = panthor_kernel_bo_vmap(section->mem); 662 if (ret) 663 return ret; 664 } 665 666 panthor_fw_init_section_mem(ptdev, section); 667 668 bo = to_panthor_bo(section->mem->obj); 669 sgt = drm_gem_shmem_get_pages_sgt(&bo->base); 670 if (IS_ERR(sgt)) 671 return PTR_ERR(sgt); 672 673 dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); 674 } 675 676 if (hdr.va.start == CSF_MCU_SHARED_REGION_START) 677 ptdev->fw->shared_section = section; 678 679 return 0; 680} 681 682static int panthor_fw_read_build_info(struct panthor_device *ptdev, 683 const struct firmware *fw, 684 struct panthor_fw_binary_iter *iter, 685 u32 ehdr) 686{ 687 struct panthor_fw_build_info_hdr hdr; 688 static const char git_sha_header[] = "git_sha: "; 689 const int header_len = sizeof(git_sha_header) - 1; 690 int ret; 691 692 ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); 693 if (ret) 694 return ret; 695 696 if (hdr.meta_start > fw->size || 697 hdr.meta_start + hdr.meta_size > fw->size) { 698 drm_err(&ptdev->base, "Firmware build info corrupt\n"); 699 /* We don't need the build info, so continue */ 700 return 0; 701 } 702 703 if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) { 704 /* Not the expected header, this isn't metadata we understand */ 705 return 0; 706 } 707 708 /* Check that the git SHA is NULL terminated as expected */ 709 if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') { 710 drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n"); 711 /* Don't treat as fatal */ 712 return 0; 713 } 714 715 drm_info(&ptdev->base, "Firmware git sha: %s\n", 716 fw->data + hdr.meta_start + header_len); 717 718 return 0; 719} 720 721static void 722panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) 723{ 724 struct panthor_fw_section *section; 725 726 list_for_each_entry(section, &ptdev->fw->sections, node) { 727 struct sg_table *sgt; 728 729 if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) 730 continue; 731 732 panthor_fw_init_section_mem(ptdev, section); 733 sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base); 734 if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt))) 735 dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); 736 } 737} 738 739static int panthor_fw_load_entry(struct panthor_device *ptdev, 740 const struct firmware *fw, 741 struct panthor_fw_binary_iter *iter) 742{ 743 struct panthor_fw_binary_iter eiter; 744 u32 ehdr; 745 int ret; 746 747 ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); 748 if (ret) 749 return ret; 750 751 if ((iter->offset % sizeof(u32)) || 752 (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { 753 drm_err(&ptdev->base, "Firmware entry is not 32-bit aligned, offset=0x%x size=0x%x\n", 754 (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); 755 return -EINVAL; 756 } 757 758 if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, 759 CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) 760 return -EINVAL; 761 762 switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { 763 case CSF_FW_BINARY_ENTRY_TYPE_IFACE: 764 return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); 765 case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: 766 return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr); 767 768 /* FIXME: handle those entry types? */ 769 case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: 770 case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: 771 case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: 772 case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: 773 return 0; 774 default: 775 break; 776 } 777 778 if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) 779 return 0; 780 781 drm_err(&ptdev->base, 782 "Unsupported non-optional entry type %u in firmware\n", 783 CSF_FW_BINARY_ENTRY_TYPE(ehdr)); 784 return -EINVAL; 785} 786 787static int panthor_fw_load(struct panthor_device *ptdev) 788{ 789 const struct firmware *fw = NULL; 790 struct panthor_fw_binary_iter iter = {}; 791 struct panthor_fw_binary_hdr hdr; 792 char fw_path[128]; 793 int ret; 794 795 snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", 796 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), 797 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), 798 CSF_FW_NAME); 799 800 ret = request_firmware(&fw, fw_path, ptdev->base.dev); 801 if (ret) { 802 drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", 803 CSF_FW_NAME); 804 return ret; 805 } 806 807 iter.data = fw->data; 808 iter.size = fw->size; 809 ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); 810 if (ret) 811 goto out; 812 813 if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { 814 ret = -EINVAL; 815 drm_err(&ptdev->base, "Invalid firmware magic\n"); 816 goto out; 817 } 818 819 if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { 820 ret = -EINVAL; 821 drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", 822 hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); 823 goto out; 824 } 825 826 if (hdr.size > iter.size) { 827 drm_err(&ptdev->base, "Firmware image is truncated\n"); 828 goto out; 829 } 830 831 iter.size = hdr.size; 832 833 while (iter.offset < hdr.size) { 834 ret = panthor_fw_load_entry(ptdev, fw, &iter); 835 if (ret) 836 goto out; 837 } 838 839 if (!ptdev->fw->shared_section) { 840 drm_err(&ptdev->base, "Shared interface region not found\n"); 841 ret = -EINVAL; 842 goto out; 843 } 844 845out: 846 release_firmware(fw); 847 return ret; 848} 849 850/** 851 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address 852 * @ptdev: Device. 853 * @mcu_va: MCU address. 854 * 855 * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 856 */ 857static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) 858{ 859 u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); 860 u64 shared_mem_end = shared_mem_start + 861 panthor_kernel_bo_size(ptdev->fw->shared_section->mem); 862 if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) 863 return NULL; 864 865 return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); 866} 867 868static int panthor_init_cs_iface(struct panthor_device *ptdev, 869 unsigned int csg_idx, unsigned int cs_idx) 870{ 871 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 872 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); 873 struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; 874 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); 875 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + 876 (csg_idx * glb_iface->control->group_stride) + 877 CSF_STREAM_CONTROL_OFFSET + 878 (cs_idx * csg_iface->control->stream_stride); 879 struct panthor_fw_cs_iface *first_cs_iface = 880 panthor_fw_get_cs_iface(ptdev, 0, 0); 881 882 if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) 883 return -EINVAL; 884 885 spin_lock_init(&cs_iface->lock); 886 cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; 887 cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); 888 cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); 889 890 if (!cs_iface->input || !cs_iface->output) { 891 drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); 892 return -EINVAL; 893 } 894 895 if (cs_iface != first_cs_iface) { 896 if (cs_iface->control->features != first_cs_iface->control->features) { 897 drm_err(&ptdev->base, "Expecting identical CS slots"); 898 return -EINVAL; 899 } 900 } else { 901 u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); 902 903 ptdev->csif_info.cs_reg_count = reg_count; 904 ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; 905 } 906 907 return 0; 908} 909 910static bool compare_csg(const struct panthor_fw_csg_control_iface *a, 911 const struct panthor_fw_csg_control_iface *b) 912{ 913 if (a->features != b->features) 914 return false; 915 if (a->suspend_size != b->suspend_size) 916 return false; 917 if (a->protm_suspend_size != b->protm_suspend_size) 918 return false; 919 if (a->stream_num != b->stream_num) 920 return false; 921 return true; 922} 923 924static int panthor_init_csg_iface(struct panthor_device *ptdev, 925 unsigned int csg_idx) 926{ 927 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 928 struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; 929 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); 930 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); 931 unsigned int i; 932 933 if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) 934 return -EINVAL; 935 936 spin_lock_init(&csg_iface->lock); 937 csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; 938 csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); 939 csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); 940 941 if (csg_iface->control->stream_num < MIN_CS_PER_CSG || 942 csg_iface->control->stream_num > MAX_CS_PER_CSG) 943 return -EINVAL; 944 945 if (!csg_iface->input || !csg_iface->output) { 946 drm_err(&ptdev->base, "Invalid group control interface input/output VA"); 947 return -EINVAL; 948 } 949 950 if (csg_idx > 0) { 951 struct panthor_fw_csg_iface *first_csg_iface = 952 panthor_fw_get_csg_iface(ptdev, 0); 953 954 if (!compare_csg(first_csg_iface->control, csg_iface->control)) { 955 drm_err(&ptdev->base, "Expecting identical CSG slots"); 956 return -EINVAL; 957 } 958 } 959 960 for (i = 0; i < csg_iface->control->stream_num; i++) { 961 int ret = panthor_init_cs_iface(ptdev, csg_idx, i); 962 963 if (ret) 964 return ret; 965 } 966 967 return 0; 968} 969 970static u32 panthor_get_instr_features(struct panthor_device *ptdev) 971{ 972 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 973 974 if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) 975 return 0; 976 977 return glb_iface->control->instr_features; 978} 979 980static int panthor_fw_init_ifaces(struct panthor_device *ptdev) 981{ 982 struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; 983 unsigned int i; 984 985 if (!ptdev->fw->shared_section->mem->kmap) 986 return -EINVAL; 987 988 spin_lock_init(&glb_iface->lock); 989 glb_iface->control = ptdev->fw->shared_section->mem->kmap; 990 991 if (!glb_iface->control->version) { 992 drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); 993 return -EINVAL; 994 } 995 996 glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); 997 glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); 998 if (!glb_iface->input || !glb_iface->output) { 999 drm_err(&ptdev->base, "Invalid global control interface input/output VA"); 1000 return -EINVAL; 1001 } 1002 1003 if (glb_iface->control->group_num > MAX_CSGS || 1004 glb_iface->control->group_num < MIN_CSGS) { 1005 drm_err(&ptdev->base, "Invalid number of control groups"); 1006 return -EINVAL; 1007 } 1008 1009 for (i = 0; i < glb_iface->control->group_num; i++) { 1010 int ret = panthor_init_csg_iface(ptdev, i); 1011 1012 if (ret) 1013 return ret; 1014 } 1015 1016 drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x", 1017 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), 1018 CSF_IFACE_VERSION_MINOR(glb_iface->control->version), 1019 CSF_IFACE_VERSION_PATCH(glb_iface->control->version), 1020 glb_iface->control->features, 1021 panthor_get_instr_features(ptdev)); 1022 return 0; 1023} 1024 1025static void panthor_fw_init_global_iface(struct panthor_device *ptdev) 1026{ 1027 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1028 1029 /* Enable all cores. */ 1030 glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; 1031 1032 /* Setup timers. */ 1033 glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); 1034 glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; 1035 glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); 1036 1037 /* Enable interrupts we care about. */ 1038 glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | 1039 GLB_PING | 1040 GLB_CFG_PROGRESS_TIMER | 1041 GLB_CFG_POWEROFF_TIMER | 1042 GLB_IDLE_EN | 1043 GLB_IDLE; 1044 1045 if (panthor_fw_has_glb_state(ptdev)) 1046 glb_iface->input->ack_irq_mask |= GLB_STATE_MASK; 1047 1048 panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN | GLB_COUNTER_EN, 1049 GLB_IDLE_EN | GLB_COUNTER_EN); 1050 panthor_fw_toggle_reqs(glb_iface, req, ack, 1051 GLB_CFG_ALLOC_EN | 1052 GLB_CFG_POWEROFF_TIMER | 1053 GLB_CFG_PROGRESS_TIMER); 1054 1055 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); 1056 1057 /* Kick the watchdog. */ 1058 mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, 1059 msecs_to_jiffies(PING_INTERVAL_MS)); 1060} 1061 1062static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) 1063{ 1064 u32 duration; 1065 u64 start = 0; 1066 1067 if (tracepoint_enabled(gpu_job_irq)) 1068 start = ktime_get_ns(); 1069 1070 gpu_write(ptdev, JOB_INT_CLEAR, status); 1071 1072 if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) 1073 ptdev->fw->booted = true; 1074 1075 wake_up_all(&ptdev->fw->req_waitqueue); 1076 1077 /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */ 1078 if (!ptdev->fw->booted) 1079 return; 1080 1081 panthor_sched_report_fw_events(ptdev, status); 1082 1083 if (tracepoint_enabled(gpu_job_irq) && start) { 1084 if (check_sub_overflow(ktime_get_ns(), start, &duration)) 1085 duration = U32_MAX; 1086 trace_gpu_job_irq(ptdev->base.dev, status, duration); 1087 } 1088} 1089PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler); 1090 1091static int panthor_fw_start(struct panthor_device *ptdev) 1092{ 1093 bool timedout = false; 1094 1095 ptdev->fw->booted = false; 1096 panthor_job_irq_enable_events(&ptdev->fw->irq, ~0); 1097 panthor_job_irq_resume(&ptdev->fw->irq); 1098 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO); 1099 1100 if (!wait_event_timeout(ptdev->fw->req_waitqueue, 1101 ptdev->fw->booted, 1102 msecs_to_jiffies(1000))) { 1103 if (!ptdev->fw->booted && 1104 !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF)) 1105 timedout = true; 1106 } 1107 1108 if (timedout) { 1109 static const char * const status_str[] = { 1110 [MCU_STATUS_DISABLED] = "disabled", 1111 [MCU_STATUS_ENABLED] = "enabled", 1112 [MCU_STATUS_HALT] = "halt", 1113 [MCU_STATUS_FATAL] = "fatal", 1114 }; 1115 u32 status = gpu_read(ptdev, MCU_STATUS); 1116 1117 drm_err(&ptdev->base, "Failed to boot MCU (status=%s)", 1118 status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown"); 1119 return -ETIMEDOUT; 1120 } 1121 1122 return 0; 1123} 1124 1125static void panthor_fw_stop(struct panthor_device *ptdev) 1126{ 1127 u32 status; 1128 1129 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); 1130 if (gpu_read_poll_timeout(ptdev, MCU_STATUS, status, 1131 status == MCU_STATUS_DISABLED, 10, 100000)) 1132 drm_err(&ptdev->base, "Failed to stop MCU"); 1133} 1134 1135static bool panthor_fw_mcu_halted(struct panthor_device *ptdev) 1136{ 1137 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1138 bool halted; 1139 1140 halted = gpu_read(ptdev, MCU_STATUS) == MCU_STATUS_HALT; 1141 1142 if (panthor_fw_has_glb_state(ptdev)) 1143 halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT); 1144 1145 return halted; 1146} 1147 1148static void panthor_fw_halt_mcu(struct panthor_device *ptdev) 1149{ 1150 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1151 1152 if (panthor_fw_has_glb_state(ptdev)) 1153 panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK); 1154 else 1155 panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); 1156 1157 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); 1158} 1159 1160static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev) 1161{ 1162 bool halted = false; 1163 1164 if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10, 1165 MCU_HALT_TIMEOUT_US, 0, ptdev)) { 1166 drm_warn(&ptdev->base, "Timed out waiting for MCU to halt"); 1167 return false; 1168 } 1169 1170 return true; 1171} 1172 1173static void panthor_fw_mcu_set_active(struct panthor_device *ptdev) 1174{ 1175 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1176 1177 if (panthor_fw_has_glb_state(ptdev)) 1178 panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK); 1179 else 1180 panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); 1181} 1182 1183/** 1184 * panthor_fw_pre_reset() - Call before a reset. 1185 * @ptdev: Device. 1186 * @on_hang: true if the reset was triggered on a GPU hang. 1187 * 1188 * If the reset is not triggered on a hang, we try to gracefully halt the 1189 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. 1190 */ 1191void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) 1192{ 1193 /* Make sure we won't be woken up by a ping. */ 1194 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); 1195 1196 ptdev->reset.fast = false; 1197 1198 if (!on_hang) { 1199 panthor_fw_halt_mcu(ptdev); 1200 if (!panthor_fw_wait_mcu_halted(ptdev)) 1201 drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); 1202 else 1203 ptdev->reset.fast = true; 1204 } 1205 1206 panthor_job_irq_suspend(&ptdev->fw->irq); 1207 panthor_fw_stop(ptdev); 1208} 1209 1210/** 1211 * panthor_fw_post_reset() - Call after a reset. 1212 * @ptdev: Device. 1213 * 1214 * Start the FW. If this is not a fast reset, all FW sections are reloaded to 1215 * make sure we can recover from a memory corruption. 1216 */ 1217int panthor_fw_post_reset(struct panthor_device *ptdev) 1218{ 1219 int ret; 1220 1221 /* Make the MCU VM active. */ 1222 ret = panthor_vm_active(ptdev->fw->vm); 1223 if (ret) 1224 return ret; 1225 1226 if (!ptdev->reset.fast) { 1227 /* On a slow reset, reload all sections, including RO ones. 1228 * We're not supposed to end up here anyway, let's just assume 1229 * the overhead of reloading everything is acceptable. 1230 */ 1231 panthor_reload_fw_sections(ptdev, true); 1232 } else { 1233 /* 1234 * If the FW was previously successfully halted in the pre-reset 1235 * operation, we need to transition it to active again before 1236 * the FW is rebooted. 1237 * This is not needed on a slow reset because FW sections are 1238 * re-initialized. 1239 */ 1240 panthor_fw_mcu_set_active(ptdev); 1241 } 1242 1243 ret = panthor_fw_start(ptdev); 1244 if (ret) { 1245 drm_err(&ptdev->base, "FW %s reset failed", 1246 ptdev->reset.fast ? "fast" : "slow"); 1247 return ret; 1248 } 1249 1250 /* We must re-initialize the global interface even on fast-reset. */ 1251 panthor_fw_init_global_iface(ptdev); 1252 return 0; 1253} 1254 1255/** 1256 * panthor_fw_unplug() - Called when the device is unplugged. 1257 * @ptdev: Device. 1258 * 1259 * This function must make sure all pending operations are flushed before 1260 * will release device resources, thus preventing any interaction with 1261 * the HW. 1262 * 1263 * If there is still FW-related work running after this function returns, 1264 * they must use drm_dev_{enter,exit}() and skip any HW access when 1265 * drm_dev_enter() returns false. 1266 */ 1267void panthor_fw_unplug(struct panthor_device *ptdev) 1268{ 1269 struct panthor_fw_section *section; 1270 1271 disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work); 1272 1273 if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) { 1274 /* Make sure the IRQ handler cannot be called after that point. */ 1275 if (ptdev->fw->irq.irq) 1276 panthor_job_irq_suspend(&ptdev->fw->irq); 1277 1278 panthor_fw_stop(ptdev); 1279 } 1280 1281 list_for_each_entry(section, &ptdev->fw->sections, node) 1282 panthor_kernel_bo_destroy(section->mem); 1283 1284 /* We intentionally don't call panthor_vm_idle() and let 1285 * panthor_mmu_unplug() release the AS we acquired with 1286 * panthor_vm_active() so we don't have to track the VM active/idle 1287 * state to keep the active_refcnt balanced. 1288 */ 1289 panthor_vm_put(ptdev->fw->vm); 1290 ptdev->fw->vm = NULL; 1291 1292 if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) 1293 panthor_hw_l2_power_off(ptdev); 1294} 1295 1296/** 1297 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW. 1298 * @req_ptr: Pointer to the req register. 1299 * @ack_ptr: Pointer to the ack register. 1300 * @wq: Wait queue to use for the sleeping wait. 1301 * @req_mask: Mask of requests to wait for. 1302 * @acked: Pointer to field that's updated with the acked requests. 1303 * If the function returns 0, *acked == req_mask. 1304 * @timeout_ms: Timeout expressed in milliseconds. 1305 * 1306 * Return: 0 on success, -ETIMEDOUT otherwise. 1307 */ 1308static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr, 1309 wait_queue_head_t *wq, 1310 u32 req_mask, u32 *acked, 1311 u32 timeout_ms) 1312{ 1313 u32 ack, req = READ_ONCE(*req_ptr) & req_mask; 1314 int ret; 1315 1316 /* Busy wait for a few µsecs before falling back to a sleeping wait. */ 1317 *acked = req_mask; 1318 ret = read_poll_timeout_atomic(READ_ONCE, ack, 1319 (ack & req_mask) == req, 1320 0, 10, 0, 1321 *ack_ptr); 1322 if (!ret) 1323 return 0; 1324 1325 if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req, 1326 msecs_to_jiffies(timeout_ms))) 1327 return 0; 1328 1329 /* Check one last time, in case we were not woken up for some reason. */ 1330 ack = READ_ONCE(*ack_ptr); 1331 if ((ack & req_mask) == req) 1332 return 0; 1333 1334 *acked = ~(req ^ ack) & req_mask; 1335 return -ETIMEDOUT; 1336} 1337 1338/** 1339 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged. 1340 * @ptdev: Device. 1341 * @req_mask: Mask of requests to wait for. 1342 * @acked: Pointer to field that's updated with the acked requests. 1343 * If the function returns 0, *acked == req_mask. 1344 * @timeout_ms: Timeout expressed in milliseconds. 1345 * 1346 * Return: 0 on success, -ETIMEDOUT otherwise. 1347 */ 1348int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, 1349 u32 req_mask, u32 *acked, 1350 u32 timeout_ms) 1351{ 1352 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1353 1354 /* GLB_HALT doesn't get acked through the FW interface. */ 1355 if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) 1356 return -EINVAL; 1357 1358 return panthor_fw_wait_acks(&glb_iface->input->req, 1359 &glb_iface->output->ack, 1360 &ptdev->fw->req_waitqueue, 1361 req_mask, acked, timeout_ms); 1362} 1363 1364/** 1365 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. 1366 * @ptdev: Device. 1367 * @csg_slot: CSG slot ID. 1368 * @req_mask: Mask of requests to wait for. 1369 * @acked: Pointer to field that's updated with the acked requests. 1370 * If the function returns 0, *acked == req_mask. 1371 * @timeout_ms: Timeout expressed in milliseconds. 1372 * 1373 * Return: 0 on success, -ETIMEDOUT otherwise. 1374 */ 1375int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, 1376 u32 req_mask, u32 *acked, u32 timeout_ms) 1377{ 1378 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); 1379 int ret; 1380 1381 if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) 1382 return -EINVAL; 1383 1384 ret = panthor_fw_wait_acks(&csg_iface->input->req, 1385 &csg_iface->output->ack, 1386 &ptdev->fw->req_waitqueue, 1387 req_mask, acked, timeout_ms); 1388 1389 /* 1390 * Check that all bits in the state field were updated, if any mismatch 1391 * then clear all bits in the state field. This allows code to do 1392 * (acked & CSG_STATE_MASK) and get the right value. 1393 */ 1394 1395 if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) 1396 *acked &= ~CSG_STATE_MASK; 1397 1398 return ret; 1399} 1400 1401/** 1402 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. 1403 * @ptdev: Device. 1404 * @csg_mask: Bitmask encoding the command stream group doorbells to ring. 1405 * 1406 * This function is toggling bits in the doorbell_req and ringing the 1407 * global doorbell. It doesn't require a user doorbell to be attached to 1408 * the group. 1409 */ 1410void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) 1411{ 1412 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1413 1414 panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); 1415 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); 1416} 1417 1418static void panthor_fw_ping_work(struct work_struct *work) 1419{ 1420 struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); 1421 struct panthor_device *ptdev = fw->irq.ptdev; 1422 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1423 u32 acked; 1424 int ret; 1425 1426 if (panthor_device_reset_is_pending(ptdev)) 1427 return; 1428 1429 panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); 1430 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); 1431 1432 ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); 1433 if (ret) { 1434 panthor_device_schedule_reset(ptdev); 1435 drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); 1436 } else { 1437 mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, 1438 msecs_to_jiffies(PING_INTERVAL_MS)); 1439 } 1440} 1441 1442/** 1443 * panthor_fw_init() - Initialize FW related data. 1444 * @ptdev: Device. 1445 * 1446 * Return: 0 on success, a negative error code otherwise. 1447 */ 1448int panthor_fw_init(struct panthor_device *ptdev) 1449{ 1450 struct panthor_fw *fw; 1451 int ret, irq; 1452 1453 fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); 1454 if (!fw) 1455 return -ENOMEM; 1456 1457 ptdev->fw = fw; 1458 init_waitqueue_head(&fw->req_waitqueue); 1459 INIT_LIST_HEAD(&fw->sections); 1460 INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); 1461 1462 irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); 1463 if (irq <= 0) 1464 return -ENODEV; 1465 1466 ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0); 1467 if (ret) { 1468 drm_err(&ptdev->base, "failed to request job irq"); 1469 return ret; 1470 } 1471 1472 ret = panthor_hw_l2_power_on(ptdev); 1473 if (ret) 1474 return ret; 1475 1476 fw->vm = panthor_vm_create(ptdev, true, 1477 0, SZ_4G, 1478 CSF_MCU_SHARED_REGION_START, 1479 CSF_MCU_SHARED_REGION_SIZE); 1480 if (IS_ERR(fw->vm)) { 1481 ret = PTR_ERR(fw->vm); 1482 fw->vm = NULL; 1483 goto err_unplug_fw; 1484 } 1485 1486 ret = panthor_fw_load(ptdev); 1487 if (ret) 1488 goto err_unplug_fw; 1489 1490 ret = panthor_vm_active(fw->vm); 1491 if (ret) 1492 goto err_unplug_fw; 1493 1494 ret = panthor_fw_start(ptdev); 1495 if (ret) 1496 goto err_unplug_fw; 1497 1498 ret = panthor_fw_init_ifaces(ptdev); 1499 if (ret) 1500 goto err_unplug_fw; 1501 1502 panthor_fw_init_global_iface(ptdev); 1503 return 0; 1504 1505err_unplug_fw: 1506 panthor_fw_unplug(ptdev); 1507 return ret; 1508} 1509 1510MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); 1511MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin"); 1512MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin"); 1513MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin"); 1514MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin"); 1515MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin"); 1516MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin");