Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported

MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces
MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.

Add driver logic to check the firmware version and, if MSG_OP_CHAIN_EXEC_NPU
is supported, use it to submit firmware commands.

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20251031014700.2919349-1-lizhi.hou@amd.com

Lizhi Hou 71829d7f 3668133e

+434 -209
+328 -199
drivers/accel/amdxdna/aie2_message.c
··· 27 27 #define DECLARE_AIE2_MSG(name, op) \ 28 28 DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE) 29 29 30 + #define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops) 31 + 30 32 static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, 31 33 struct xdna_mailbox_msg *msg) 32 34 { ··· 435 433 return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 436 434 } 437 435 436 + static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req, 437 + size_t *size, u32 *msg_op) 438 + { 439 + struct execute_buffer_req *cu_req = req; 440 + u32 cmd_len; 441 + void *cmd; 442 + 443 + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 444 + if (cmd_len > sizeof(cu_req->payload)) 445 + return -EINVAL; 446 + 447 + cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 448 + if (cu_req->cu_idx == INVALID_CU_IDX) 449 + return -EINVAL; 450 + 451 + memcpy(cu_req->payload, cmd, cmd_len); 452 + 453 + *size = sizeof(*cu_req); 454 + *msg_op = MSG_OP_EXECUTE_BUFFER_CF; 455 + return 0; 456 + } 457 + 458 + static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req, 459 + size_t *size, u32 *msg_op) 460 + { 461 + struct exec_dpu_req *dpu_req = req; 462 + struct amdxdna_cmd_start_npu *sn; 463 + u32 cmd_len; 464 + 465 + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 466 + if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload)) 467 + return -EINVAL; 468 + 469 + dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 470 + if (dpu_req->cu_idx == INVALID_CU_IDX) 471 + return -EINVAL; 472 + 473 + dpu_req->inst_buf_addr = sn->buffer; 474 + dpu_req->inst_size = sn->buffer_size; 475 + dpu_req->inst_prop_cnt = sn->prop_count; 476 + memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn)); 477 + 478 + *size = sizeof(*dpu_req); 479 + *msg_op = MSG_OP_EXEC_DPU; 480 + return 0; 481 + } 482 + 483 + static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) 484 + { 485 + struct cmd_chain_req *chain_req = req; 486 + 487 + chain_req->buf_addr = 
slot_addr; 488 + chain_req->buf_size = size; 489 + chain_req->count = cmd_cnt; 490 + } 491 + 492 + static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt) 493 + { 494 + struct cmd_chain_npu_req *npu_chain_req = req; 495 + 496 + npu_chain_req->flags = 0; 497 + npu_chain_req->reserved = 0; 498 + npu_chain_req->buf_addr = slot_addr; 499 + npu_chain_req->buf_size = size; 500 + npu_chain_req->count = cmd_cnt; 501 + } 502 + 503 + static int 504 + aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 505 + { 506 + struct cmd_chain_slot_execbuf_cf *cf_slot = slot; 507 + u32 cmd_len; 508 + void *cmd; 509 + 510 + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 511 + if (*size < sizeof(*cf_slot) + cmd_len) 512 + return -EINVAL; 513 + 514 + cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 515 + if (cf_slot->cu_idx == INVALID_CU_IDX) 516 + return -EINVAL; 517 + 518 + cf_slot->arg_cnt = cmd_len / sizeof(u32); 519 + memcpy(cf_slot->args, cmd, cmd_len); 520 + /* Accurate slot size to hint firmware to do necessary copy */ 521 + *size = sizeof(*cf_slot) + cmd_len; 522 + return 0; 523 + } 524 + 525 + static int 526 + aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 527 + { 528 + struct cmd_chain_slot_dpu *dpu_slot = slot; 529 + struct amdxdna_cmd_start_npu *sn; 530 + u32 cmd_len; 531 + u32 arg_sz; 532 + 533 + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 534 + arg_sz = cmd_len - sizeof(*sn); 535 + if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) 536 + return -EINVAL; 537 + 538 + if (*size < sizeof(*dpu_slot) + arg_sz) 539 + return -EINVAL; 540 + 541 + dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 542 + if (dpu_slot->cu_idx == INVALID_CU_IDX) 543 + return -EINVAL; 544 + 545 + dpu_slot->inst_buf_addr = sn->buffer; 546 + dpu_slot->inst_size = sn->buffer_size; 547 + dpu_slot->inst_prop_cnt = sn->prop_count; 548 + dpu_slot->arg_cnt = arg_sz / sizeof(u32); 549 + memcpy(dpu_slot->args, 
sn->prop_args, arg_sz); 550 + 551 + /* Accurate slot size to hint firmware to do necessary copy */ 552 + *size = sizeof(*dpu_slot) + arg_sz; 553 + return 0; 554 + } 555 + 556 + static u32 aie2_get_chain_msg_op(u32 cmd_op) 557 + { 558 + switch (cmd_op) { 559 + case ERT_START_CU: 560 + return MSG_OP_CHAIN_EXEC_BUFFER_CF; 561 + case ERT_START_NPU: 562 + return MSG_OP_CHAIN_EXEC_DPU; 563 + default: 564 + break; 565 + } 566 + 567 + return MSG_OP_MAX_OPCODE; 568 + } 569 + 570 + static struct aie2_exec_msg_ops legacy_exec_message_ops = { 571 + .init_cu_req = aie2_init_exec_cu_req, 572 + .init_dpu_req = aie2_init_exec_dpu_req, 573 + .init_chain_req = aie2_init_exec_chain_req, 574 + .fill_cf_slot = aie2_cmdlist_fill_cf, 575 + .fill_dpu_slot = aie2_cmdlist_fill_dpu, 576 + .get_chain_msg_op = aie2_get_chain_msg_op, 577 + }; 578 + 579 + static int 580 + aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 581 + { 582 + struct cmd_chain_slot_npu *npu_slot = slot; 583 + u32 cmd_len; 584 + void *cmd; 585 + 586 + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 587 + if (*size < sizeof(*npu_slot) + cmd_len) 588 + return -EINVAL; 589 + 590 + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 591 + if (npu_slot->cu_idx == INVALID_CU_IDX) 592 + return -EINVAL; 593 + 594 + memset(npu_slot, 0, sizeof(*npu_slot)); 595 + npu_slot->type = EXEC_NPU_TYPE_NON_ELF; 596 + npu_slot->arg_cnt = cmd_len / sizeof(u32); 597 + memcpy(npu_slot->args, cmd, cmd_len); 598 + 599 + *size = sizeof(*npu_slot) + cmd_len; 600 + return 0; 601 + } 602 + 603 + static int 604 + aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) 605 + { 606 + struct cmd_chain_slot_npu *npu_slot = slot; 607 + struct amdxdna_cmd_start_npu *sn; 608 + u32 cmd_len; 609 + u32 arg_sz; 610 + 611 + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); 612 + arg_sz = cmd_len - sizeof(*sn); 613 + if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) 614 + return -EINVAL; 615 + 616 + 
if (*size < sizeof(*npu_slot) + arg_sz) 617 + return -EINVAL; 618 + 619 + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); 620 + if (npu_slot->cu_idx == INVALID_CU_IDX) 621 + return -EINVAL; 622 + 623 + memset(npu_slot, 0, sizeof(*npu_slot)); 624 + npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF; 625 + npu_slot->inst_buf_addr = sn->buffer; 626 + npu_slot->inst_size = sn->buffer_size; 627 + npu_slot->inst_prop_cnt = sn->prop_count; 628 + npu_slot->arg_cnt = arg_sz / sizeof(u32); 629 + memcpy(npu_slot->args, sn->prop_args, arg_sz); 630 + 631 + *size = sizeof(*npu_slot) + arg_sz; 632 + return 0; 633 + } 634 + 635 + static u32 aie2_get_npu_chain_msg_op(u32 cmd_op) 636 + { 637 + return MSG_OP_CHAIN_EXEC_NPU; 638 + } 639 + 640 + static struct aie2_exec_msg_ops npu_exec_message_ops = { 641 + .init_cu_req = aie2_init_exec_cu_req, 642 + .init_dpu_req = aie2_init_exec_dpu_req, 643 + .init_chain_req = aie2_init_npu_chain_req, 644 + .fill_cf_slot = aie2_cmdlist_fill_npu_cf, 645 + .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu, 646 + .get_chain_msg_op = aie2_get_npu_chain_msg_op, 647 + }; 648 + 649 + static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo, 650 + size_t *size, u32 *msg_op) 651 + { 652 + struct amdxdna_dev *xdna = cmd_abo->client->xdna; 653 + int ret; 654 + u32 op; 655 + 656 + 657 + op = amdxdna_cmd_get_op(cmd_abo); 658 + switch (op) { 659 + case ERT_START_CU: 660 + ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op); 661 + if (ret) { 662 + XDNA_DBG(xdna, "Init CU req failed ret %d", ret); 663 + return ret; 664 + } 665 + break; 666 + case ERT_START_NPU: 667 + ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op); 668 + if (ret) { 669 + XDNA_DBG(xdna, "Init DPU req failed ret %d", ret); 670 + return ret; 671 + } 672 + 673 + break; 674 + default: 675 + XDNA_ERR(xdna, "Unsupported op %d", op); 676 + ret = -EOPNOTSUPP; 677 + break; 678 + } 679 + 680 + return ret; 681 + } 682 + 683 + static int 684 + aie2_cmdlist_fill_slot(void 
*slot, struct amdxdna_gem_obj *cmd_abo, 685 + size_t *size, u32 *cmd_op) 686 + { 687 + struct amdxdna_dev *xdna = cmd_abo->client->xdna; 688 + int ret; 689 + u32 op; 690 + 691 + op = amdxdna_cmd_get_op(cmd_abo); 692 + if (*cmd_op == ERT_INVALID_CMD) 693 + *cmd_op = op; 694 + else if (op != *cmd_op) 695 + return -EINVAL; 696 + 697 + switch (op) { 698 + case ERT_START_CU: 699 + ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size); 700 + break; 701 + case ERT_START_NPU: 702 + ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size); 703 + break; 704 + default: 705 + XDNA_INFO(xdna, "Unsupported op %d", op); 706 + ret = -EOPNOTSUPP; 707 + break; 708 + } 709 + 710 + return ret; 711 + } 712 + 713 + void aie2_msg_init(struct amdxdna_dev_hdl *ndev) 714 + { 715 + if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND)) 716 + ndev->exec_msg_ops = &npu_exec_message_ops; 717 + else 718 + ndev->exec_msg_ops = &legacy_exec_message_ops; 719 + } 720 + 721 + static inline struct amdxdna_gem_obj * 722 + aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) 723 + { 724 + int idx = get_job_idx(job->seq); 725 + 726 + return job->hwctx->priv->cmd_buf[idx]; 727 + } 728 + 438 729 int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 439 730 int (*notify_cb)(void *, void __iomem *, size_t)) 440 731 { 441 732 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 442 733 struct amdxdna_dev *xdna = hwctx->client->xdna; 443 734 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 444 - union { 445 - struct execute_buffer_req ebuf; 446 - struct exec_dpu_req dpu; 447 - } req; 448 735 struct xdna_mailbox_msg msg; 449 - u32 payload_len; 450 - void *payload; 451 - int cu_idx; 736 + union exec_req req; 452 737 int ret; 453 - u32 op; 454 738 455 739 if (!chann) 456 740 return -ENODEV; 457 741 458 - payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); 459 - if (!payload) { 460 - XDNA_ERR(xdna, "Invalid command, cannot get payload"); 461 - return -EINVAL; 462 - } 742 + ret = 
aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode); 743 + if (ret) 744 + return ret; 463 745 464 - cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo); 465 - if (cu_idx < 0) { 466 - XDNA_DBG(xdna, "Invalid cu idx"); 467 - return -EINVAL; 468 - } 469 - 470 - op = amdxdna_cmd_get_op(cmd_abo); 471 - switch (op) { 472 - case ERT_START_CU: 473 - if (unlikely(payload_len > sizeof(req.ebuf.payload))) 474 - XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len); 475 - req.ebuf.cu_idx = cu_idx; 476 - memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload)); 477 - msg.send_size = sizeof(req.ebuf); 478 - msg.opcode = MSG_OP_EXECUTE_BUFFER_CF; 479 - break; 480 - case ERT_START_NPU: { 481 - struct amdxdna_cmd_start_npu *sn = payload; 482 - 483 - if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload))) 484 - XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len); 485 - req.dpu.inst_buf_addr = sn->buffer; 486 - req.dpu.inst_size = sn->buffer_size; 487 - req.dpu.inst_prop_cnt = sn->prop_count; 488 - req.dpu.cu_idx = cu_idx; 489 - memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload)); 490 - msg.send_size = sizeof(req.dpu); 491 - msg.opcode = MSG_OP_EXEC_DPU; 492 - break; 493 - } 494 - default: 495 - XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op); 496 - return -EINVAL; 497 - } 498 746 msg.handle = job; 499 747 msg.notify_cb = notify_cb; 500 748 msg.send_data = (u8 *)&req; ··· 760 508 return 0; 761 509 } 762 510 763 - static int 764 - aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, 765 - struct amdxdna_gem_obj *abo, u32 *size) 766 - { 767 - struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset; 768 - int cu_idx = amdxdna_cmd_get_cu_idx(abo); 769 - u32 payload_len; 770 - void *payload; 771 - 772 - if (cu_idx < 0) 773 - return -EINVAL; 774 - 775 - payload = amdxdna_cmd_get_payload(abo, &payload_len); 776 - if (!payload) 777 - return -EINVAL; 778 - 779 - if (!slot_has_space(*buf, offset, payload_len)) 780 - return -ENOSPC; 781 - 782 - 
buf->cu_idx = cu_idx; 783 - buf->arg_cnt = payload_len / sizeof(u32); 784 - memcpy(buf->args, payload, payload_len); 785 - /* Accurate buf size to hint firmware to do necessary copy */ 786 - *size = sizeof(*buf) + payload_len; 787 - return 0; 788 - } 789 - 790 - static int 791 - aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, 792 - struct amdxdna_gem_obj *abo, u32 *size) 793 - { 794 - struct cmd_chain_slot_dpu *buf = cmd_buf + offset; 795 - int cu_idx = amdxdna_cmd_get_cu_idx(abo); 796 - struct amdxdna_cmd_start_npu *sn; 797 - u32 payload_len; 798 - void *payload; 799 - u32 arg_sz; 800 - 801 - if (cu_idx < 0) 802 - return -EINVAL; 803 - 804 - payload = amdxdna_cmd_get_payload(abo, &payload_len); 805 - if (!payload) 806 - return -EINVAL; 807 - sn = payload; 808 - arg_sz = payload_len - sizeof(*sn); 809 - if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) 810 - return -EINVAL; 811 - 812 - if (!slot_has_space(*buf, offset, arg_sz)) 813 - return -ENOSPC; 814 - 815 - buf->inst_buf_addr = sn->buffer; 816 - buf->inst_size = sn->buffer_size; 817 - buf->inst_prop_cnt = sn->prop_count; 818 - buf->cu_idx = cu_idx; 819 - buf->arg_cnt = arg_sz / sizeof(u32); 820 - memcpy(buf->args, sn->prop_args, arg_sz); 821 - 822 - /* Accurate buf size to hint firmware to do necessary copy */ 823 - *size = sizeof(*buf) + arg_sz; 824 - return 0; 825 - } 826 - 827 - static int 828 - aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset, 829 - struct amdxdna_gem_obj *abo, u32 *size) 830 - { 831 - u32 this_op = amdxdna_cmd_get_op(abo); 832 - void *cmd_buf = cmdbuf_abo->mem.kva; 833 - int ret; 834 - 835 - if (this_op != op) { 836 - ret = -EINVAL; 837 - goto done; 838 - } 839 - 840 - switch (op) { 841 - case ERT_START_CU: 842 - ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size); 843 - break; 844 - case ERT_START_NPU: 845 - ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size); 846 - break; 847 - default: 848 - ret = 
-EOPNOTSUPP; 849 - } 850 - 851 - done: 852 - if (ret) { 853 - XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d", 854 - op, ret); 855 - } 856 - return ret; 857 - } 858 - 859 - static inline struct amdxdna_gem_obj * 860 - aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) 861 - { 862 - int idx = get_job_idx(job->seq); 863 - 864 - return job->hwctx->priv->cmd_buf[idx]; 865 - } 866 - 867 - static void 868 - aie2_cmdlist_prepare_request(struct cmd_chain_req *req, 869 - struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt) 870 - { 871 - req->buf_addr = cmdbuf_abo->mem.dev_addr; 872 - req->buf_size = size; 873 - req->count = cnt; 874 - drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); 875 - XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d", 876 - req->buf_addr, size, cnt); 877 - } 878 - 879 - static inline u32 880 - aie2_cmd_op_to_msg_op(u32 op) 881 - { 882 - switch (op) { 883 - case ERT_START_CU: 884 - return MSG_OP_CHAIN_EXEC_BUFFER_CF; 885 - case ERT_START_NPU: 886 - return MSG_OP_CHAIN_EXEC_DPU; 887 - default: 888 - return MSG_OP_MAX_OPCODE; 889 - } 890 - } 891 - 892 511 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, 893 512 struct amdxdna_sched_job *job, 894 513 int (*notify_cb)(void *, void __iomem *, size_t)) ··· 768 645 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 769 646 struct amdxdna_client *client = hwctx->client; 770 647 struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 648 + struct amdxdna_dev *xdna = client->xdna; 771 649 struct amdxdna_cmd_chain *payload; 772 650 struct xdna_mailbox_msg msg; 773 - struct cmd_chain_req req; 651 + union exec_chain_req req; 774 652 u32 payload_len; 775 653 u32 offset = 0; 776 - u32 size; 654 + size_t size; 777 655 int ret; 778 656 u32 op; 779 657 u32 i; ··· 785 661 payload_len < struct_size(payload, data, payload->command_count)) 786 662 return -EINVAL; 787 663 664 + op = ERT_INVALID_CMD; 788 665 for (i = 0; i < payload->command_count; i++) { 789 666 
u32 boh = (u32)(payload->data[i]); 790 667 struct amdxdna_gem_obj *abo; 791 668 792 669 abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); 793 670 if (!abo) { 794 - XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh); 671 + XDNA_ERR(xdna, "Failed to find cmd BO %d", boh); 795 672 return -ENOENT; 796 673 } 797 674 798 - /* All sub-cmd should have same op, use the first one. */ 799 - if (i == 0) 800 - op = amdxdna_cmd_get_op(abo); 801 - 802 - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size); 675 + size = cmdbuf_abo->mem.size - offset; 676 + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset, 677 + abo, &size, &op); 803 678 amdxdna_gem_put_obj(abo); 804 679 if (ret) 805 - return -EINVAL; 680 + return ret; 806 681 807 682 offset += size; 808 683 } 809 - 810 - /* The offset is the accumulated total size of the cmd buffer */ 811 - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count); 812 - 813 - msg.opcode = aie2_cmd_op_to_msg_op(op); 684 + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); 814 685 if (msg.opcode == MSG_OP_MAX_OPCODE) 815 686 return -EOPNOTSUPP; 687 + 688 + /* The offset is the accumulated total size of the cmd buffer */ 689 + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, 690 + offset, payload->command_count); 691 + drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset); 692 + 816 693 msg.handle = job; 817 694 msg.notify_cb = notify_cb; 818 695 msg.send_data = (u8 *)&req; 819 696 msg.send_size = sizeof(req); 820 697 ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 821 698 if (ret) { 822 - XDNA_ERR(hwctx->client->xdna, "Send message failed"); 699 + XDNA_ERR(xdna, "Send message failed"); 823 700 return ret; 824 701 } 825 702 ··· 833 708 { 834 709 struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); 835 710 struct mailbox_channel *chann = hwctx->priv->mbox_chann; 711 + struct amdxdna_dev *xdna = hwctx->client->xdna; 836 712 struct amdxdna_gem_obj *cmd_abo 
= job->cmd_bo; 837 713 struct xdna_mailbox_msg msg; 838 - struct cmd_chain_req req; 839 - u32 size; 714 + union exec_chain_req req; 715 + u32 op = ERT_INVALID_CMD; 716 + size_t size; 840 717 int ret; 841 - u32 op; 842 718 843 - op = amdxdna_cmd_get_op(cmd_abo); 844 - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size); 719 + size = cmdbuf_abo->mem.size; 720 + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op); 845 721 if (ret) 846 722 return ret; 847 723 848 - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1); 849 - 850 - msg.opcode = aie2_cmd_op_to_msg_op(op); 724 + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op); 851 725 if (msg.opcode == MSG_OP_MAX_OPCODE) 852 726 return -EOPNOTSUPP; 727 + 728 + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr, 729 + size, 1); 730 + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); 731 + 853 732 msg.handle = job; 854 733 msg.notify_cb = notify_cb; 855 734 msg.send_data = (u8 *)&req;
+39 -3
drivers/accel/amdxdna/aie2_msg_priv.h
··· 19 19 MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12, 20 20 MSG_OP_CHAIN_EXEC_DPU = 0x13, 21 21 MSG_OP_CONFIG_DEBUG_BO = 0x14, 22 + MSG_OP_CHAIN_EXEC_NPU = 0x18, 22 23 MSG_OP_MAX_XRT_OPCODE, 23 24 MSG_OP_SUSPEND = 0x101, 24 25 MSG_OP_RESUME = 0x102, ··· 149 148 __u32 cu_idx; 150 149 __u32 payload[35]; 151 150 } __packed; 151 + 152 + enum exec_npu_type { 153 + EXEC_NPU_TYPE_NON_ELF = 0x1, 154 + EXEC_NPU_TYPE_PARTIAL_ELF = 0x2, 155 + }; 156 + 157 + union exec_req { 158 + struct execute_buffer_req ebuf; 159 + struct exec_dpu_req dpu_req; 160 + }; 152 161 153 162 struct execute_buffer_resp { 154 163 enum aie2_msg_status status; ··· 331 320 } __packed; 332 321 333 322 #define MAX_CHAIN_CMDBUF_SIZE SZ_4K 334 - #define slot_has_space(slot, offset, payload_size) \ 335 - (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \ 336 - sizeof(typeof(slot))) 337 323 338 324 struct cmd_chain_slot_execbuf_cf { 339 325 __u32 cu_idx; ··· 348 340 __u32 args[] __counted_by(arg_cnt); 349 341 }; 350 342 343 + #define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32)) 344 + struct cmd_chain_slot_npu { 345 + enum exec_npu_type type; 346 + u64 inst_buf_addr; 347 + u64 save_buf_addr; 348 + u64 restore_buf_addr; 349 + u32 inst_size; 350 + u32 save_size; 351 + u32 restore_size; 352 + u32 inst_prop_cnt; 353 + u32 cu_idx; 354 + u32 arg_cnt; 355 + u32 args[] __counted_by(arg_cnt); 356 + } __packed; 357 + 351 358 struct cmd_chain_req { 352 359 __u64 buf_addr; 353 360 __u32 buf_size; 354 361 __u32 count; 355 362 } __packed; 363 + 364 + struct cmd_chain_npu_req { 365 + u32 flags; 366 + u32 reserved; 367 + u64 buf_addr; 368 + u32 buf_size; 369 + u32 count; 370 + } __packed; 371 + 372 + union exec_chain_req { 373 + struct cmd_chain_npu_req npu_req; 374 + struct cmd_chain_req req; 375 + }; 356 376 357 377 struct cmd_chain_resp { 358 378 enum aie2_msg_status status;
+13
drivers/accel/amdxdna/aie2_pci.c
··· 55 55 56 56 static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor) 57 57 { 58 + const struct aie2_fw_feature_tbl *feature; 58 59 struct amdxdna_dev *xdna = ndev->xdna; 59 60 60 61 /* ··· 79 78 XDNA_ERR(xdna, "Firmware minor version smaller than supported"); 80 79 return -EINVAL; 81 80 } 81 + 82 + for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor; 83 + feature++) { 84 + if (fw_minor < feature->min_minor) 85 + continue; 86 + if (feature->max_minor > 0 && fw_minor > feature->max_minor) 87 + continue; 88 + 89 + set_bit(feature->feature, &ndev->feature_mask); 90 + } 91 + 82 92 return 0; 83 93 } 84 94 ··· 599 587 } 600 588 601 589 release_firmware(fw); 590 + aie2_msg_init(ndev); 602 591 amdxdna_pm_init(xdna); 603 592 return 0; 604 593
+29
drivers/accel/amdxdna/aie2_pci.h
··· 156 156 AIE2_DEV_START, 157 157 }; 158 158 159 + struct aie2_exec_msg_ops { 160 + int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, 161 + size_t *size, u32 *msg_op); 162 + int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, 163 + size_t *size, u32 *msg_op); 164 + void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); 165 + int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 166 + int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 167 + u32 (*get_chain_msg_op)(u32 cmd_op); 168 + }; 169 + 159 170 struct amdxdna_dev_hdl { 160 171 struct amdxdna_dev *xdna; 161 172 const struct amdxdna_dev_priv *priv; ··· 184 173 u32 total_col; 185 174 struct aie_version version; 186 175 struct aie_metadata metadata; 176 + unsigned long feature_mask; 177 + struct aie2_exec_msg_ops *exec_msg_ops; 187 178 188 179 /* power management and clock*/ 189 180 enum amdxdna_power_mode_type pw_mode; ··· 219 206 int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 220 207 }; 221 208 209 + enum aie2_fw_feature { 210 + AIE2_NPU_COMMAND, 211 + AIE2_FEATURE_MAX 212 + }; 213 + 214 + struct aie2_fw_feature_tbl { 215 + enum aie2_fw_feature feature; 216 + u32 max_minor; 217 + u32 min_minor; 218 + }; 219 + 220 + #define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask) 221 + 222 222 struct amdxdna_dev_priv { 223 223 const char *fw_path; 224 224 u64 protocol_major; 225 225 u64 protocol_minor; 226 226 const struct rt_config *rt_config; 227 227 const struct dpm_clk_freq *dpm_clk_tbl; 228 + const struct aie2_fw_feature_tbl *fw_feature_tbl; 228 229 229 230 #define COL_ALIGN_NONE 0 230 231 #define COL_ALIGN_NATURE 1 ··· 263 236 extern const struct dpm_clk_freq npu4_dpm_clk_table[]; 264 237 extern const struct rt_config npu1_default_rt_cfg[]; 265 238 extern const struct rt_config npu4_default_rt_cfg[]; 239 + extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[]; 266 240 267 
241 /* aie2_smu.c */ 268 242 int aie2_smu_init(struct amdxdna_dev_hdl *ndev); ··· 288 260 struct amdxdna_drm_get_array *args); 289 261 290 262 /* aie2_message.c */ 263 + void aie2_msg_init(struct amdxdna_dev_hdl *ndev); 291 264 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); 292 265 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); 293 266 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
+3 -3
drivers/accel/amdxdna/amdxdna_ctx.c
··· 113 113 return &cmd->data[num_masks]; 114 114 } 115 115 116 - int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) 116 + u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) 117 117 { 118 118 struct amdxdna_cmd *cmd = abo->mem.kva; 119 119 u32 num_masks, i; 120 120 u32 *cu_mask; 121 121 122 122 if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) 123 - return -1; 123 + return INVALID_CU_IDX; 124 124 125 125 num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); 126 126 cu_mask = cmd->data; ··· 129 129 return ffs(cu_mask[i]) - 1; 130 130 } 131 131 132 - return -1; 132 + return INVALID_CU_IDX; 133 133 } 134 134 135 135 /*
+7 -4
drivers/accel/amdxdna/amdxdna_ctx.h
··· 13 13 struct amdxdna_hwctx_priv; 14 14 15 15 enum ert_cmd_opcode { 16 - ERT_START_CU = 0, 17 - ERT_CMD_CHAIN = 19, 18 - ERT_START_NPU = 20, 16 + ERT_START_CU = 0, 17 + ERT_CMD_CHAIN = 19, 18 + ERT_START_NPU = 20, 19 + ERT_INVALID_CMD = ~0U, 19 20 }; 20 21 21 22 enum ert_cmd_state { ··· 64 63 u32 header; 65 64 u32 data[]; 66 65 }; 66 + 67 + #define INVALID_CU_IDX (~0U) 67 68 68 69 struct amdxdna_hwctx { 69 70 struct amdxdna_client *client; ··· 153 150 } 154 151 155 152 void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); 156 - int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); 153 + u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); 157 154 158 155 void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); 159 156 void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
+6
drivers/accel/amdxdna/npu1_regs.c
··· 63 63 { 0 } 64 64 }; 65 65 66 + static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { 67 + { .feature = AIE2_NPU_COMMAND, .min_minor = 8 }, 68 + { 0 } 69 + }; 70 + 66 71 static const struct amdxdna_dev_priv npu1_dev_priv = { 67 72 .fw_path = "amdnpu/1502_00/npu.sbin", 68 73 .protocol_major = 0x5, 69 74 .protocol_minor = 0x7, 70 75 .rt_config = npu1_default_rt_cfg, 71 76 .dpm_clk_tbl = npu1_dpm_clk_table, 77 + .fw_feature_tbl = npu1_fw_feature_table, 72 78 .col_align = COL_ALIGN_NONE, 73 79 .mbox_dev_addr = NPU1_MBOX_BAR_BASE, 74 80 .mbox_size = 0, /* Use BAR size */
+1
drivers/accel/amdxdna/npu2_regs.c
··· 67 67 .protocol_minor = 0x6, 68 68 .rt_config = npu4_default_rt_cfg, 69 69 .dpm_clk_tbl = npu4_dpm_clk_table, 70 + .fw_feature_tbl = npu4_fw_feature_table, 70 71 .col_align = COL_ALIGN_NATURE, 71 72 .mbox_dev_addr = NPU2_MBOX_BAR_BASE, 72 73 .mbox_size = 0, /* Use BAR size */
+6
drivers/accel/amdxdna/npu4_regs.c
··· 83 83 { 0 } 84 84 }; 85 85 86 + const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { 87 + { .feature = AIE2_NPU_COMMAND, .min_minor = 15 }, 88 + { 0 } 89 + }; 90 + 86 91 static const struct amdxdna_dev_priv npu4_dev_priv = { 87 92 .fw_path = "amdnpu/17f0_10/npu.sbin", 88 93 .protocol_major = 0x6, 89 94 .protocol_minor = 12, 90 95 .rt_config = npu4_default_rt_cfg, 91 96 .dpm_clk_tbl = npu4_dpm_clk_table, 97 + .fw_feature_tbl = npu4_fw_feature_table, 92 98 .col_align = COL_ALIGN_NATURE, 93 99 .mbox_dev_addr = NPU4_MBOX_BAR_BASE, 94 100 .mbox_size = 0, /* Use BAR size */
+1
drivers/accel/amdxdna/npu5_regs.c
··· 67 67 .protocol_minor = 12, 68 68 .rt_config = npu4_default_rt_cfg, 69 69 .dpm_clk_tbl = npu4_dpm_clk_table, 70 + .fw_feature_tbl = npu4_fw_feature_table, 70 71 .col_align = COL_ALIGN_NATURE, 71 72 .mbox_dev_addr = NPU5_MBOX_BAR_BASE, 72 73 .mbox_size = 0, /* Use BAR size */
+1
drivers/accel/amdxdna/npu6_regs.c
··· 67 67 .protocol_minor = 12, 68 68 .rt_config = npu4_default_rt_cfg, 69 69 .dpm_clk_tbl = npu4_dpm_clk_table, 70 + .fw_feature_tbl = npu4_fw_feature_table, 70 71 .col_align = COL_ALIGN_NATURE, 71 72 .mbox_dev_addr = NPU6_MBOX_BAR_BASE, 72 73 .mbox_size = 0, /* Use BAR size */