Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

dmaengine: switchtec-dma: Implement descriptor submission

On prep, a spin lock is taken and the next entry in the circular buffer
is filled. On submit, the spin lock just needs to be released as the
requests are already pending.

When switchtec_dma_issue_pending() is called, the sq_tail register
is written to indicate there are new jobs for the DMA engine to start
on.

Pause and resume operations are implemented by writing to a control
register.

Signed-off-by: Kelvin Cao <kelvin.cao@microchip.com>
Co-developed-by: George Ge <george.ge@microchip.com>
Signed-off-by: George Ge <george.ge@microchip.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://patch.msgid.link/20260302210419.3656-4-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>

authored by

Kelvin Cao and committed by
Vinod Koul
3af11dae 30eba9df

+225
+225
drivers/dma/switchtec_dma.c
··· 32 32 #define SWITCHTEC_REG_SE_BUF_CNT 0x98 33 33 #define SWITCHTEC_REG_SE_BUF_BASE 0x9a 34 34 35 + #define SWITCHTEC_DESC_MAX_SIZE 0x100000 36 + 35 37 #define SWITCHTEC_CHAN_CTRL_PAUSE BIT(0) 36 38 #define SWITCHTEC_CHAN_CTRL_HALT BIT(1) 37 39 #define SWITCHTEC_CHAN_CTRL_RESET BIT(2) ··· 42 40 #define SWITCHTEC_CHAN_STS_PAUSED BIT(9) 43 41 #define SWITCHTEC_CHAN_STS_HALTED BIT(10) 44 42 #define SWITCHTEC_CHAN_STS_PAUSED_MASK GENMASK(29, 13) 43 + 44 + #define SWITCHTEC_INVALID_HFID 0xffff 45 45 46 46 #define SWITCHTEC_DMA_SQ_SIZE SZ_32K 47 47 #define SWITCHTEC_DMA_CQ_SIZE SZ_32K ··· 207 203 __le16 dfid; 208 204 __le16 sfid; 209 205 }; 206 + 207 + #define SWITCHTEC_SE_DFM BIT(5) 208 + #define SWITCHTEC_SE_LIOF BIT(6) 209 + #define SWITCHTEC_SE_BRR BIT(7) 210 + #define SWITCHTEC_SE_CID_MASK GENMASK(15, 0) 210 211 211 212 #define SWITCHTEC_CE_SC_LEN_ERR BIT(0) 212 213 #define SWITCHTEC_CE_SC_UR BIT(1) ··· 612 603 spin_unlock_bh(&swdma_chan->complete_lock); 613 604 } 614 605 606 + static struct dma_async_tx_descriptor * 607 + switchtec_dma_prep_desc(struct dma_chan *c, u16 dst_fid, dma_addr_t dma_dst, 608 + u16 src_fid, dma_addr_t dma_src, u64 data, 609 + size_t len, unsigned long flags) 610 + __acquires(swdma_chan->submit_lock) 611 + { 612 + struct switchtec_dma_chan *swdma_chan = 613 + container_of(c, struct switchtec_dma_chan, dma_chan); 614 + struct switchtec_dma_desc *desc; 615 + int head, tail; 616 + 617 + spin_lock_bh(&swdma_chan->submit_lock); 618 + 619 + if (!swdma_chan->ring_active) 620 + goto err_unlock; 621 + 622 + tail = READ_ONCE(swdma_chan->tail); 623 + head = swdma_chan->head; 624 + 625 + if (!CIRC_SPACE(head, tail, SWITCHTEC_DMA_RING_SIZE)) 626 + goto err_unlock; 627 + 628 + desc = swdma_chan->desc_ring[head]; 629 + 630 + if (src_fid != SWITCHTEC_INVALID_HFID && 631 + dst_fid != SWITCHTEC_INVALID_HFID) 632 + desc->hw->ctrl |= SWITCHTEC_SE_DFM; 633 + 634 + if (flags & DMA_PREP_INTERRUPT) 635 + desc->hw->ctrl |= SWITCHTEC_SE_LIOF; 636 + 637 + if 
(flags & DMA_PREP_FENCE) 638 + desc->hw->ctrl |= SWITCHTEC_SE_BRR; 639 + 640 + desc->txd.flags = flags; 641 + 642 + desc->completed = false; 643 + desc->hw->opc = SWITCHTEC_DMA_OPC_MEMCPY; 644 + desc->hw->addr_lo = cpu_to_le32(lower_32_bits(dma_src)); 645 + desc->hw->addr_hi = cpu_to_le32(upper_32_bits(dma_src)); 646 + desc->hw->daddr_lo = cpu_to_le32(lower_32_bits(dma_dst)); 647 + desc->hw->daddr_hi = cpu_to_le32(upper_32_bits(dma_dst)); 648 + desc->hw->byte_cnt = cpu_to_le32(len); 649 + desc->hw->tlp_setting = 0; 650 + desc->hw->dfid = cpu_to_le16(dst_fid); 651 + desc->hw->sfid = cpu_to_le16(src_fid); 652 + swdma_chan->cid &= SWITCHTEC_SE_CID_MASK; 653 + desc->hw->cid = cpu_to_le16(swdma_chan->cid++); 654 + desc->orig_size = len; 655 + 656 + /* return with the lock held, it will be released in tx_submit */ 657 + 658 + return &desc->txd; 659 + 660 + err_unlock: 661 + /* 662 + * Keep sparse happy by restoring an even lock count on 663 + * this lock. 664 + */ 665 + __acquire(swdma_chan->submit_lock); 666 + 667 + spin_unlock_bh(&swdma_chan->submit_lock); 668 + return NULL; 669 + } 670 + 671 + static struct dma_async_tx_descriptor * 672 + switchtec_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dst, 673 + dma_addr_t dma_src, size_t len, unsigned long flags) 674 + __acquires(swdma_chan->submit_lock) 675 + { 676 + if (len > SWITCHTEC_DESC_MAX_SIZE) { 677 + /* 678 + * Keep sparse happy by restoring an even lock count on 679 + * this lock. 
680 + */ 681 + __acquire(swdma_chan->submit_lock); 682 + return NULL; 683 + } 684 + 685 + return switchtec_dma_prep_desc(c, SWITCHTEC_INVALID_HFID, dma_dst, 686 + SWITCHTEC_INVALID_HFID, dma_src, 0, len, 687 + flags); 688 + } 689 + 690 + static dma_cookie_t 691 + switchtec_dma_tx_submit(struct dma_async_tx_descriptor *desc) 692 + __releases(swdma_chan->submit_lock) 693 + { 694 + struct switchtec_dma_chan *swdma_chan = 695 + container_of(desc->chan, struct switchtec_dma_chan, dma_chan); 696 + dma_cookie_t cookie; 697 + int head; 698 + 699 + head = swdma_chan->head + 1; 700 + head &= SWITCHTEC_DMA_RING_SIZE - 1; 701 + 702 + /* 703 + * Ensure the desc updates are visible before updating the head index 704 + */ 705 + smp_store_release(&swdma_chan->head, head); 706 + 707 + cookie = dma_cookie_assign(desc); 708 + 709 + spin_unlock_bh(&swdma_chan->submit_lock); 710 + 711 + return cookie; 712 + } 713 + 714 + static enum dma_status switchtec_dma_tx_status(struct dma_chan *chan, 715 + dma_cookie_t cookie, struct dma_tx_state *txstate) 716 + { 717 + struct switchtec_dma_chan *swdma_chan = 718 + container_of(chan, struct switchtec_dma_chan, dma_chan); 719 + enum dma_status ret; 720 + 721 + ret = dma_cookie_status(chan, cookie, txstate); 722 + if (ret == DMA_COMPLETE) 723 + return ret; 724 + 725 + /* 726 + * For jobs where the interrupts are disabled, this is the only place 727 + * to process the completions returned by the hardware. Callers that 728 + * disable interrupts must call tx_status() to determine when a job 729 + * is done, so it is safe to process completions here. If a job has 730 + * interrupts enabled, then the completions will normally be processed 731 + * in the tasklet that is triggered by the interrupt and tx_status() 732 + * does not need to be called. 
733 + */ 734 + switchtec_dma_cleanup_completed(swdma_chan); 735 + 736 + return dma_cookie_status(chan, cookie, txstate); 737 + } 738 + 739 + static void switchtec_dma_issue_pending(struct dma_chan *chan) 740 + { 741 + struct switchtec_dma_chan *swdma_chan = 742 + container_of(chan, struct switchtec_dma_chan, dma_chan); 743 + struct switchtec_dma_dev *swdma_dev = swdma_chan->swdma_dev; 744 + 745 + /* 746 + * The sq_tail register is actually for the head of the 747 + * submission queue. Chip has the opposite define of head/tail 748 + * to the Linux kernel. 749 + */ 750 + 751 + rcu_read_lock(); 752 + if (!rcu_dereference(swdma_dev->pdev)) { 753 + rcu_read_unlock(); 754 + return; 755 + } 756 + 757 + spin_lock_bh(&swdma_chan->submit_lock); 758 + writew(swdma_chan->head, &swdma_chan->mmio_chan_hw->sq_tail); 759 + spin_unlock_bh(&swdma_chan->submit_lock); 760 + 761 + rcu_read_unlock(); 762 + } 763 + 764 + static int switchtec_dma_pause(struct dma_chan *chan) 765 + { 766 + struct switchtec_dma_chan *swdma_chan = 767 + container_of(chan, struct switchtec_dma_chan, dma_chan); 768 + struct chan_hw_regs __iomem *chan_hw = swdma_chan->mmio_chan_hw; 769 + struct pci_dev *pdev; 770 + int ret; 771 + 772 + rcu_read_lock(); 773 + pdev = rcu_dereference(swdma_chan->swdma_dev->pdev); 774 + if (!pdev) { 775 + ret = -ENODEV; 776 + goto unlock_and_exit; 777 + } 778 + 779 + spin_lock(&swdma_chan->hw_ctrl_lock); 780 + writeb(SWITCHTEC_CHAN_CTRL_PAUSE, &chan_hw->ctrl); 781 + ret = wait_for_chan_status(chan_hw, SWITCHTEC_CHAN_STS_PAUSED, true); 782 + spin_unlock(&swdma_chan->hw_ctrl_lock); 783 + 784 + unlock_and_exit: 785 + rcu_read_unlock(); 786 + return ret; 787 + } 788 + 789 + static int switchtec_dma_resume(struct dma_chan *chan) 790 + { 791 + struct switchtec_dma_chan *swdma_chan = 792 + container_of(chan, struct switchtec_dma_chan, dma_chan); 793 + struct chan_hw_regs __iomem *chan_hw = swdma_chan->mmio_chan_hw; 794 + struct pci_dev *pdev; 795 + int ret; 796 + 797 + rcu_read_lock(); 
798 + pdev = rcu_dereference(swdma_chan->swdma_dev->pdev); 799 + if (!pdev) { 800 + ret = -ENODEV; 801 + goto unlock_and_exit; 802 + } 803 + 804 + spin_lock(&swdma_chan->hw_ctrl_lock); 805 + writeb(0, &chan_hw->ctrl); 806 + ret = wait_for_chan_status(chan_hw, SWITCHTEC_CHAN_STS_PAUSED, false); 807 + spin_unlock(&swdma_chan->hw_ctrl_lock); 808 + 809 + unlock_and_exit: 810 + rcu_read_unlock(); 811 + return ret; 812 + } 813 + 615 814 static void switchtec_dma_desc_task(unsigned long data) 616 815 { 617 816 struct switchtec_dma_chan *swdma_chan = (void *)data; ··· 938 721 } 939 722 940 723 dma_async_tx_descriptor_init(&desc->txd, &swdma_chan->dma_chan); 724 + desc->txd.tx_submit = switchtec_dma_tx_submit; 941 725 desc->hw = &swdma_chan->hw_sq[i]; 942 726 desc->completed = true; 943 727 ··· 1265 1047 1266 1048 dma = &swdma_dev->dma_dev; 1267 1049 dma->copy_align = DMAENGINE_ALIGN_8_BYTES; 1050 + dma_cap_set(DMA_MEMCPY, dma->cap_mask); 1051 + dma_cap_set(DMA_PRIVATE, dma->cap_mask); 1268 1052 dma->dev = get_device(&pdev->dev); 1269 1053 1270 1054 dma->device_alloc_chan_resources = switchtec_dma_alloc_chan_resources; 1271 1055 dma->device_free_chan_resources = switchtec_dma_free_chan_resources; 1056 + dma->device_prep_dma_memcpy = switchtec_dma_prep_memcpy; 1057 + dma->device_tx_status = switchtec_dma_tx_status; 1058 + dma->device_issue_pending = switchtec_dma_issue_pending; 1059 + dma->device_pause = switchtec_dma_pause; 1060 + dma->device_resume = switchtec_dma_resume; 1272 1061 dma->device_terminate_all = switchtec_dma_terminate_all; 1273 1062 dma->device_synchronize = switchtec_dma_synchronize; 1274 1063 dma->device_release = switchtec_dma_release;