Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: airoha: Add the capability to consume out-of-order DMA tx descriptors

EN7581 and AN7583 SoCs are capable of DMA mapping non-linear tx skbs on
non-consecutive DMA descriptors. This feature is useful when multiple
flows are queued on the same hw tx queue, since it allows the driver to
fully utilize the available tx DMA descriptors and avoids the starvation
of high-priority flows seen in the current codebase, which is caused by
head-of-line blocking introduced by low-priority flows.

Tested-by: Xuegang Lu <xuegang.lu@airoha.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20251106-airoha-tx-linked-list-v2-1-0706d4a322bd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Lorenzo Bianconi and committed by
Jakub Kicinski
3f47e67d 416dd649

+45 -47
+39 -46
drivers/net/ethernet/airoha/airoha_eth.c
··· 892 892 893 893 dma_unmap_single(eth->dev, e->dma_addr, e->dma_len, 894 894 DMA_TO_DEVICE); 895 - memset(e, 0, sizeof(*e)); 895 + e->dma_addr = 0; 896 + list_add_tail(&e->list, &q->tx_list); 897 + 896 898 WRITE_ONCE(desc->msg0, 0); 897 899 WRITE_ONCE(desc->msg1, 0); 898 900 q->queued--; 899 - 900 - /* completion ring can report out-of-order indexes if hw QoS 901 - * is enabled and packets with different priority are queued 902 - * to same DMA ring. Take into account possible out-of-order 903 - * reports incrementing DMA ring tail pointer 904 - */ 905 - while (q->tail != q->head && !q->entry[q->tail].dma_addr) 906 - q->tail = (q->tail + 1) % q->ndesc; 907 901 908 902 if (skb) { 909 903 u16 queue = skb_get_queue_mapping(skb); ··· 943 949 q->ndesc = size; 944 950 q->qdma = qdma; 945 951 q->free_thr = 1 + MAX_SKB_FRAGS; 952 + INIT_LIST_HEAD(&q->tx_list); 946 953 947 954 q->entry = devm_kzalloc(eth->dev, q->ndesc * sizeof(*q->entry), 948 955 GFP_KERNEL); ··· 956 961 return -ENOMEM; 957 962 958 963 for (i = 0; i < q->ndesc; i++) { 959 - u32 val; 964 + u32 val = FIELD_PREP(QDMA_DESC_DONE_MASK, 1); 960 965 961 - val = FIELD_PREP(QDMA_DESC_DONE_MASK, 1); 966 + list_add_tail(&q->entry[i].list, &q->tx_list); 962 967 WRITE_ONCE(q->desc[i].ctrl, cpu_to_le32(val)); 963 968 } 964 969 ··· 968 973 969 974 airoha_qdma_wr(qdma, REG_TX_RING_BASE(qid), dma_addr); 970 975 airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, 971 - FIELD_PREP(TX_RING_CPU_IDX_MASK, q->head)); 976 + FIELD_PREP(TX_RING_CPU_IDX_MASK, 0)); 972 977 airoha_qdma_rmw(qdma, REG_TX_DMA_IDX(qid), TX_RING_DMA_IDX_MASK, 973 - FIELD_PREP(TX_RING_DMA_IDX_MASK, q->head)); 978 + FIELD_PREP(TX_RING_DMA_IDX_MASK, 0)); 974 979 975 980 return 0; 976 981 } ··· 1026 1031 static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) 1027 1032 { 1028 1033 struct airoha_eth *eth = q->qdma->eth; 1034 + int i; 1029 1035 1030 1036 spin_lock_bh(&q->lock); 1031 - while (q->queued) { 1032 - struct airoha_queue_entry *e 
= &q->entry[q->tail]; 1037 + for (i = 0; i < q->ndesc; i++) { 1038 + struct airoha_queue_entry *e = &q->entry[i]; 1039 + 1040 + if (!e->dma_addr) 1041 + continue; 1033 1042 1034 1043 dma_unmap_single(eth->dev, e->dma_addr, e->dma_len, 1035 1044 DMA_TO_DEVICE); 1036 1045 dev_kfree_skb_any(e->skb); 1046 + e->dma_addr = 0; 1037 1047 e->skb = NULL; 1038 - 1039 - q->tail = (q->tail + 1) % q->ndesc; 1048 + list_add_tail(&e->list, &q->tx_list); 1040 1049 q->queued--; 1041 1050 } 1042 1051 spin_unlock_bh(&q->lock); ··· 1882 1883 #endif 1883 1884 } 1884 1885 1885 - static bool airoha_dev_tx_queue_busy(struct airoha_queue *q, u32 nr_frags) 1886 - { 1887 - u32 tail = q->tail <= q->head ? q->tail + q->ndesc : q->tail; 1888 - u32 index = q->head + nr_frags; 1889 - 1890 - /* completion napi can free out-of-order tx descriptors if hw QoS is 1891 - * enabled and packets with different priorities are queued to the same 1892 - * DMA ring. Take into account possible out-of-order reports checking 1893 - * if the tx queue is full using circular buffer head/tail pointers 1894 - * instead of the number of queued packets. 
1895 - */ 1896 - return index >= tail; 1897 - } 1898 - 1899 1886 static int airoha_get_fe_port(struct airoha_gdm_port *port) 1900 1887 { 1901 1888 struct airoha_qdma *qdma = port->qdma; ··· 1904 1919 struct airoha_gdm_port *port = netdev_priv(dev); 1905 1920 struct airoha_qdma *qdma = port->qdma; 1906 1921 u32 nr_frags, tag, msg0, msg1, len; 1922 + struct airoha_queue_entry *e; 1907 1923 struct netdev_queue *txq; 1908 1924 struct airoha_queue *q; 1925 + LIST_HEAD(tx_list); 1909 1926 void *data; 1910 1927 int i, qid; 1911 1928 u16 index; ··· 1953 1966 txq = netdev_get_tx_queue(dev, qid); 1954 1967 nr_frags = 1 + skb_shinfo(skb)->nr_frags; 1955 1968 1956 - if (airoha_dev_tx_queue_busy(q, nr_frags)) { 1969 + if (q->queued + nr_frags >= q->ndesc) { 1957 1970 /* not enough space in the queue */ 1958 1971 netif_tx_stop_queue(txq); 1959 1972 spin_unlock_bh(&q->lock); ··· 1962 1975 1963 1976 len = skb_headlen(skb); 1964 1977 data = skb->data; 1965 - index = q->head; 1978 + 1979 + e = list_first_entry(&q->tx_list, struct airoha_queue_entry, 1980 + list); 1981 + index = e - q->entry; 1966 1982 1967 1983 for (i = 0; i < nr_frags; i++) { 1968 1984 struct airoha_qdma_desc *desc = &q->desc[index]; 1969 - struct airoha_queue_entry *e = &q->entry[index]; 1970 1985 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1971 1986 dma_addr_t addr; 1972 1987 u32 val; ··· 1978 1989 if (unlikely(dma_mapping_error(dev->dev.parent, addr))) 1979 1990 goto error_unmap; 1980 1991 1981 - index = (index + 1) % q->ndesc; 1992 + list_move_tail(&e->list, &tx_list); 1993 + e->skb = i ? NULL : skb; 1994 + e->dma_addr = addr; 1995 + e->dma_len = len; 1996 + 1997 + e = list_first_entry(&q->tx_list, struct airoha_queue_entry, 1998 + list); 1999 + index = e - q->entry; 1982 2000 1983 2001 val = FIELD_PREP(QDMA_DESC_LEN_MASK, len); 1984 2002 if (i < nr_frags - 1) ··· 1998 2002 WRITE_ONCE(desc->msg1, cpu_to_le32(msg1)); 1999 2003 WRITE_ONCE(desc->msg2, cpu_to_le32(0xffff)); 2000 2004 2001 - e->skb = i ? 
NULL : skb; 2002 - e->dma_addr = addr; 2003 - e->dma_len = len; 2004 - 2005 2005 data = skb_frag_address(frag); 2006 2006 len = skb_frag_size(frag); 2007 2007 } 2008 - 2009 - q->head = index; 2010 2008 q->queued += i; 2011 2009 2012 2010 skb_tx_timestamp(skb); ··· 2009 2019 if (netif_xmit_stopped(txq) || !netdev_xmit_more()) 2010 2020 airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), 2011 2021 TX_RING_CPU_IDX_MASK, 2012 - FIELD_PREP(TX_RING_CPU_IDX_MASK, q->head)); 2022 + FIELD_PREP(TX_RING_CPU_IDX_MASK, index)); 2013 2023 2014 2024 if (q->ndesc - q->queued < q->free_thr) 2015 2025 netif_tx_stop_queue(txq); ··· 2019 2029 return NETDEV_TX_OK; 2020 2030 2021 2031 error_unmap: 2022 - for (i--; i >= 0; i--) { 2023 - index = (q->head + i) % q->ndesc; 2024 - dma_unmap_single(dev->dev.parent, q->entry[index].dma_addr, 2025 - q->entry[index].dma_len, DMA_TO_DEVICE); 2032 + while (!list_empty(&tx_list)) { 2033 + e = list_first_entry(&tx_list, struct airoha_queue_entry, 2034 + list); 2035 + dma_unmap_single(dev->dev.parent, e->dma_addr, e->dma_len, 2036 + DMA_TO_DEVICE); 2037 + e->dma_addr = 0; 2038 + list_move_tail(&e->list, &q->tx_list); 2026 2039 } 2027 2040 2028 2041 spin_unlock_bh(&q->lock);
+6 -1
drivers/net/ethernet/airoha/airoha_eth.h
··· 169 169 struct airoha_queue_entry { 170 170 union { 171 171 void *buf; 172 - struct sk_buff *skb; 172 + struct { 173 + struct list_head list; 174 + struct sk_buff *skb; 175 + }; 173 176 }; 174 177 dma_addr_t dma_addr; 175 178 u16 dma_len; ··· 196 193 struct napi_struct napi; 197 194 struct page_pool *page_pool; 198 195 struct sk_buff *skb; 196 + 197 + struct list_head tx_list; 199 198 }; 200 199 201 200 struct airoha_tx_irq_queue {