Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

eth: bnxt: use page pool for head frags

Testing small-size RPCs (300B-400B) on a large AMD system suggests
that page pool recycling is very useful even for just the head frags.
With this patch (and copy break disabled) I see a 30% performance
improvement (82Gbps -> 106Gbps).

Convert bnxt from normal page frags to page pool frags for head buffers.

On systems with small page size we can use the same pool as for TPA
pages. On systems with large pages the frag allocation logic of the
page pool is already used to split a large page into TPA chunks.
TPA chunks are much larger than heads (8k or 64k, AFAICT, vs 1kB)
and we always allocate the same sized chunks. Mixing allocation
of TPA and head pages would lead to sub-optimal memory use.
Plus Taehee's work on zero-copy / devmem will need to differentiate
between TPA and non-TPA page pool, anyway. Conditionally allocate
a new page pool for heads.

Link: https://patch.msgid.link/20241109035119.3391864-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+51 -48
+50 -48
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 864 864 bnapi->events &= ~BNXT_TX_CMP_EVENT; 865 865 } 866 866 867 + static bool bnxt_separate_head_pool(void) 868 + { 869 + return PAGE_SIZE > BNXT_RX_PAGE_SIZE; 870 + } 871 + 867 872 static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, 868 873 struct bnxt_rx_ring_info *rxr, 869 874 unsigned int *offset, ··· 891 886 } 892 887 893 888 static inline u8 *__bnxt_alloc_rx_frag(struct bnxt *bp, dma_addr_t *mapping, 889 + struct bnxt_rx_ring_info *rxr, 894 890 gfp_t gfp) 895 891 { 896 - u8 *data; 897 - struct pci_dev *pdev = bp->pdev; 892 + unsigned int offset; 893 + struct page *page; 898 894 899 - if (gfp == GFP_ATOMIC) 900 - data = napi_alloc_frag(bp->rx_buf_size); 901 - else 902 - data = netdev_alloc_frag(bp->rx_buf_size); 903 - if (!data) 895 + page = page_pool_alloc_frag(rxr->head_pool, &offset, 896 + bp->rx_buf_size, gfp); 897 + if (!page) 904 898 return NULL; 905 899 906 - *mapping = dma_map_single_attrs(&pdev->dev, data + bp->rx_dma_offset, 907 - bp->rx_buf_use_size, bp->rx_dir, 908 - DMA_ATTR_WEAK_ORDERING); 909 - 910 - if (dma_mapping_error(&pdev->dev, *mapping)) { 911 - skb_free_frag(data); 912 - data = NULL; 913 - } 914 - return data; 900 + *mapping = page_pool_get_dma_addr(page) + bp->rx_dma_offset + offset; 901 + return page_address(page) + offset; 915 902 } 916 903 917 904 int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, ··· 925 928 rx_buf->data = page; 926 929 rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset; 927 930 } else { 928 - u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp); 931 + u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, rxr, gfp); 929 932 930 933 if (!data) 931 934 return -ENOMEM; ··· 1176 1179 } 1177 1180 1178 1181 skb = napi_build_skb(data, bp->rx_buf_size); 1179 - dma_unmap_single_attrs(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size, 1180 - bp->rx_dir, DMA_ATTR_WEAK_ORDERING); 1182 + dma_sync_single_for_cpu(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size, 1183 + bp->rx_dir); 
1181 1184 if (!skb) { 1182 - skb_free_frag(data); 1185 + page_pool_free_va(rxr->head_pool, data, true); 1183 1186 return NULL; 1184 1187 } 1185 1188 1189 + skb_mark_for_recycle(skb); 1186 1190 skb_reserve(skb, bp->rx_offset); 1187 1191 skb_put(skb, offset_and_len & 0xffff); 1188 1192 return skb; ··· 1838 1840 u8 *new_data; 1839 1841 dma_addr_t new_mapping; 1840 1842 1841 - new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC); 1843 + new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, rxr, 1844 + GFP_ATOMIC); 1842 1845 if (!new_data) { 1843 1846 bnxt_abort_tpa(cpr, idx, agg_bufs); 1844 1847 cpr->sw_stats->rx.rx_oom_discards += 1; ··· 1851 1852 tpa_info->mapping = new_mapping; 1852 1853 1853 1854 skb = napi_build_skb(data, bp->rx_buf_size); 1854 - dma_unmap_single_attrs(&bp->pdev->dev, mapping, 1855 - bp->rx_buf_use_size, bp->rx_dir, 1856 - DMA_ATTR_WEAK_ORDERING); 1855 + dma_sync_single_for_cpu(&bp->pdev->dev, mapping, 1856 + bp->rx_buf_use_size, bp->rx_dir); 1857 1857 1858 1858 if (!skb) { 1859 - skb_free_frag(data); 1859 + page_pool_free_va(rxr->head_pool, data, true); 1860 1860 bnxt_abort_tpa(cpr, idx, agg_bufs); 1861 1861 cpr->sw_stats->rx.rx_oom_discards += 1; 1862 1862 return NULL; 1863 1863 } 1864 + skb_mark_for_recycle(skb); 1864 1865 skb_reserve(skb, bp->rx_offset); 1865 1866 skb_put(skb, len); 1866 1867 } ··· 3307 3308 3308 3309 static void bnxt_free_one_rx_ring(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) 3309 3310 { 3310 - struct pci_dev *pdev = bp->pdev; 3311 3311 int i, max_idx; 3312 3312 3313 3313 max_idx = bp->rx_nr_pages * RX_DESC_CNT; 3314 3314 3315 3315 for (i = 0; i < max_idx; i++) { 3316 3316 struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i]; 3317 - dma_addr_t mapping = rx_buf->mapping; 3318 3317 void *data = rx_buf->data; 3319 3318 3320 3319 if (!data) 3321 3320 continue; 3322 3321 3323 3322 rx_buf->data = NULL; 3324 - if (BNXT_RX_PAGE_MODE(bp)) { 3323 + if (BNXT_RX_PAGE_MODE(bp)) 3325 3324 page_pool_recycle_direct(rxr->page_pool, 
data); 3326 - } else { 3327 - dma_unmap_single_attrs(&pdev->dev, mapping, 3328 - bp->rx_buf_use_size, bp->rx_dir, 3329 - DMA_ATTR_WEAK_ORDERING); 3330 - skb_free_frag(data); 3331 - } 3325 + else 3326 + page_pool_free_va(rxr->head_pool, data, true); 3332 3327 } 3333 3328 } 3334 3329 ··· 3349 3356 static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) 3350 3357 { 3351 3358 struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr]; 3352 - struct pci_dev *pdev = bp->pdev; 3353 3359 struct bnxt_tpa_idx_map *map; 3354 3360 int i; 3355 3361 ··· 3362 3370 if (!data) 3363 3371 continue; 3364 3372 3365 - dma_unmap_single_attrs(&pdev->dev, tpa_info->mapping, 3366 - bp->rx_buf_use_size, bp->rx_dir, 3367 - DMA_ATTR_WEAK_ORDERING); 3368 - 3369 3373 tpa_info->data = NULL; 3370 - 3371 - skb_free_frag(data); 3374 + page_pool_free_va(rxr->head_pool, data, false); 3372 3375 } 3373 3376 3374 3377 skip_rx_tpa_free: ··· 3579 3592 xdp_rxq_info_unreg(&rxr->xdp_rxq); 3580 3593 3581 3594 page_pool_destroy(rxr->page_pool); 3582 - rxr->page_pool = NULL; 3595 + if (rxr->page_pool != rxr->head_pool) 3596 + page_pool_destroy(rxr->head_pool); 3597 + rxr->page_pool = rxr->head_pool = NULL; 3583 3598 3584 3599 kfree(rxr->rx_agg_bmap); 3585 3600 rxr->rx_agg_bmap = NULL; ··· 3599 3610 int numa_node) 3600 3611 { 3601 3612 struct page_pool_params pp = { 0 }; 3613 + struct page_pool *pool; 3602 3614 3603 3615 pp.pool_size = bp->rx_agg_ring_size; 3604 3616 if (BNXT_RX_PAGE_MODE(bp)) ··· 3612 3622 pp.max_len = PAGE_SIZE; 3613 3623 pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 3614 3624 3615 - rxr->page_pool = page_pool_create(&pp); 3616 - if (IS_ERR(rxr->page_pool)) { 3617 - int err = PTR_ERR(rxr->page_pool); 3625 + pool = page_pool_create(&pp); 3626 + if (IS_ERR(pool)) 3627 + return PTR_ERR(pool); 3628 + rxr->page_pool = pool; 3618 3629 3619 - rxr->page_pool = NULL; 3620 - return err; 3630 + if (bnxt_separate_head_pool()) { 3631 + pp.pool_size = max(bp->rx_ring_size, 1024); 3632 + pool = 
page_pool_create(&pp); 3633 + if (IS_ERR(pool)) 3634 + goto err_destroy_pp; 3621 3635 } 3636 + rxr->head_pool = pool; 3637 + 3622 3638 return 0; 3639 + 3640 + err_destroy_pp: 3641 + page_pool_destroy(rxr->page_pool); 3642 + rxr->page_pool = NULL; 3643 + return PTR_ERR(pool); 3623 3644 } 3624 3645 3625 3646 static int bnxt_alloc_rx_rings(struct bnxt *bp) ··· 4181 4180 u8 *data; 4182 4181 4183 4182 for (i = 0; i < bp->max_tpa; i++) { 4184 - data = __bnxt_alloc_rx_frag(bp, &mapping, GFP_KERNEL); 4183 + data = __bnxt_alloc_rx_frag(bp, &mapping, rxr, 4184 + GFP_KERNEL); 4185 4185 if (!data) 4186 4186 return -ENOMEM; 4187 4187
+1
drivers/net/ethernet/broadcom/bnxt/bnxt.h
··· 1105 1105 struct bnxt_ring_struct rx_agg_ring_struct; 1106 1106 struct xdp_rxq_info xdp_rxq; 1107 1107 struct page_pool *page_pool; 1108 + struct page_pool *head_pool; 1108 1109 }; 1109 1110 1110 1111 struct bnxt_rx_sw_stats {