Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: bcmasp: Switch to page pool for RX path

Switching the RX path to a page pool reduces CPU cycles and data cache
misses by 1.9%.

Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Justin Chen <justin.chen@broadcom.com>
Reviewed-by: Nicolai Buchwitz <nb@tipi-net.de>
Link: https://patch.msgid.link/20260408001813.635679-1-florian.fainelli@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Florian Fainelli and committed by
Jakub Kicinski
686a7587 202ab599

+115 -23
+1
drivers/net/ethernet/broadcom/Kconfig
··· 272 272 depends on OF 273 273 select PHYLIB 274 274 select MDIO_BCM_UNIMAC 275 + select PAGE_POOL 275 276 help 276 277 This configuration enables the Broadcom ASP 2.0 Ethernet controller 277 278 driver which is present in Broadcom STB SoCs such as 72165.
+6 -2
drivers/net/ethernet/broadcom/asp2/bcmasp.h
··· 6 6 #include <linux/phy.h> 7 7 #include <linux/io-64-nonatomic-hi-lo.h> 8 8 #include <uapi/linux/ethtool.h> 9 + #include <net/page_pool/helpers.h> 9 10 10 11 #define ASP_INTR2_OFFSET 0x1000 11 12 #define ASP_INTR2_STATUS 0x0 ··· 299 298 void __iomem *rx_edpkt_cfg; 300 299 void __iomem *rx_edpkt_dma; 301 300 int rx_edpkt_index; 302 - int rx_buf_order; 303 301 struct bcmasp_desc *rx_edpkt_cpu; 304 302 dma_addr_t rx_edpkt_dma_addr; 305 303 dma_addr_t rx_edpkt_dma_read; 306 304 dma_addr_t rx_edpkt_dma_valid; 307 305 308 - /* RX buffer prefetcher ring*/ 306 + /* Streaming RX data ring (RBUF_4K mode) */ 309 307 void *rx_ring_cpu; 310 308 dma_addr_t rx_ring_dma; 311 309 dma_addr_t rx_ring_dma_valid; 310 + int rx_buf_order; 311 + 312 + /* Page pool for recycling RX SKB data pages */ 313 + struct page_pool *rx_page_pool; 312 314 struct napi_struct rx_napi; 313 315 314 316 struct bcmasp_res res;
+104 -21
drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
··· 15 15 #include <linux/platform_device.h> 16 16 #include <net/ip.h> 17 17 #include <net/ipv6.h> 18 + #include <net/page_pool/helpers.h> 18 19 19 20 #include "bcmasp.h" 20 21 #include "bcmasp_intf_defs.h" ··· 483 482 struct bcmasp_desc *desc; 484 483 struct sk_buff *skb; 485 484 dma_addr_t valid; 485 + struct page *page; 486 486 void *data; 487 487 u64 flags; 488 488 u32 len; 489 489 490 + /* Hardware advances DMA_VALID as it writes each descriptor 491 + * (RBUF_4K streaming mode); software chases with rx_edpkt_dma_read. 492 + */ 490 493 valid = rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID) + 1; 491 494 if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE) 492 495 valid = intf->rx_edpkt_dma_addr; ··· 498 493 while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) { 499 494 desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index]; 500 495 501 - /* Ensure that descriptor has been fully written to DRAM by 502 - * hardware before reading by the CPU 496 + /* Ensure the descriptor has been fully written to DRAM by 497 + * the hardware before the CPU reads it. 503 498 */ 504 499 rmb(); 505 500 506 - /* Calculate virt addr by offsetting from physical addr */ 501 + /* Locate the packet data inside the streaming ring buffer. */ 507 502 data = intf->rx_ring_cpu + 508 503 (DESC_ADDR(desc->buf) - intf->rx_ring_dma); 509 504 ··· 529 524 530 525 len = desc->size; 531 526 532 - skb = napi_alloc_skb(napi, len); 527 + /* Allocate a page pool page as the SKB data area so the 528 + * kernel can recycle it efficiently after the packet is 529 + * consumed, avoiding repeated slab allocations. 
530 + */ 531 + page = page_pool_dev_alloc_pages(intf->rx_page_pool); 532 + if (!page) { 533 + u64_stats_update_begin(&stats->syncp); 534 + u64_stats_inc(&stats->rx_dropped); 535 + u64_stats_update_end(&stats->syncp); 536 + intf->mib.alloc_rx_skb_failed++; 537 + goto next; 538 + } 539 + 540 + skb = napi_build_skb(page_address(page), PAGE_SIZE); 533 541 if (!skb) { 534 542 u64_stats_update_begin(&stats->syncp); 535 543 u64_stats_inc(&stats->rx_dropped); 536 544 u64_stats_update_end(&stats->syncp); 537 545 intf->mib.alloc_rx_skb_failed++; 538 - 546 + page_pool_recycle_direct(intf->rx_page_pool, page); 539 547 goto next; 540 548 } 541 549 550 + /* Reserve headroom then copy the full descriptor payload 551 + * (hardware prepends a 2-byte alignment pad at the start). 552 + */ 553 + skb_reserve(skb, NET_SKB_PAD); 542 554 skb_put(skb, len); 543 555 memcpy(skb->data, data, len); 556 + skb_mark_for_recycle(skb); 544 557 558 + /* Skip the 2-byte hardware alignment pad. */ 545 559 skb_pull(skb, 2); 546 560 len -= 2; 547 561 if (likely(intf->crc_fwd)) { ··· 582 558 u64_stats_update_end(&stats->syncp); 583 559 584 560 next: 561 + /* Return this portion of the streaming ring buffer to HW. 
*/ 585 562 rx_edpkt_cfg_wq(intf, (DESC_ADDR(desc->buf) + desc->size), 586 563 RX_EDPKT_RING_BUFFER_READ); 587 564 ··· 686 661 phy_print_status(phydev); 687 662 } 688 663 689 - static int bcmasp_alloc_buffers(struct bcmasp_intf *intf) 664 + static struct page_pool * 665 + bcmasp_rx_page_pool_create(struct bcmasp_intf *intf) 666 + { 667 + struct page_pool_params pp_params = { 668 + .order = 0, 669 + .flags = 0, 670 + .pool_size = NUM_4K_BUFFERS, 671 + .nid = NUMA_NO_NODE, 672 + .dev = &intf->parent->pdev->dev, 673 + .napi = &intf->rx_napi, 674 + .netdev = intf->ndev, 675 + .offset = 0, 676 + .max_len = PAGE_SIZE, 677 + }; 678 + 679 + return page_pool_create(&pp_params); 680 + } 681 + 682 + static int bcmasp_alloc_rx_buffers(struct bcmasp_intf *intf) 690 683 { 691 684 struct device *kdev = &intf->parent->pdev->dev; 692 685 struct page *buffer_pg; 686 + int ret; 693 687 694 - /* Alloc RX */ 688 + /* Contiguous streaming ring that hardware writes packet data into. */ 695 689 intf->rx_buf_order = get_order(RING_BUFFER_SIZE); 696 690 buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order); 697 691 if (!buffer_pg) ··· 719 675 intf->rx_ring_cpu = page_to_virt(buffer_pg); 720 676 intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE, 721 677 DMA_FROM_DEVICE); 722 - if (dma_mapping_error(kdev, intf->rx_ring_dma)) 723 - goto free_rx_buffer; 678 + if (dma_mapping_error(kdev, intf->rx_ring_dma)) { 679 + ret = -ENOMEM; 680 + goto free_ring_pages; 681 + } 682 + 683 + /* Page pool for SKB data areas (copy targets, not DMA buffers). 
*/ 684 + intf->rx_page_pool = bcmasp_rx_page_pool_create(intf); 685 + if (IS_ERR(intf->rx_page_pool)) { 686 + ret = PTR_ERR(intf->rx_page_pool); 687 + intf->rx_page_pool = NULL; 688 + goto free_ring_dma; 689 + } 690 + 691 + return 0; 692 + 693 + free_ring_dma: 694 + dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, 695 + DMA_FROM_DEVICE); 696 + free_ring_pages: 697 + __free_pages(buffer_pg, intf->rx_buf_order); 698 + return ret; 699 + } 700 + 701 + static void bcmasp_reclaim_rx_buffers(struct bcmasp_intf *intf) 702 + { 703 + struct device *kdev = &intf->parent->pdev->dev; 704 + 705 + page_pool_destroy(intf->rx_page_pool); 706 + intf->rx_page_pool = NULL; 707 + dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, 708 + DMA_FROM_DEVICE); 709 + __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order); 710 + } 711 + 712 + static int bcmasp_alloc_buffers(struct bcmasp_intf *intf) 713 + { 714 + struct device *kdev = &intf->parent->pdev->dev; 715 + int ret; 716 + 717 + /* Alloc RX */ 718 + ret = bcmasp_alloc_rx_buffers(intf); 719 + if (ret) 720 + return ret; 724 721 725 722 intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE, 726 - &intf->rx_edpkt_dma_addr, GFP_KERNEL); 723 + &intf->rx_edpkt_dma_addr, 724 + GFP_KERNEL); 727 725 if (!intf->rx_edpkt_cpu) 728 - goto free_rx_buffer_dma; 726 + goto free_rx_buffers; 729 727 730 728 /* Alloc TX */ 731 729 intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE, ··· 787 701 free_rx_edpkt_dma: 788 702 dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu, 789 703 intf->rx_edpkt_dma_addr); 790 - free_rx_buffer_dma: 791 - dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, 792 - DMA_FROM_DEVICE); 793 - free_rx_buffer: 794 - __free_pages(buffer_pg, intf->rx_buf_order); 704 + free_rx_buffers: 705 + bcmasp_reclaim_rx_buffers(intf); 795 706 796 707 return -ENOMEM; 797 708 } ··· 800 717 /* RX buffers */ 801 718 dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu, 802 719 
intf->rx_edpkt_dma_addr); 803 - dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, 804 - DMA_FROM_DEVICE); 805 - __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order); 720 + bcmasp_reclaim_rx_buffers(intf); 806 721 807 722 /* TX buffers */ 808 723 dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu, ··· 819 738 /* Make sure channels are disabled */ 820 739 rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE); 821 740 822 - /* Rx SPB */ 741 + /* Streaming data ring: hardware writes raw packet bytes here. */ 823 742 rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ); 824 743 rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE); 825 744 rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE); ··· 828 747 rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid, 829 748 RX_EDPKT_RING_BUFFER_VALID); 830 749 831 - /* EDPKT */ 750 + /* EDPKT descriptor ring: hardware fills descriptors pointing into 751 + * the streaming ring buffer above (RBUF_4K mode). 752 + */ 832 753 rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K << 833 754 RX_EDPKT_CFG_CFG0_DBUF_SHIFT) | 834 755 (RX_EDPKT_CFG_CFG0_64_ALN <<
+4
drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h
··· 246 246 ((((intf)->channel - 6) * 0x14) + 0xa2000) 247 247 #define RX_SPB_TOP_BLKOUT 0x00 248 248 249 + /* 250 + * Number of 4 KB pages that make up the contiguous RBUF_4K streaming ring 251 + * and the page pool used as copy-target SKB data areas. 252 + */ 249 253 #define NUM_4K_BUFFERS 32 250 254 #define RING_BUFFER_SIZE (PAGE_SIZE * NUM_4K_BUFFERS) 251 255