Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'net-mana-enforce-tx-sge-limit-and-fix-error-cleanup'

Aditya Garg says:

====================
net: mana: Enforce TX SGE limit and fix error cleanup

Add pre-transmission checks to block SKBs that exceed the hardware's SGE
limit. Force software segmentation for GSO traffic and linearize non-GSO
packets as needed.

Update TX error handling to drop failed SKBs and unmap resources
immediately.
====================

Link: https://patch.msgid.link/1763464269-10431-1-git-send-email-gargaditya@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
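
For orientation, a minimal user-space model of the dispatch rule this series
implements follows. It is a sketch, not driver code: fake_skb stands in for
struct sk_buff, HW_SGE_LIMIT plays the role of the hardware's 30-entry SGE
budget, and the nr_frags + 2 accounting simply mirrors the driver's own check.

/* sge_dispatch.c: model of "segment GSO, linearize non-GSO" (illustrative). */
#include <stdbool.h>
#include <stdio.h>

#define HW_SGE_LIMIT 30		/* stand-in for MANA_MAX_TX_WQE_SGL_ENTRIES */

struct fake_skb {
	int nr_frags;		/* skb_shinfo(skb)->nr_frags in the kernel */
	bool is_gso;		/* skb_is_gso() in the kernel */
};

enum tx_action { TX_SEND_AS_IS, TX_SOFTWARE_SEGMENT, TX_LINEARIZE };

static enum tx_action classify(const struct fake_skb *skb)
{
	if (skb->nr_frags + 2 <= HW_SGE_LIMIT)
		return TX_SEND_AS_IS;
	/* features_check path: clearing NETIF_F_GSO_MASK makes the stack
	 * software-segment the skb before it ever reaches ndo_start_xmit. */
	if (skb->is_gso)
		return TX_SOFTWARE_SEGMENT;
	/* xmit-path fallback for oversized non-GSO skbs. */
	return TX_LINEARIZE;
}

int main(void)
{
	const struct fake_skb tests[] = {
		{ .nr_frags = 10, .is_gso = false },
		{ .nr_frags = 40, .is_gso = true  },
		{ .nr_frags = 40, .is_gso = false },
	};
	static const char * const names[] = {
		"send as-is", "software segment", "linearize",
	};

	for (unsigned int i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
		printf("nr_frags=%2d gso=%d -> %s\n", tests[i].nr_frags,
		       tests[i].is_gso, names[classify(&tests[i])]);
	return 0;
}

The design point worth noting: the GSO case is resolved in the
ndo_features_check() hook, before the packet reaches the driver, while the
non-GSO case is handled inside the xmit path itself.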

5 files changed, 53 insertions(+), 12 deletions(-)

drivers/net/ethernet/microsoft/mana/gdma_main.c | +1 -5
@@ -1300,7 +1300,6 @@
 				struct gdma_posted_wqe_info *wqe_info)
 {
 	u32 client_oob_size = wqe_req->inline_oob_size;
-	struct gdma_context *gc;
 	u32 sgl_data_size;
 	u32 max_wqe_size;
 	u32 wqe_size;
@@ -1329,11 +1330,8 @@
 	if (wqe_size > max_wqe_size)
 		return -EINVAL;
 
-	if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) {
-		gc = wq->gdma_dev->gdma_context;
-		dev_err(gc->dev, "unsuccessful flow control!\n");
+	if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq))
 		return -ENOSPC;
-	}
 
 	if (wqe_info)
 		wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE;
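
(One aside on this hunk: the -ENOSPC return survives; only the dev_err() and
the gdma_context lookup feeding it are removed. Hitting a full work queue on a
monitored queue is ordinary flow control that the caller is expected to absorb,
so an error-level log in the TX hot path was arguably noise; that appears to be
the motivation here.)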

drivers/net/ethernet/microsoft/mana/mana_en.c | +41 -6
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -330,6 +329,21 @@
 	cq = &apc->tx_qp[txq_idx].tx_cq;
 	tx_stats = &txq->stats;
 
+	BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
+	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* GSO skb with Hardware SGE limit exceeded is not expected here
+		 * as they are handled in mana_features_check() callback
+		 */
+		if (skb_linearize(skb)) {
+			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+					 skb_shinfo(skb)->nr_frags,
+					 skb_is_gso(skb));
+			goto tx_drop_count;
+		}
+		apc->eth_stats.tx_linear_pkt_cnt++;
+	}
+
 	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
 	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
 
@@ -458,8 +442,6 @@
 		}
 	}
 
-	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
 	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
 		pkg.wqe_req.sgl = pkg.sgl_array;
 	} else {
@@ -492,9 +478,9 @@
 
 	if (err) {
 		(void)skb_dequeue_tail(&txq->pending_skbs);
+		mana_unmap_skb(skb, apc);
 		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
-		err = NETDEV_TX_BUSY;
-		goto tx_busy;
+		goto free_sgl_ptr;
 	}
 
 	err = NETDEV_TX_OK;
@@ -514,7 +500,6 @@
 	tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs);
 	u64_stats_update_end(&tx_stats->syncp);
 
-tx_busy:
 	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
 		netif_tx_wake_queue(net_txq);
 		apc->eth_stats.wake_queue++;
@@ -530,6 +517,25 @@
 	dev_kfree_skb_any(skb);
 	return NETDEV_TX_OK;
 }
+
+#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+					     struct net_device *ndev,
+					     netdev_features_t features)
+{
+	if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* Exceeds HW SGE limit.
+		 * GSO case:
+		 * Disable GSO so the stack will software-segment the skb
+		 * into smaller skbs that fit the SGE budget.
+		 * Non-GSO case:
+		 * The xmit path will attempt skb_linearize() as a fallback.
+		 */
+		features &= ~NETIF_F_GSO_MASK;
+	}
+	return features;
+}
+#endif
 
 static void mana_get_stats64(struct net_device *ndev,
 			     struct rtnl_link_stats64 *st)
@@ -915,6 +883,9 @@
 	.ndo_open		= mana_open,
 	.ndo_stop		= mana_close,
 	.ndo_select_queue	= mana_select_queue,
+#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
+	.ndo_features_check	= mana_features_check,
+#endif
 	.ndo_start_xmit		= mana_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_get_stats64	= mana_get_stats64,
@@ -1686,7 +1651,7 @@
 	return 0;
 }
 
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 {
 	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
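
Two details in this hunk are easy to miss. First, the old failure path returned
NETDEV_TX_BUSY through the now-deleted tx_busy label, asking the stack to
requeue and retry the same skb; the new path unmaps the skb's DMA mappings and
jumps to free_sgl_ptr, which falls through to the drop-and-free tail visible at
the end of the function, so failed posts are now counted as drops rather than
retried. Second, the #if guard compiles mana_features_check() in only when the
worst case could actually overflow the SGE budget. A standalone sketch of that
arithmetic, assuming the usual kernel default of MAX_SKB_FRAGS = 17 (tunable
via CONFIG_MAX_SKB_FRAGS, up to 45):

/* sge_guard.c: when does the compile-time guard fire? (illustrative) */
#include <stdio.h>

#define MANA_MAX_TX_WQE_SGL_ENTRIES 30	/* from include/net/mana/gdma.h */

/* Mirrors "MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES" from the patch. */
static int guard_needed(int max_skb_frags)
{
	return max_skb_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES;
}

int main(void)
{
	const int candidates[] = { 17, 28, 29, 45 };	/* possible MAX_SKB_FRAGS */

	for (unsigned int i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++)
		printf("MAX_SKB_FRAGS=%2d -> mana_features_check() %s\n",
		       candidates[i],
		       guard_needed(candidates[i]) ? "compiled in" : "compiled out");
	return 0;
}

On a stock kernel (17 frags) the callback and its .ndo_features_check hookup
compile away entirely; they only exist on configurations with a large
MAX_SKB_FRAGS, such as BIG TCP setups.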

drivers/net/ethernet/microsoft/mana/mana_ethtool.c | +2
@@ -18,6 +18,8 @@
 	{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
 	{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
 					 tx_cqe_unknown_type)},
+	{"tx_linear_pkt_cnt", offsetof(struct mana_ethtool_stats,
+				       tx_linear_pkt_cnt)},
 	{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
 			     rx_coalesced_err)},
 	{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
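
Once applied, the new counter shows up in "ethtool -S <iface>" output as
tx_linear_pkt_cnt, next to the existing TX error counters; watching it rise is
a cheap way to confirm whether traffic is actually taking the linearization
fallback added in mana_en.c.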

include/net/mana/gdma.h | +7 -1
@@ -486,6 +486,8 @@
 #define INLINE_OOB_SMALL_SIZE 8
 #define INLINE_OOB_LARGE_SIZE 24
 
+#define MANA_MAX_TX_WQE_SGL_ENTRIES 30
+
 #define MAX_TX_WQE_SIZE 512
 #define MAX_RX_WQE_SIZE 256
 
@@ -594,6 +592,9 @@
 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
 #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
 
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
 /* Driver can send HWC periodically to query stats */
 #define GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY BIT(21)
 
@@ -610,7 +605,8 @@
 	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
 	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
 	 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
-	 GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY)
+	 GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \
+	 GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
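
GDMA_DRV_CAP_FLAGS1 is the capability word the driver reports to the
management/PF side when it initializes, so OR-ing in
GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE (BIT(20), a previously unused bit between the
existing BIT(17) and BIT(21) flags) advertises the new behavior to the other
end. The BUILD_BUG_ON() added in mana_en.c ties this header's
MANA_MAX_TX_WQE_SGL_ENTRIES (30) to the driver-side MAX_TX_WQE_SGL_ENTRIES so
the two constants cannot silently drift apart.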

include/net/mana/mana.h | +2
@@ -377,6 +377,7 @@
 	u64 wake_queue;
 	u64 tx_cqe_err;
 	u64 tx_cqe_unknown_type;
+	u64 tx_linear_pkt_cnt;
 	u64 rx_coalesced_err;
 	u64 rx_cqe_unknown_type;
 };
@@ -593,6 +592,7 @@
 void mana_query_phy_stats(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 extern struct dentry *mana_debugfs_root;