Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: mana: Implement ndo_tx_timeout and serialize queue resets per port.

Implement .ndo_tx_timeout for MANA so any stalled TX queue can be detected
and a device-controlled port reset for all queues can be scheduled to an
ordered workqueue. The reset for all queues on stall detection is
recommended by the hardware team.

Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Link: https://patch.msgid.link/20260112130552.GA11785@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Dipayaan Roy and committed by
Jakub Kicinski
3b194343 969994f0

+84 -3
+76 -1
drivers/net/ethernet/microsoft/mana/mana_en.c
··· 299 299 return gso_hs; 300 300 } 301 301 302 + static void mana_per_port_queue_reset_work_handler(struct work_struct *work) 303 + { 304 + struct mana_port_context *apc = container_of(work, 305 + struct mana_port_context, 306 + queue_reset_work); 307 + struct net_device *ndev = apc->ndev; 308 + int err; 309 + 310 + rtnl_lock(); 311 + 312 + /* Pre-allocate buffers to prevent failure in mana_attach later */ 313 + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); 314 + if (err) { 315 + netdev_err(ndev, "Insufficient memory for reset post tx stall detection\n"); 316 + goto out; 317 + } 318 + 319 + err = mana_detach(ndev, false); 320 + if (err) { 321 + netdev_err(ndev, "mana_detach failed: %d\n", err); 322 + goto dealloc_pre_rxbufs; 323 + } 324 + 325 + err = mana_attach(ndev); 326 + if (err) 327 + netdev_err(ndev, "mana_attach failed: %d\n", err); 328 + 329 + dealloc_pre_rxbufs: 330 + mana_pre_dealloc_rxbufs(apc); 331 + out: 332 + rtnl_unlock(); 333 + } 334 + 302 335 netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) 303 336 { 304 337 enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; ··· 872 839 return err; 873 840 } 874 841 842 + static void mana_tx_timeout(struct net_device *netdev, unsigned int txqueue) 843 + { 844 + struct mana_port_context *apc = netdev_priv(netdev); 845 + struct mana_context *ac = apc->ac; 846 + struct gdma_context *gc = ac->gdma_dev->gdma_context; 847 + 848 + /* Already in service, hence tx queue reset is not required.*/ 849 + if (gc->in_service) 850 + return; 851 + 852 + /* Note: If there are pending queue reset work for this port(apc), 853 + * subsequent request queued up from here are ignored. This is because 854 + * we are using the same work instance per port(apc). 
855 + */ 856 + queue_work(ac->per_port_queue_reset_wq, &apc->queue_reset_work); 857 + } 858 + 875 859 static int mana_shaper_set(struct net_shaper_binding *binding, 876 860 const struct net_shaper *shaper, 877 861 struct netlink_ext_ack *extack) ··· 974 924 .ndo_bpf = mana_bpf, 975 925 .ndo_xdp_xmit = mana_xdp_xmit, 976 926 .ndo_change_mtu = mana_change_mtu, 927 + .ndo_tx_timeout = mana_tx_timeout, 977 928 .net_shaper_ops = &mana_shaper_ops, 978 929 }; 979 930 ··· 3338 3287 ndev->min_mtu = ETH_MIN_MTU; 3339 3288 ndev->needed_headroom = MANA_HEADROOM; 3340 3289 ndev->dev_port = port_idx; 3290 + /* Recommended timeout based on HW FPGA re-config scenario. */ 3291 + ndev->watchdog_timeo = 15 * HZ; 3341 3292 SET_NETDEV_DEV(ndev, gc->dev); 3342 3293 3343 3294 netif_set_tso_max_size(ndev, GSO_MAX_SIZE); ··· 3355 3302 err = mana_rss_table_alloc(apc); 3356 3303 if (err) 3357 3304 goto reset_apc; 3305 + 3306 + /* Initialize the per port queue reset work.*/ 3307 + INIT_WORK(&apc->queue_reset_work, 3308 + mana_per_port_queue_reset_work_handler); 3358 3309 3359 3310 netdev_lockdep_set_classes(ndev); 3360 3311 ··· 3549 3492 { 3550 3493 struct gdma_context *gc = gd->gdma_context; 3551 3494 struct mana_context *ac = gd->driver_data; 3495 + struct mana_port_context *apc = NULL; 3552 3496 struct device *dev = gc->dev; 3553 3497 u8 bm_hostmode = 0; 3554 3498 u16 num_ports = 0; ··· 3607 3549 if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) 3608 3550 ac->num_ports = MAX_PORTS_IN_MANA_DEV; 3609 3551 3552 + ac->per_port_queue_reset_wq = 3553 + create_singlethread_workqueue("mana_per_port_queue_reset_wq"); 3554 + if (!ac->per_port_queue_reset_wq) { 3555 + dev_err(dev, "Failed to allocate per port queue reset workqueue\n"); 3556 + err = -ENOMEM; 3557 + goto out; 3558 + } 3559 + 3610 3560 if (!resuming) { 3611 3561 for (i = 0; i < ac->num_ports; i++) { 3612 3562 err = mana_probe_port(ac, i, &ac->ports[i]); ··· 3631 3565 } else { 3632 3566 for (i = 0; i < ac->num_ports; i++) { 3633 3567 
rtnl_lock(); 3568 + apc = netdev_priv(ac->ports[i]); 3569 + enable_work(&apc->queue_reset_work); 3634 3570 err = mana_attach(ac->ports[i]); 3635 3571 rtnl_unlock(); 3636 3572 /* we log the port for which the attach failed and stop ··· 3684 3616 3685 3617 for (i = 0; i < ac->num_ports; i++) { 3686 3618 ndev = ac->ports[i]; 3687 - apc = netdev_priv(ndev); 3688 3619 if (!ndev) { 3689 3620 if (i == 0) 3690 3621 dev_err(dev, "No net device to remove\n"); 3691 3622 goto out; 3692 3623 } 3624 + 3625 + apc = netdev_priv(ndev); 3626 + disable_work_sync(&apc->queue_reset_work); 3693 3627 3694 3628 /* All cleanup actions should stay after rtnl_lock(), otherwise 3695 3629 * other functions may access partially cleaned up data. ··· 3719 3649 3720 3650 mana_destroy_eq(ac); 3721 3651 out: 3652 + if (ac->per_port_queue_reset_wq) { 3653 + destroy_workqueue(ac->per_port_queue_reset_wq); 3654 + ac->per_port_queue_reset_wq = NULL; 3655 + } 3656 + 3722 3657 mana_gd_deregister_device(gd); 3723 3658 3724 3659 if (suspending)
+6 -1
include/net/mana/gdma.h
··· 598 598 599 599 /* Driver can self reset on FPGA Reconfig EQE notification */ 600 600 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) 601 + 602 + /* Driver detects stalled send queues and recovers them */ 603 + #define GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY BIT(18) 604 + 601 605 #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6) 602 606 603 607 /* Driver supports linearizing the skb when num_sge exceeds hardware limit */ ··· 625 621 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \ 626 622 GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \ 627 623 GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \ 628 - GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY) 624 + GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \ 625 + GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY) 629 626 630 627 #define GDMA_DRV_CAP_FLAGS2 0 631 628
+2 -1
include/net/mana/mana.h
··· 480 480 struct mana_ethtool_hc_stats hc_stats; 481 481 struct mana_eq *eqs; 482 482 struct dentry *mana_eqs_debugfs; 483 - 483 + struct workqueue_struct *per_port_queue_reset_wq; 484 484 /* Workqueue for querying hardware stats */ 485 485 struct delayed_work gf_stats_work; 486 486 bool hwc_timeout_occurred; ··· 495 495 struct mana_port_context { 496 496 struct mana_context *ac; 497 497 struct net_device *ndev; 498 + struct work_struct queue_reset_work; 498 499 499 500 u8 mac_addr[ETH_ALEN]; 500 501