Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mlx4-add-support-for-netdev-genl-api'

Joe Damato says:

====================
mlx4: Add support for netdev-genl API

There are no functional changes from v5, which I mistakenly sent right
after net-next was closed (oops). This revision, however, includes
Tariq's Reviewed-by tags from v5 in each commit message. See the
changelog below.

This series adds support to mlx4 for the netdev-genl API which makes it
much easier for users and user programs to map NAPI IDs back to
ifindexes, queues, and IRQs. This is extremely useful for a number of
use cases, including epoll-based busy poll.

In addition, this series includes a patch to generate per-queue
statistics using the netlink API.

To facilitate the stats, patch 1/3 adds a field "alloc_fail" to the ring
structure. This is incremented by the driver in an appropriate place and
used in patch 3/3 as alloc_fail.

Please note: I do not have access to mlx4 hardware, but I've been
working closely with Martin Karsten from University of Waterloo (CC'd)
who has very graciously tested my patches on their mlx4 hardware (hence
his Tested-by attribution in each commit). His latest research work is
particularly interesting [1] and this series helps to support that (and
future) work.

Martin re-tested v4 using Jakub's suggested tool [2] and the
stats.pkt_byte_sum and stats.qstat_by_ifindex tests passed. He also
adjusted the queue count and re-ran the tests to confirm they still
passed even when the queue count was modified.

[1]: https://dl.acm.org/doi/pdf/10.1145/3626780
[2]: https://lore.kernel.org/lkml/20240423175718.4ad4dc5a@kernel.org/
====================

Link: https://lore.kernel.org/r/20240528181139.515070-1-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+93 -1
+14
drivers/net/ethernet/mellanox/mlx4/en_cq.c
··· 126 126 cq_idx = cq_idx % priv->rx_ring_num; 127 127 rx_cq = priv->rx_cq[cq_idx]; 128 128 cq->vector = rx_cq->vector; 129 + irq = mlx4_eq_get_irq(mdev->dev, cq->vector); 129 130 } 130 131 131 132 if (cq->type == RX) ··· 143 142 if (err) 144 143 goto free_eq; 145 144 145 + cq->cq_idx = cq_idx; 146 146 cq->mcq.event = mlx4_en_cq_event; 147 147 148 148 switch (cq->type) { 149 149 case TX: 150 150 cq->mcq.comp = mlx4_en_tx_irq; 151 151 netif_napi_add_tx(cq->dev, &cq->napi, mlx4_en_poll_tx_cq); 152 + netif_napi_set_irq(&cq->napi, irq); 152 153 napi_enable(&cq->napi); 154 + netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_TX, &cq->napi); 153 155 break; 154 156 case RX: 155 157 cq->mcq.comp = mlx4_en_rx_irq; 156 158 netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq); 159 + netif_napi_set_irq(&cq->napi, irq); 157 160 napi_enable(&cq->napi); 161 + netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_RX, &cq->napi); 158 162 break; 159 163 case TX_XDP: 160 164 /* nothing regarding napi, it's shared with rx ring */ ··· 195 189 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) 196 190 { 197 191 if (cq->type != TX_XDP) { 192 + enum netdev_queue_type qtype; 193 + 194 + if (cq->type == RX) 195 + qtype = NETDEV_QUEUE_TYPE_RX; 196 + else 197 + qtype = NETDEV_QUEUE_TYPE_TX; 198 + 199 + netif_queue_set_napi(cq->dev, cq->cq_idx, qtype, NULL); 198 200 napi_disable(&cq->napi); 199 201 netif_napi_del(&cq->napi); 200 202 }
+74
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
··· 43 43 #include <net/vxlan.h> 44 44 #include <net/devlink.h> 45 45 #include <net/rps.h> 46 + #include <net/netdev_queues.h> 46 47 47 48 #include <linux/mlx4/driver.h> 48 49 #include <linux/mlx4/device.h> ··· 2074 2073 priv->rx_ring[i]->csum_ok = 0; 2075 2074 priv->rx_ring[i]->csum_none = 0; 2076 2075 priv->rx_ring[i]->csum_complete = 0; 2076 + priv->rx_ring[i]->alloc_fail = 0; 2077 2077 } 2078 2078 } 2079 2079 ··· 3101 3099 last_i += NUM_PHY_STATS; 3102 3100 } 3103 3101 3102 + static void mlx4_get_queue_stats_rx(struct net_device *dev, int i, 3103 + struct netdev_queue_stats_rx *stats) 3104 + { 3105 + struct mlx4_en_priv *priv = netdev_priv(dev); 3106 + const struct mlx4_en_rx_ring *ring; 3107 + 3108 + spin_lock_bh(&priv->stats_lock); 3109 + 3110 + if (!priv->port_up || mlx4_is_master(priv->mdev->dev)) 3111 + goto out_unlock; 3112 + 3113 + ring = priv->rx_ring[i]; 3114 + stats->packets = READ_ONCE(ring->packets); 3115 + stats->bytes = READ_ONCE(ring->bytes); 3116 + stats->alloc_fail = READ_ONCE(ring->alloc_fail); 3117 + 3118 + out_unlock: 3119 + spin_unlock_bh(&priv->stats_lock); 3120 + } 3121 + 3122 + static void mlx4_get_queue_stats_tx(struct net_device *dev, int i, 3123 + struct netdev_queue_stats_tx *stats) 3124 + { 3125 + struct mlx4_en_priv *priv = netdev_priv(dev); 3126 + const struct mlx4_en_tx_ring *ring; 3127 + 3128 + spin_lock_bh(&priv->stats_lock); 3129 + 3130 + if (!priv->port_up || mlx4_is_master(priv->mdev->dev)) 3131 + goto out_unlock; 3132 + 3133 + ring = priv->tx_ring[TX][i]; 3134 + stats->packets = READ_ONCE(ring->packets); 3135 + stats->bytes = READ_ONCE(ring->bytes); 3136 + 3137 + out_unlock: 3138 + spin_unlock_bh(&priv->stats_lock); 3139 + } 3140 + 3141 + static void mlx4_get_base_stats(struct net_device *dev, 3142 + struct netdev_queue_stats_rx *rx, 3143 + struct netdev_queue_stats_tx *tx) 3144 + { 3145 + struct mlx4_en_priv *priv = netdev_priv(dev); 3146 + 3147 + spin_lock_bh(&priv->stats_lock); 3148 + 3149 + if (!priv->port_up || 
mlx4_is_master(priv->mdev->dev)) 3150 + goto out_unlock; 3151 + 3152 + if (priv->rx_ring_num) { 3153 + rx->packets = 0; 3154 + rx->bytes = 0; 3155 + rx->alloc_fail = 0; 3156 + } 3157 + 3158 + if (priv->tx_ring_num[TX]) { 3159 + tx->packets = 0; 3160 + tx->bytes = 0; 3161 + } 3162 + 3163 + out_unlock: 3164 + spin_unlock_bh(&priv->stats_lock); 3165 + } 3166 + 3167 + static const struct netdev_stat_ops mlx4_stat_ops = { 3168 + .get_queue_stats_rx = mlx4_get_queue_stats_rx, 3169 + .get_queue_stats_tx = mlx4_get_queue_stats_tx, 3170 + .get_base_stats = mlx4_get_base_stats, 3171 + }; 3172 + 3104 3173 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, 3105 3174 struct mlx4_en_port_profile *prof) 3106 3175 { ··· 3335 3262 netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); 3336 3263 netif_set_real_num_rx_queues(dev, priv->rx_ring_num); 3337 3264 3265 + dev->stat_ops = &mlx4_stat_ops; 3338 3266 dev->ethtool_ops = &mlx4_en_ethtool_ops; 3339 3267 3340 3268 /*
+3 -1
drivers/net/ethernet/mellanox/mlx4/en_rx.c
··· 82 82 83 83 for (i = 0; i < priv->num_frags; i++, frags++) { 84 84 if (!frags->page) { 85 - if (mlx4_alloc_page(priv, frags, gfp)) 85 + if (mlx4_alloc_page(priv, frags, gfp)) { 86 + ring->alloc_fail++; 86 87 return -ENOMEM; 88 + } 87 89 ring->rx_alloc_pages++; 88 90 } 89 91 rx_desc->data[i].addr = cpu_to_be64(frags->dma +
+2
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
··· 355 355 unsigned long xdp_tx; 356 356 unsigned long xdp_tx_full; 357 357 unsigned long dropped; 358 + unsigned long alloc_fail; 358 359 int hwtstamp_rx_filter; 359 360 cpumask_var_t affinity_mask; 360 361 struct xdp_rxq_info xdp_rxq; ··· 380 379 #define MLX4_EN_OPCODE_ERROR 0x1e 381 380 382 381 const struct cpumask *aff_mask; 382 + int cq_idx; 383 383 }; 384 384 385 385 struct mlx4_en_port_profile {