Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: ti: icssg_prueth: Add SW TX / RX Coalescing based on hrtimers

Add software IRQ coalescing based on hrtimers for the RX and TX data paths
of the ICSSG driver. It can be enabled with the following ethtool commands:

- RX coalescing
ethtool -C eth1 rx-usecs 50

- TX coalescing can be enabled per TX queue

- by default, the device-wide command configures coalescing for TX0
ethtool -C eth1 tx-usecs 50
- configure TX0
ethtool -Q eth0 queue_mask 1 --coalesce tx-usecs 100
- configure TX1
ethtool -Q eth0 queue_mask 2 --coalesce tx-usecs 100
- configure TX0 and TX1
ethtool -Q eth0 queue_mask 3 --coalesce tx-usecs 100 --coalesce tx-usecs 100

The minimum value for both rx-usecs and tx-usecs is 20us; smaller non-zero
values are raised to this minimum, and a value of 0 disables coalescing.

Compared to gro_flush_timeout and napi_defer_hard_irqs, this patch allows
IRQ coalescing to be enabled for the RX path separately.

Benchmarking numbers:
+------------------------+----------+--------+----------+--------+
| Method                 | Tput_TX  | CPU_TX | Tput_RX  | CPU_RX |
+========================+==========+========+==========+========+
| Default Driver         | 943 Mbps | 31%    | 517 Mbps | 38%    |
| IRQ Coalescing (Patch) | 943 Mbps | 28%    | 518 Mbps | 25%    |
+------------------------+----------+--------+----------+--------+

Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240430120634.1558998-1-danishanwar@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by MD Danish Anwar and committed by Jakub Kicinski
dcb3fba6 5165c48e

+155 -8
+35 -6
drivers/net/ethernet/ti/icssg/icssg_common.c
··· 122 122 } 123 123 124 124 int emac_tx_complete_packets(struct prueth_emac *emac, int chn, 125 - int budget) 125 + int budget, bool *tdown) 126 126 { 127 127 struct net_device *ndev = emac->ndev; 128 128 struct cppi5_host_desc_t *desc_tx; ··· 145 145 if (cppi5_desc_is_tdcm(desc_dma)) { 146 146 if (atomic_dec_and_test(&emac->tdown_cnt)) 147 147 complete(&emac->tdown_complete); 148 + *tdown = true; 148 149 break; 149 150 } 150 151 ··· 191 190 return num_tx; 192 191 } 193 192 193 + static enum hrtimer_restart emac_tx_timer_callback(struct hrtimer *timer) 194 + { 195 + struct prueth_tx_chn *tx_chns = 196 + container_of(timer, struct prueth_tx_chn, tx_hrtimer); 197 + 198 + enable_irq(tx_chns->irq); 199 + return HRTIMER_NORESTART; 200 + } 201 + 194 202 static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget) 195 203 { 196 204 struct prueth_tx_chn *tx_chn = prueth_napi_to_tx_chn(napi_tx); 197 205 struct prueth_emac *emac = tx_chn->emac; 206 + bool tdown = false; 198 207 int num_tx_packets; 199 208 200 - num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget); 209 + num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget, 210 + &tdown); 201 211 202 212 if (num_tx_packets >= budget) 203 213 return budget; 204 214 205 - if (napi_complete_done(napi_tx, num_tx_packets)) 206 - enable_irq(tx_chn->irq); 215 + if (napi_complete_done(napi_tx, num_tx_packets)) { 216 + if (unlikely(tx_chn->tx_pace_timeout_ns && !tdown)) { 217 + hrtimer_start(&tx_chn->tx_hrtimer, 218 + ns_to_ktime(tx_chn->tx_pace_timeout_ns), 219 + HRTIMER_MODE_REL_PINNED); 220 + } else { 221 + enable_irq(tx_chn->irq); 222 + } 223 + } 207 224 208 225 return num_tx_packets; 209 226 } ··· 245 226 struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; 246 227 247 228 netif_napi_add_tx(emac->ndev, &tx_chn->napi_tx, emac_napi_tx_poll); 229 + hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC, 230 + HRTIMER_MODE_REL_PINNED); 231 + tx_chn->tx_hrtimer.function = &emac_tx_timer_callback; 248 
232 ret = request_irq(tx_chn->irq, prueth_tx_irq, 249 233 IRQF_TRIGGER_HIGH, tx_chn->name, 250 234 tx_chn); ··· 893 871 break; 894 872 } 895 873 896 - if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) 897 - enable_irq(emac->rx_chns.irq[rx_flow]); 874 + if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { 875 + if (unlikely(emac->rx_pace_timeout_ns)) { 876 + hrtimer_start(&emac->rx_hrtimer, 877 + ns_to_ktime(emac->rx_pace_timeout_ns), 878 + HRTIMER_MODE_REL_PINNED); 879 + } else { 880 + enable_irq(emac->rx_chns.irq[rx_flow]); 881 + } 882 + } 898 883 899 884 return num_rx; 900 885 }
+93
drivers/net/ethernet/ti/icssg/icssg_ethtool.c
··· 201 201 rmon_stats->hist_tx[4] = emac_get_stat_by_name(emac, "tx_bucket5_frames"); 202 202 } 203 203 204 + static int emac_get_coalesce(struct net_device *ndev, 205 + struct ethtool_coalesce *coal, 206 + struct kernel_ethtool_coalesce *kernel_coal, 207 + struct netlink_ext_ack *extack) 208 + { 209 + struct prueth_emac *emac = netdev_priv(ndev); 210 + struct prueth_tx_chn *tx_chn; 211 + 212 + tx_chn = &emac->tx_chns[0]; 213 + 214 + coal->rx_coalesce_usecs = emac->rx_pace_timeout_ns / 1000; 215 + coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000; 216 + 217 + return 0; 218 + } 219 + 220 + static int emac_get_per_queue_coalesce(struct net_device *ndev, u32 queue, 221 + struct ethtool_coalesce *coal) 222 + { 223 + struct prueth_emac *emac = netdev_priv(ndev); 224 + struct prueth_tx_chn *tx_chn; 225 + 226 + if (queue >= PRUETH_MAX_TX_QUEUES) 227 + return -EINVAL; 228 + 229 + tx_chn = &emac->tx_chns[queue]; 230 + 231 + coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000; 232 + 233 + return 0; 234 + } 235 + 236 + static int emac_set_coalesce(struct net_device *ndev, 237 + struct ethtool_coalesce *coal, 238 + struct kernel_ethtool_coalesce *kernel_coal, 239 + struct netlink_ext_ack *extack) 240 + { 241 + struct prueth_emac *emac = netdev_priv(ndev); 242 + struct prueth *prueth = emac->prueth; 243 + struct prueth_tx_chn *tx_chn; 244 + 245 + tx_chn = &emac->tx_chns[0]; 246 + 247 + if (coal->rx_coalesce_usecs && 248 + coal->rx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) { 249 + dev_info(prueth->dev, "defaulting to min value of %dus for rx-usecs\n", 250 + ICSSG_MIN_COALESCE_USECS); 251 + coal->rx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS; 252 + } 253 + 254 + if (coal->tx_coalesce_usecs && 255 + coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) { 256 + dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs\n", 257 + ICSSG_MIN_COALESCE_USECS); 258 + coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS; 259 + } 260 + 261 + emac->rx_pace_timeout_ns 
= coal->rx_coalesce_usecs * 1000; 262 + tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000; 263 + 264 + return 0; 265 + } 266 + 267 + static int emac_set_per_queue_coalesce(struct net_device *ndev, u32 queue, 268 + struct ethtool_coalesce *coal) 269 + { 270 + struct prueth_emac *emac = netdev_priv(ndev); 271 + struct prueth *prueth = emac->prueth; 272 + struct prueth_tx_chn *tx_chn; 273 + 274 + if (queue >= PRUETH_MAX_TX_QUEUES) 275 + return -EINVAL; 276 + 277 + tx_chn = &emac->tx_chns[queue]; 278 + 279 + if (coal->tx_coalesce_usecs && 280 + coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) { 281 + dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs for tx-%u\n", 282 + ICSSG_MIN_COALESCE_USECS, queue); 283 + coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS; 284 + } 285 + 286 + tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000; 287 + 288 + return 0; 289 + } 290 + 204 291 const struct ethtool_ops icssg_ethtool_ops = { 205 292 .get_drvinfo = emac_get_drvinfo, 206 293 .get_msglevel = emac_get_msglevel, ··· 296 209 .get_ethtool_stats = emac_get_ethtool_stats, 297 210 .get_strings = emac_get_strings, 298 211 .get_ts_info = emac_get_ts_info, 212 + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | 213 + ETHTOOL_COALESCE_TX_USECS, 214 + .get_coalesce = emac_get_coalesce, 215 + .set_coalesce = emac_set_coalesce, 216 + .get_per_queue_coalesce = emac_get_per_queue_coalesce, 217 + .set_per_queue_coalesce = emac_set_per_queue_coalesce, 299 218 .get_channels = emac_get_channels, 300 219 .set_channels = emac_set_channels, 301 220 .get_link_ksettings = emac_get_link_ksettings,
+17 -1
drivers/net/ethernet/ti/icssg/icssg_prueth.c
··· 243 243 } 244 244 } 245 245 246 + static enum hrtimer_restart emac_rx_timer_callback(struct hrtimer *timer) 247 + { 248 + struct prueth_emac *emac = 249 + container_of(timer, struct prueth_emac, rx_hrtimer); 250 + int rx_flow = PRUETH_RX_FLOW_DATA; 251 + 252 + enable_irq(emac->rx_chns.irq[rx_flow]); 253 + return HRTIMER_NORESTART; 254 + } 255 + 246 256 static int emac_phy_connect(struct prueth_emac *emac) 247 257 { 248 258 struct prueth *prueth = emac->prueth; ··· 592 582 netdev_err(ndev, "tx teardown timeout\n"); 593 583 594 584 prueth_reset_tx_chan(emac, emac->tx_ch_num, true); 595 - for (i = 0; i < emac->tx_ch_num; i++) 585 + for (i = 0; i < emac->tx_ch_num; i++) { 596 586 napi_disable(&emac->tx_chns[i].napi_tx); 587 + hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); 588 + } 597 589 598 590 max_rx_flows = PRUETH_MAX_RX_FLOWS; 599 591 k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); ··· 603 591 prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true); 604 592 605 593 napi_disable(&emac->napi_rx); 594 + hrtimer_cancel(&emac->rx_hrtimer); 606 595 607 596 cancel_work_sync(&emac->rx_mode_work); 608 597 ··· 814 801 ndev->features = ndev->hw_features; 815 802 816 803 netif_napi_add(ndev, &emac->napi_rx, emac_napi_rx_poll); 804 + hrtimer_init(&emac->rx_hrtimer, CLOCK_MONOTONIC, 805 + HRTIMER_MODE_REL_PINNED); 806 + emac->rx_hrtimer.function = &emac_rx_timer_callback; 817 807 prueth->emac[mac] = emac; 818 808 819 809 return 0;
+10 -1
drivers/net/ethernet/ti/icssg/icssg_prueth.h
··· 108 108 u32 descs_num; 109 109 unsigned int irq; 110 110 char name[32]; 111 + struct hrtimer tx_hrtimer; 112 + unsigned long tx_pace_timeout_ns; 111 113 }; 112 114 113 115 struct prueth_rx_chn { ··· 128 126 #define PRUETH_MAX_TX_QUEUES 4 129 127 130 128 #define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */ 129 + 130 + /* Minimum coalesce time in usecs for both Tx and Rx */ 131 + #define ICSSG_MIN_COALESCE_USECS 20 131 132 132 133 /* data for each emac port */ 133 134 struct prueth_emac { ··· 188 183 189 184 struct delayed_work stats_work; 190 185 u64 stats[ICSSG_NUM_STATS]; 186 + 187 + /* RX IRQ Coalescing Related */ 188 + struct hrtimer rx_hrtimer; 189 + unsigned long rx_pace_timeout_ns; 191 190 }; 192 191 193 192 /** ··· 329 320 void prueth_xmit_free(struct prueth_tx_chn *tx_chn, 330 321 struct cppi5_host_desc_t *desc); 331 322 int emac_tx_complete_packets(struct prueth_emac *emac, int chn, 332 - int budget); 323 + int budget, bool *tdown); 333 324 int prueth_ndev_add_tx_napi(struct prueth_emac *emac); 334 325 int prueth_init_tx_chns(struct prueth_emac *emac); 335 326 int prueth_init_rx_chns(struct prueth_emac *emac,