Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-stats-tools-driver-tests-for-hw-gro'

Jakub Kicinski says:

====================
net: stats, tools, driver tests for HW GRO [part]

Add miscellaneous pieces related to production use of HW-GRO:
- report standard stats from drivers (bnxt is included here;
Gal recently posted patches for mlx5, which is great)
- CLI tool for calculating HW GRO savings / effectiveness
====================

Link: https://patch.msgid.link/20260207003509.3927744-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+131 -61
+13 -2
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 1801 1801 struct bnxt_tpa_info *tpa_info, 1802 1802 struct rx_tpa_end_cmp *tpa_end, 1803 1803 struct rx_tpa_end_cmp_ext *tpa_end1, 1804 - struct sk_buff *skb) 1804 + struct sk_buff *skb, 1805 + struct bnxt_rx_sw_stats *rx_stats) 1805 1806 { 1806 1807 #ifdef CONFIG_INET 1807 1808 int payload_off; ··· 1811 1810 segs = TPA_END_TPA_SEGS(tpa_end); 1812 1811 if (segs == 1) 1813 1812 return skb; 1813 + 1814 + rx_stats->rx_hw_gro_packets++; 1815 + rx_stats->rx_hw_gro_wire_packets += segs; 1814 1816 1815 1817 NAPI_GRO_CB(skb)->count = segs; 1816 1818 skb_shinfo(skb)->gso_size = ··· 1988 1984 } 1989 1985 1990 1986 if (gro) 1991 - skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb); 1987 + skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb, 1988 + &cpr->sw_stats->rx); 1992 1989 1993 1990 return skb; 1994 1991 } ··· 13494 13489 stats->rx_total_netpoll_discards += sw_stats->rx.rx_netpoll_discards; 13495 13490 stats->rx_total_ring_discards += 13496 13491 BNXT_GET_RING_STATS64(hw_stats, rx_discard_pkts); 13492 + stats->rx_total_hw_gro_packets += sw_stats->rx.rx_hw_gro_packets; 13493 + stats->rx_total_hw_gro_wire_packets += sw_stats->rx.rx_hw_gro_wire_packets; 13497 13494 stats->tx_total_resets += sw_stats->tx.tx_resets; 13498 13495 stats->tx_total_ring_discards += 13499 13496 BNXT_GET_RING_STATS64(hw_stats, tx_discard_pkts); ··· 15917 15910 stats->bytes += BNXT_GET_RING_STATS64(sw, rx_bcast_bytes); 15918 15911 15919 15912 stats->alloc_fail = cpr->sw_stats->rx.rx_oom_discards; 15913 + stats->hw_gro_packets = cpr->sw_stats->rx.rx_hw_gro_packets; 15914 + stats->hw_gro_wire_packets = cpr->sw_stats->rx.rx_hw_gro_wire_packets; 15920 15915 } 15921 15916 15922 15917 static void bnxt_get_queue_stats_tx(struct net_device *dev, int i, ··· 15954 15945 rx->packets = bp->net_stats_prev.rx_packets; 15955 15946 rx->bytes = bp->net_stats_prev.rx_bytes; 15956 15947 rx->alloc_fail = bp->ring_err_stats_prev.rx_total_oom_discards; 15948 + rx->hw_gro_packets = 
bp->ring_err_stats_prev.rx_total_hw_gro_packets; 15949 + rx->hw_gro_wire_packets = bp->ring_err_stats_prev.rx_total_hw_gro_wire_packets; 15957 15950 15958 15951 tx->packets = bp->net_stats_prev.tx_packets; 15959 15952 tx->bytes = bp->net_stats_prev.tx_bytes;
+6
drivers/net/ethernet/broadcom/bnxt/bnxt.h
··· 1126 1126 u64 rx_l4_csum_errors; 1127 1127 u64 rx_resets; 1128 1128 u64 rx_buf_errors; 1129 + /* end of ethtool -S stats */ 1129 1130 u64 rx_oom_discards; 1130 1131 u64 rx_netpoll_discards; 1132 + u64 rx_hw_gro_packets; 1133 + u64 rx_hw_gro_wire_packets; 1131 1134 }; 1132 1135 1133 1136 struct bnxt_tx_sw_stats { ··· 1157 1154 u64 tx_total_resets; 1158 1155 u64 tx_total_ring_discards; 1159 1156 u64 total_missed_irqs; 1157 + /* end of ethtool -S stats */ 1158 + u64 rx_total_hw_gro_packets; 1159 + u64 rx_total_hw_gro_wire_packets; 1160 1160 }; 1161 1161 1162 1162 struct bnxt_stats_mem {
+112 -59
tools/net/ynl/ynltool/qstats.c
··· 237 237 } 238 238 } 239 239 240 - static int do_show(int argc, char **argv) 240 + static struct netdev_qstats_get_list * 241 + qstats_dump(enum netdev_qstats_scope scope) 241 242 { 242 243 struct netdev_qstats_get_list *qstats; 243 244 struct netdev_qstats_get_req *req; 244 245 struct ynl_error yerr; 245 246 struct ynl_sock *ys; 246 - int ret = 0; 247 + 248 + ys = ynl_sock_create(&ynl_netdev_family, &yerr); 249 + if (!ys) { 250 + p_err("YNL: %s", yerr.msg); 251 + return NULL; 252 + } 253 + 254 + req = netdev_qstats_get_req_alloc(); 255 + if (!req) { 256 + p_err("failed to allocate qstats request"); 257 + goto err_close; 258 + } 259 + 260 + if (scope) 261 + netdev_qstats_get_req_set_scope(req, scope); 262 + 263 + qstats = netdev_qstats_get_dump(ys, req); 264 + netdev_qstats_get_req_free(req); 265 + if (!qstats) { 266 + p_err("failed to get queue stats: %s", ys->err.msg); 267 + goto err_close; 268 + } 269 + 270 + ynl_sock_destroy(ys); 271 + return qstats; 272 + 273 + err_close: 274 + ynl_sock_destroy(ys); 275 + return NULL; 276 + } 277 + 278 + static int do_show(int argc, char **argv) 279 + { 280 + struct netdev_qstats_get_list *qstats; 247 281 248 282 /* Parse options */ 249 283 while (argc > 0) { ··· 302 268 } 303 269 } 304 270 305 - ys = ynl_sock_create(&ynl_netdev_family, &yerr); 306 - if (!ys) { 307 - p_err("YNL: %s", yerr.msg); 271 + qstats = qstats_dump(scope); 272 + if (!qstats) 308 273 return -1; 309 - } 310 - 311 - req = netdev_qstats_get_req_alloc(); 312 - if (!req) { 313 - p_err("failed to allocate qstats request"); 314 - ret = -1; 315 - goto exit_close; 316 - } 317 - 318 - if (scope) 319 - netdev_qstats_get_req_set_scope(req, scope); 320 - 321 - qstats = netdev_qstats_get_dump(ys, req); 322 - netdev_qstats_get_req_free(req); 323 - if (!qstats) { 324 - p_err("failed to get queue stats: %s", ys->err.msg); 325 - ret = -1; 326 - goto exit_close; 327 - } 328 274 329 275 /* Print the stats as returned by the kernel */ 330 276 if (json_output) ··· 313 299 
print_plain_qstats(qstats); 314 300 315 301 netdev_qstats_get_list_free(qstats); 316 - exit_close: 317 - ynl_sock_destroy(ys); 318 - return ret; 302 + return 0; 319 303 } 320 304 321 305 static void compute_stats(__u64 *values, unsigned int count, ··· 418 406 static int do_balance(int argc, char **argv __attribute__((unused))) 419 407 { 420 408 struct netdev_qstats_get_list *qstats; 421 - struct netdev_qstats_get_req *req; 422 409 struct netdev_qstats_get_rsp **sorted; 423 - struct ynl_error yerr; 424 - struct ynl_sock *ys; 425 410 unsigned int count = 0; 426 411 unsigned int i, j; 427 412 int ret = 0; ··· 428 419 return -1; 429 420 } 430 421 431 - ys = ynl_sock_create(&ynl_netdev_family, &yerr); 432 - if (!ys) { 433 - p_err("YNL: %s", yerr.msg); 422 + qstats = qstats_dump(NETDEV_QSTATS_SCOPE_QUEUE); 423 + if (!qstats) 434 424 return -1; 435 - } 436 - 437 - req = netdev_qstats_get_req_alloc(); 438 - if (!req) { 439 - p_err("failed to allocate qstats request"); 440 - ret = -1; 441 - goto exit_close; 442 - } 443 - 444 - /* Always use queue scope for balance analysis */ 445 - netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE); 446 - 447 - qstats = netdev_qstats_get_dump(ys, req); 448 - netdev_qstats_get_req_free(req); 449 - if (!qstats) { 450 - p_err("failed to get queue stats: %s", ys->err.msg); 451 - ret = -1; 452 - goto exit_close; 453 - } 454 425 455 426 /* Count and sort queues */ 456 427 ynl_dump_foreach(qstats, qs) ··· 565 576 free(sorted); 566 577 exit_free_qstats: 567 578 netdev_qstats_get_list_free(qstats); 568 - exit_close: 569 - ynl_sock_destroy(ys); 570 579 return ret; 580 + } 581 + 582 + static int do_hw_gro(int argc, char **argv __attribute__((unused))) 583 + { 584 + struct netdev_qstats_get_list *qstats; 585 + 586 + if (argc > 0) { 587 + p_err("hw-gro command takes no arguments"); 588 + return -1; 589 + } 590 + 591 + qstats = qstats_dump(0); 592 + if (!qstats) 593 + return -1; 594 + 595 + if (json_output) 596 + jsonw_start_array(json_wtr); 
597 + 598 + ynl_dump_foreach(qstats, qs) { 599 + char ifname[IF_NAMESIZE]; 600 + const char *name; 601 + double savings; 602 + 603 + if (!qs->_present.rx_packets || 604 + !qs->_present.rx_hw_gro_packets || 605 + !qs->_present.rx_hw_gro_wire_packets) 606 + continue; 607 + 608 + if (!qs->rx_packets) 609 + continue; 610 + 611 + /* How many skbs did we avoid allocating thanks to HW GRO */ 612 + savings = (double)(qs->rx_hw_gro_wire_packets - 613 + qs->rx_hw_gro_packets) / 614 + qs->rx_packets * 100.0; 615 + 616 + name = if_indextoname(qs->ifindex, ifname); 617 + 618 + if (json_output) { 619 + jsonw_start_object(json_wtr); 620 + jsonw_uint_field(json_wtr, "ifindex", qs->ifindex); 621 + if (name) 622 + jsonw_string_field(json_wtr, "ifname", name); 623 + jsonw_float_field(json_wtr, "savings", savings); 624 + jsonw_end_object(json_wtr); 625 + } else { 626 + if (name) 627 + printf("%s", name); 628 + else 629 + printf("ifindex:%u", qs->ifindex); 630 + printf(": %.1f%% savings\n", savings); 631 + } 632 + } 633 + 634 + if (json_output) 635 + jsonw_end_array(json_wtr); 636 + 637 + netdev_qstats_get_list_free(qstats); 638 + return 0; 571 639 } 572 640 573 641 static int do_help(int argc __attribute__((unused)), ··· 636 590 } 637 591 638 592 fprintf(stderr, 639 - "Usage: %s qstats { COMMAND | help }\n" 640 - " %s qstats [ show ] [ OPTIONS ]\n" 641 - " %s qstats balance\n" 593 + "Usage: %1$s qstats { COMMAND | help }\n" 594 + " %1$s qstats [ show ] [ OPTIONS ]\n" 595 + " %1$s qstats balance\n" 596 + " %1$s qstats hw-gro\n" 642 597 "\n" 643 598 " OPTIONS := { scope queue | group-by { device | queue } }\n" 644 599 "\n" ··· 648 601 " show scope queue - Display per-queue statistics\n" 649 602 " show group-by device - Display device-aggregated statistics (default)\n" 650 603 " show group-by queue - Display per-queue statistics\n" 651 - " balance - Analyze traffic distribution balance.\n" 604 + "\n" 605 + " Analysis:\n" 606 + " balance - Traffic distribution between queues.\n" 607 + " 
hw-gro - HW GRO effectiveness analysis\n" 608 + " - savings - delta between packets received\n" 609 + " on the wire and packets seen by the kernel.\n" 652 610 "", 653 - bin_name, bin_name, bin_name); 611 + bin_name); 654 612 655 613 return 0; 656 614 } ··· 663 611 static const struct cmd qstats_cmds[] = { 664 612 { "show", do_show }, 665 613 { "balance", do_balance }, 614 + { "hw-gro", do_hw_gro }, 666 615 { "help", do_help }, 667 616 { 0 } 668 617 };