Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

virtio_net: add support for Byte Queue Limits

Add support for Byte Queue Limits (BQL).

Tested on a QEMU-emulated virtio_net device with 1, 2 and 4 queues.
Tested with fq_codel and pfifo_fast. Super netperf with 50 threads was
running in the background. Netperf TCP_RR results (FQC = fq_codel,
PFF = pfifo_fast, Nq = number of queues; four runs and their average):

NOBQL FQC 1q: 159.56 159.33 158.50 154.31 avg: 157.925
NOBQL FQC 2q: 184.64 184.96 174.73 174.15 avg: 179.62
NOBQL FQC 4q: 994.46 441.96 416.50 499.56 avg: 588.12
NOBQL PFF 1q: 148.68 148.92 145.95 149.48 avg: 148.2575
NOBQL PFF 2q: 171.86 171.20 170.42 169.42 avg: 170.725
NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 avg: 2159.7875
BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 avg: 1282.4375
BQL FQC 2q: 768.30 817.72 864.43 974.40 avg: 856.2125
BQL FQC 4q: 945.66 942.68 878.51 822.82 avg: 897.4175
BQL PFF 1q: 149.69 151.49 149.40 147.47 avg: 149.5125
BQL PFF 2q: 2059.32 798.74 1844.12 381.80 avg: 1270.995
BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 avg: 6119.1875

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/r/20240618144456.1688998-1-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Jiri Pirko and committed by
Jakub Kicinski
c8bd1f7f 2b0cd6b7

+57 -24
+57 -24
drivers/net/virtio_net.c
··· 47 47 #define VIRTIO_XDP_TX BIT(0) 48 48 #define VIRTIO_XDP_REDIR BIT(1) 49 49 50 - #define VIRTIO_XDP_FLAG BIT(0) 50 + #define VIRTIO_XDP_FLAG BIT(0) 51 + #define VIRTIO_ORPHAN_FLAG BIT(1) 51 52 52 53 /* RX packet size EWMA. The average packet size is used to determine the packet 53 54 * buffer size when refilling RX rings. As the entire RX ring may be refilled ··· 86 85 struct virtnet_sq_free_stats { 87 86 u64 packets; 88 87 u64 bytes; 88 + u64 napi_packets; 89 + u64 napi_bytes; 89 90 }; 90 91 91 92 struct virtnet_sq_stats { ··· 509 506 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); 510 507 } 511 508 512 - static void __free_old_xmit(struct send_queue *sq, bool in_napi, 513 - struct virtnet_sq_free_stats *stats) 509 + static bool is_orphan_skb(void *ptr) 510 + { 511 + return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG; 512 + } 513 + 514 + static void *skb_to_ptr(struct sk_buff *skb, bool orphan) 515 + { 516 + return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0)); 517 + } 518 + 519 + static struct sk_buff *ptr_to_skb(void *ptr) 520 + { 521 + return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG); 522 + } 523 + 524 + static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 525 + bool in_napi, struct virtnet_sq_free_stats *stats) 514 526 { 515 527 unsigned int len; 516 528 void *ptr; 517 529 518 530 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 519 - ++stats->packets; 520 - 521 531 if (!is_xdp_frame(ptr)) { 522 - struct sk_buff *skb = ptr; 532 + struct sk_buff *skb = ptr_to_skb(ptr); 523 533 524 534 pr_debug("Sent skb %p\n", skb); 525 535 526 - stats->bytes += skb->len; 536 + if (is_orphan_skb(ptr)) { 537 + stats->packets++; 538 + stats->bytes += skb->len; 539 + } else { 540 + stats->napi_packets++; 541 + stats->napi_bytes += skb->len; 542 + } 527 543 napi_consume_skb(skb, in_napi); 528 544 } else { 529 545 struct xdp_frame *frame = ptr_to_xdp(ptr); 530 546 547 + stats->packets++; 
531 548 stats->bytes += xdp_get_frame_len(frame); 532 549 xdp_return_frame(frame); 533 550 } 534 551 } 552 + netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 535 553 } 536 554 537 555 /* Converting between virtqueue no. and kernel tx/rx queue no. ··· 979 955 virtnet_rq_free_buf(vi, rq, buf); 980 956 } 981 957 982 - static void free_old_xmit(struct send_queue *sq, bool in_napi) 958 + static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 959 + bool in_napi) 983 960 { 984 961 struct virtnet_sq_free_stats stats = {0}; 985 962 986 - __free_old_xmit(sq, in_napi, &stats); 963 + __free_old_xmit(sq, txq, in_napi, &stats); 987 964 988 965 /* Avoid overhead when no packets have been processed 989 966 * happens when called speculatively from start_xmit. 990 967 */ 991 - if (!stats.packets) 968 + if (!stats.packets && !stats.napi_packets) 992 969 return; 993 970 994 971 u64_stats_update_begin(&sq->stats.syncp); 995 - u64_stats_add(&sq->stats.bytes, stats.bytes); 996 - u64_stats_add(&sq->stats.packets, stats.packets); 972 + u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 973 + u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 997 974 u64_stats_update_end(&sq->stats.syncp); 998 975 } 999 976 ··· 1028 1003 * early means 16 slots are typically wasted. 1029 1004 */ 1030 1005 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 1031 - netif_stop_subqueue(dev, qnum); 1006 + struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1007 + 1008 + netif_tx_stop_queue(txq); 1032 1009 u64_stats_update_begin(&sq->stats.syncp); 1033 1010 u64_stats_inc(&sq->stats.stop); 1034 1011 u64_stats_update_end(&sq->stats.syncp); ··· 1039 1012 virtqueue_napi_schedule(&sq->napi, sq->vq); 1040 1013 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1041 1014 /* More just got used, free them then recheck. 
*/ 1042 - free_old_xmit(sq, false); 1015 + free_old_xmit(sq, txq, false); 1043 1016 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 1044 1017 netif_start_subqueue(dev, qnum); 1045 1018 u64_stats_update_begin(&sq->stats.syncp); ··· 1165 1138 } 1166 1139 1167 1140 /* Free up any pending old buffers before queueing new ones. */ 1168 - __free_old_xmit(sq, false, &stats); 1141 + __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1142 + false, &stats); 1169 1143 1170 1144 for (i = 0; i < n; i++) { 1171 1145 struct xdp_frame *xdpf = frames[i]; ··· 2341 2313 2342 2314 do { 2343 2315 virtqueue_disable_cb(sq->vq); 2344 - free_old_xmit(sq, true); 2316 + free_old_xmit(sq, txq, true); 2345 2317 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2346 2318 2347 2319 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { ··· 2440 2412 goto err_xdp_reg_mem_model; 2441 2413 2442 2414 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2415 + netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); 2443 2416 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2444 2417 2445 2418 return 0; ··· 2500 2471 txq = netdev_get_tx_queue(vi->dev, index); 2501 2472 __netif_tx_lock(txq, raw_smp_processor_id()); 2502 2473 virtqueue_disable_cb(sq->vq); 2503 - free_old_xmit(sq, true); 2474 + free_old_xmit(sq, txq, true); 2504 2475 2505 2476 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2506 2477 if (netif_tx_queue_stopped(txq)) { ··· 2534 2505 return 0; 2535 2506 } 2536 2507 2537 - static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) 2508 + static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 2538 2509 { 2539 2510 struct virtio_net_hdr_mrg_rxbuf *hdr; 2540 2511 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; ··· 2578 2549 return num_sg; 2579 2550 num_sg++; 2580 2551 } 2581 - return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC); 2552 + return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, 2553 
+ skb_to_ptr(skb, orphan), GFP_ATOMIC); 2582 2554 } 2583 2555 2584 2556 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) ··· 2589 2559 struct send_queue *sq = &vi->sq[qnum]; 2590 2560 int err; 2591 2561 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 2592 - bool kick = !netdev_xmit_more(); 2562 + bool xmit_more = netdev_xmit_more(); 2593 2563 bool use_napi = sq->napi.weight; 2564 + bool kick; 2594 2565 2595 2566 /* Free up any pending old buffers before queueing new ones. */ 2596 2567 do { 2597 2568 if (use_napi) 2598 2569 virtqueue_disable_cb(sq->vq); 2599 2570 2600 - free_old_xmit(sq, false); 2571 + free_old_xmit(sq, txq, false); 2601 2572 2602 - } while (use_napi && kick && 2573 + } while (use_napi && !xmit_more && 2603 2574 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2604 2575 2605 2576 /* timestamp packet in software */ 2606 2577 skb_tx_timestamp(skb); 2607 2578 2608 2579 /* Try to transmit */ 2609 - err = xmit_skb(sq, skb); 2580 + err = xmit_skb(sq, skb, !use_napi); 2610 2581 2611 2582 /* This should not happen! */ 2612 2583 if (unlikely(err)) { ··· 2629 2598 2630 2599 check_sq_full_and_disable(vi, dev, sq); 2631 2600 2632 - if (kick || netif_xmit_stopped(txq)) { 2601 + kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 2602 + !xmit_more || netif_xmit_stopped(txq); 2603 + if (kick) { 2633 2604 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2634 2605 u64_stats_update_begin(&sq->stats.syncp); 2635 2606 u64_stats_inc(&sq->stats.kicks);