Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'virtio-net-xdp-multi-buffer'

Heng Qi says:

====================
virtio-net: support multi buffer xdp

Changes since PATCH v4:
- Make netdev_warn() in [PATCH 2/10] independent from [PATCH 3/10].

Changes since PATCH v3:
- Separate fix patch [2/10] for MTU calculation of single buffer xdp.
Note that this patch needs to be backported to the stable branch.

Changes since PATCH v2:
- Even if single buffer xdp has a hole mechanism, there will be no
problem (limiting mtu and turning off GUEST GSO), so there is no
need to backport "[PATCH 1/9]";
- Modify calculation of MTU for single buffer xdp in virtnet_xdp_set();
- Make truesize in mergeable mode return to literal meaning;
- Add some comments for legibility;

Changes since RFC:
- Using headroom instead of vi->xdp_enabled to avoid re-reading
in add_recvbuf_mergeable();
- Disable GRO_HW and keep linearization for single buffer xdp;
- Renamed to virtnet_build_xdp_buff_mrg();
- pr_debug() to netdev_dbg();
- Adjusted the order of the patch series.

Currently, virtio net only supports xdp for single-buffer packets
or linearized multi-buffer packets. This patchset adds xdp support for
multi-buffer packets, so a larger MTU can be used if the xdp program
sets xdp.frags. This does not affect single buffer handling.

In order to build multi-buffer xdp neatly, we integrated the code
into virtnet_build_xdp_buff_mrg() for xdp. The first buffer is used
to prepare the xdp buff, and the rest of the buffers are added to
its skb_shared_info structure. This structure can also be
conveniently converted during XDP_PASS to get the corresponding skb.

Since virtio net uses comp pages, and bpf_xdp_frags_increase_tail()
is based on the assumption of the page pool,
(rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag))
is negative in most cases. So we do not set xdp_rxq->frag_size in
virtnet_open(), which disables the tail increase.

====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+249 -120
+249 -120
drivers/net/virtio_net.c
··· 446 446 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 447 447 struct receive_queue *rq, 448 448 struct page *page, unsigned int offset, 449 - unsigned int len, unsigned int truesize, 450 - bool hdr_valid, unsigned int metasize, 451 - unsigned int headroom) 449 + unsigned int len, unsigned int truesize) 452 450 { 453 451 struct sk_buff *skb; 454 452 struct virtio_net_hdr_mrg_rxbuf *hdr; ··· 464 466 else 465 467 hdr_padded_len = sizeof(struct padded_vnet_hdr); 466 468 467 - /* If headroom is not 0, there is an offset between the beginning of the 468 - * data and the allocated space, otherwise the data and the allocated 469 - * space are aligned. 470 - * 471 - * Buffers with headroom use PAGE_SIZE as alloc size, see 472 - * add_recvbuf_mergeable() + get_mergeable_buf_len() 473 - */ 474 - truesize = headroom ? PAGE_SIZE : truesize; 475 - tailroom = truesize - headroom; 476 - buf = p - headroom; 477 - 469 + buf = p; 478 470 len -= hdr_len; 479 471 offset += hdr_padded_len; 480 472 p += hdr_padded_len; 481 - tailroom -= hdr_padded_len + len; 473 + tailroom = truesize - hdr_padded_len - len; 482 474 483 475 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 484 476 ··· 498 510 if (len <= skb_tailroom(skb)) 499 511 copy = len; 500 512 else 501 - copy = ETH_HLEN + metasize; 513 + copy = ETH_HLEN; 502 514 skb_put_data(skb, p, copy); 503 515 504 516 len -= copy; ··· 537 549 give_pages(rq, page); 538 550 539 551 ok: 540 - /* hdr_valid means no XDP, so we can copy the vnet header */ 541 - if (hdr_valid) { 542 - hdr = skb_vnet_hdr(skb); 543 - memcpy(hdr, hdr_p, hdr_len); 544 - } 552 + hdr = skb_vnet_hdr(skb); 553 + memcpy(hdr, hdr_p, hdr_len); 545 554 if (page_to_free) 546 555 put_page(page_to_free); 547 - 548 - if (metasize) { 549 - __skb_pull(skb, metasize); 550 - skb_metadata_set(skb, metasize); 551 - } 552 556 553 557 return skb; 554 558 } ··· 550 570 struct xdp_frame *xdpf) 551 571 { 552 572 struct virtio_net_hdr_mrg_rxbuf *hdr; 553 - int err; 
573 + struct skb_shared_info *shinfo; 574 + u8 nr_frags = 0; 575 + int err, i; 554 576 555 577 if (unlikely(xdpf->headroom < vi->hdr_len)) 556 578 return -EOVERFLOW; 557 579 558 - /* Make room for virtqueue hdr (also change xdpf->headroom?) */ 580 + if (unlikely(xdp_frame_has_frags(xdpf))) { 581 + shinfo = xdp_get_shared_info_from_frame(xdpf); 582 + nr_frags = shinfo->nr_frags; 583 + } 584 + 585 + /* In wrapping function virtnet_xdp_xmit(), we need to free 586 + * up the pending old buffers, where we need to calculate the 587 + * position of skb_shared_info in xdp_get_frame_len() and 588 + * xdp_return_frame(), which will involve to xdpf->data and 589 + * xdpf->headroom. Therefore, we need to update the value of 590 + * headroom synchronously here. 591 + */ 592 + xdpf->headroom -= vi->hdr_len; 559 593 xdpf->data -= vi->hdr_len; 560 594 /* Zero header and leave csum up to XDP layers */ 561 595 hdr = xdpf->data; 562 596 memset(hdr, 0, vi->hdr_len); 563 597 xdpf->len += vi->hdr_len; 564 598 565 - sg_init_one(sq->sg, xdpf->data, xdpf->len); 599 + sg_init_table(sq->sg, nr_frags + 1); 600 + sg_set_buf(sq->sg, xdpf->data, xdpf->len); 601 + for (i = 0; i < nr_frags; i++) { 602 + skb_frag_t *frag = &shinfo->frags[i]; 566 603 567 - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), 568 - GFP_ATOMIC); 604 + sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 605 + skb_frag_size(frag), skb_frag_off(frag)); 606 + } 607 + 608 + err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, 609 + xdp_to_ptr(xdpf), GFP_ATOMIC); 569 610 if (unlikely(err)) 570 611 return -ENOSPC; /* Caller handle free/refcnt */ 571 612 ··· 666 665 if (likely(is_xdp_frame(ptr))) { 667 666 struct xdp_frame *frame = ptr_to_xdp(ptr); 668 667 669 - bytes += frame->len; 668 + bytes += xdp_get_frame_len(frame); 670 669 xdp_return_frame(frame); 671 670 } else { 672 671 struct sk_buff *skb = ptr; ··· 925 924 { 926 925 struct page *page = buf; 927 926 struct sk_buff *skb = 928 - page_to_skb(vi, rq, page, 
0, len, PAGE_SIZE, true, 0, 0); 927 + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); 929 928 930 929 stats->bytes += len - vi->hdr_len; 931 930 if (unlikely(!skb)) ··· 937 936 stats->drops++; 938 937 give_pages(rq, page); 939 938 return NULL; 939 + } 940 + 941 + /* Why not use xdp_build_skb_from_frame() ? 942 + * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 943 + * virtio-net there are 2 points that do not match its requirements: 944 + * 1. The size of the prefilled buffer is not fixed before xdp is set. 945 + * 2. xdp_build_skb_from_frame() does more checks that we don't need, 946 + * like eth_type_trans() (which virtio-net does in receive_buf()). 947 + */ 948 + static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 949 + struct virtnet_info *vi, 950 + struct xdp_buff *xdp, 951 + unsigned int xdp_frags_truesz) 952 + { 953 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 954 + unsigned int headroom, data_len; 955 + struct sk_buff *skb; 956 + int metasize; 957 + u8 nr_frags; 958 + 959 + if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 960 + pr_debug("Error building skb as missing reserved tailroom for xdp"); 961 + return NULL; 962 + } 963 + 964 + if (unlikely(xdp_buff_has_frags(xdp))) 965 + nr_frags = sinfo->nr_frags; 966 + 967 + skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 968 + if (unlikely(!skb)) 969 + return NULL; 970 + 971 + headroom = xdp->data - xdp->data_hard_start; 972 + data_len = xdp->data_end - xdp->data; 973 + skb_reserve(skb, headroom); 974 + __skb_put(skb, data_len); 975 + 976 + metasize = xdp->data - xdp->data_meta; 977 + metasize = metasize > 0 ? 
metasize : 0; 978 + if (metasize) 979 + skb_metadata_set(skb, metasize); 980 + 981 + if (unlikely(xdp_buff_has_frags(xdp))) 982 + xdp_update_skb_shared_info(skb, nr_frags, 983 + sinfo->xdp_frags_size, 984 + xdp_frags_truesz, 985 + xdp_buff_is_frag_pfmemalloc(xdp)); 986 + 987 + return skb; 988 + } 989 + 990 + /* TODO: build xdp in big mode */ 991 + static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 992 + struct virtnet_info *vi, 993 + struct receive_queue *rq, 994 + struct xdp_buff *xdp, 995 + void *buf, 996 + unsigned int len, 997 + unsigned int frame_sz, 998 + u16 *num_buf, 999 + unsigned int *xdp_frags_truesize, 1000 + struct virtnet_rq_stats *stats) 1001 + { 1002 + struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1003 + unsigned int headroom, tailroom, room; 1004 + unsigned int truesize, cur_frag_size; 1005 + struct skb_shared_info *shinfo; 1006 + unsigned int xdp_frags_truesz = 0; 1007 + struct page *page; 1008 + skb_frag_t *frag; 1009 + int offset; 1010 + void *ctx; 1011 + 1012 + xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 1013 + xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM, 1014 + VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 1015 + 1016 + if (*num_buf > 1) { 1017 + /* If we want to build multi-buffer xdp, we need 1018 + * to specify that the flags of xdp_buff have the 1019 + * XDP_FLAGS_HAS_FRAG bit. 
1020 + */ 1021 + if (!xdp_buff_has_frags(xdp)) 1022 + xdp_buff_set_frags_flag(xdp); 1023 + 1024 + shinfo = xdp_get_shared_info_from_buff(xdp); 1025 + shinfo->nr_frags = 0; 1026 + shinfo->xdp_frags_size = 0; 1027 + } 1028 + 1029 + if ((*num_buf - 1) > MAX_SKB_FRAGS) 1030 + return -EINVAL; 1031 + 1032 + while ((--*num_buf) >= 1) { 1033 + buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); 1034 + if (unlikely(!buf)) { 1035 + pr_debug("%s: rx error: %d buffers out of %d missing\n", 1036 + dev->name, *num_buf, 1037 + virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 1038 + dev->stats.rx_length_errors++; 1039 + return -EINVAL; 1040 + } 1041 + 1042 + stats->bytes += len; 1043 + page = virt_to_head_page(buf); 1044 + offset = buf - page_address(page); 1045 + 1046 + truesize = mergeable_ctx_to_truesize(ctx); 1047 + headroom = mergeable_ctx_to_headroom(ctx); 1048 + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1049 + room = SKB_DATA_ALIGN(headroom + tailroom); 1050 + 1051 + cur_frag_size = truesize; 1052 + xdp_frags_truesz += cur_frag_size; 1053 + if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 1054 + put_page(page); 1055 + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1056 + dev->name, len, (unsigned long)(truesize - room)); 1057 + dev->stats.rx_length_errors++; 1058 + return -EINVAL; 1059 + } 1060 + 1061 + frag = &shinfo->frags[shinfo->nr_frags++]; 1062 + __skb_frag_set_page(frag, page); 1063 + skb_frag_off_set(frag, offset); 1064 + skb_frag_size_set(frag, len); 1065 + if (page_is_pfmemalloc(page)) 1066 + xdp_buff_set_frag_pfmemalloc(xdp); 1067 + 1068 + shinfo->xdp_frags_size += len; 1069 + } 1070 + 1071 + *xdp_frags_truesize = xdp_frags_truesz; 1072 + return 0; 940 1073 } 941 1074 942 1075 static struct sk_buff *receive_mergeable(struct net_device *dev, ··· 1090 955 struct bpf_prog *xdp_prog; 1091 956 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 1092 957 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 1093 - unsigned 
int metasize = 0; 1094 - unsigned int frame_sz; 958 + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 959 + unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 960 + unsigned int frame_sz, xdp_room; 1095 961 int err; 1096 962 1097 963 head_skb = NULL; 1098 964 stats->bytes += len - vi->hdr_len; 1099 965 1100 - if (unlikely(len > truesize)) { 966 + if (unlikely(len > truesize - room)) { 1101 967 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1102 - dev->name, len, (unsigned long)ctx); 968 + dev->name, len, (unsigned long)(truesize - room)); 1103 969 dev->stats.rx_length_errors++; 1104 970 goto err_skb; 1105 971 } ··· 1113 977 rcu_read_lock(); 1114 978 xdp_prog = rcu_dereference(rq->xdp_prog); 1115 979 if (xdp_prog) { 980 + unsigned int xdp_frags_truesz = 0; 981 + struct skb_shared_info *shinfo; 1116 982 struct xdp_frame *xdpf; 1117 983 struct page *xdp_page; 1118 984 struct xdp_buff xdp; 1119 985 void *data; 1120 986 u32 act; 987 + int i; 1121 988 1122 989 /* Transient failure which in theory could occur if 1123 990 * in-flight packets from before XDP was enabled reach ··· 1129 990 if (unlikely(hdr->hdr.gso_type)) 1130 991 goto err_xdp; 1131 992 1132 - /* Buffers with headroom use PAGE_SIZE as alloc size, 1133 - * see add_recvbuf_mergeable() + get_mergeable_buf_len() 993 + /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 994 + * with headroom may add hole in truesize, which 995 + * make their length exceed PAGE_SIZE. So we disabled the 996 + * hole mechanism for xdp. See add_recvbuf_mergeable(). 1134 997 */ 1135 - frame_sz = headroom ? PAGE_SIZE : truesize; 998 + frame_sz = truesize; 1136 999 1137 - /* This happens when rx buffer size is underestimated 1138 - * or headroom is not enough because of the buffer 1139 - * was refilled before XDP is set. This should only 1140 - * happen for the first several packets, so we don't 1141 - * care much about its performance. 
1000 + /* This happens when headroom is not enough because 1001 + * of the buffer was prefilled before XDP is set. 1002 + * This should only happen for the first several packets. 1003 + * In fact, vq reset can be used here to help us clean up 1004 + * the prefilled buffers, but many existing devices do not 1005 + * support it, and we don't want to bother users who are 1006 + * using xdp normally. 1142 1007 */ 1143 - if (unlikely(num_buf > 1 || 1144 - headroom < virtnet_get_headroom(vi))) { 1008 + if (!xdp_prog->aux->xdp_has_frags && 1009 + (num_buf > 1 || headroom < virtnet_get_headroom(vi))) { 1145 1010 /* linearize data for XDP */ 1146 1011 xdp_page = xdp_linearize_page(rq, &num_buf, 1147 1012 page, offset, ··· 1156 1013 if (!xdp_page) 1157 1014 goto err_xdp; 1158 1015 offset = VIRTIO_XDP_HEADROOM; 1016 + } else if (unlikely(headroom < virtnet_get_headroom(vi))) { 1017 + xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 1018 + sizeof(struct skb_shared_info)); 1019 + if (len + xdp_room > PAGE_SIZE) 1020 + goto err_xdp; 1021 + 1022 + xdp_page = alloc_page(GFP_ATOMIC); 1023 + if (!xdp_page) 1024 + goto err_xdp; 1025 + 1026 + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, 1027 + page_address(page) + offset, len); 1028 + frame_sz = PAGE_SIZE; 1029 + offset = VIRTIO_XDP_HEADROOM; 1159 1030 } else { 1160 1031 xdp_page = page; 1161 1032 } 1162 1033 1163 - /* Allow consuming headroom but reserve enough space to push 1164 - * the descriptor on if we get an XDP_TX return code. 
1165 - */ 1166 1034 data = page_address(xdp_page) + offset; 1167 - xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq); 1168 - xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len, 1169 - VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true); 1035 + err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 1036 + &num_buf, &xdp_frags_truesz, stats); 1037 + if (unlikely(err)) 1038 + goto err_xdp_frags; 1170 1039 1171 1040 act = bpf_prog_run_xdp(xdp_prog, &xdp); 1172 1041 stats->xdp_packets++; 1173 1042 1174 1043 switch (act) { 1175 1044 case XDP_PASS: 1176 - metasize = xdp.data - xdp.data_meta; 1177 - 1178 - /* recalculate offset to account for any header 1179 - * adjustments and minus the metasize to copy the 1180 - * metadata in page_to_skb(). Note other cases do not 1181 - * build an skb and avoid using offset 1182 - */ 1183 - offset = xdp.data - page_address(xdp_page) - 1184 - vi->hdr_len - metasize; 1185 - 1186 - /* recalculate len if xdp.data, xdp.data_end or 1187 - * xdp.data_meta were adjusted 1188 - */ 1189 - len = xdp.data_end - xdp.data + vi->hdr_len + metasize; 1190 - 1191 - /* recalculate headroom if xdp.data or xdp_data_meta 1192 - * were adjusted, note that offset should always point 1193 - * to the start of the reserved bytes for virtio_net 1194 - * header which are followed by xdp.data, that means 1195 - * that offset is equal to the headroom (when buf is 1196 - * starting at the beginning of the page, otherwise 1197 - * there is a base offset inside the page) but it's used 1198 - * with a different starting point (buf start) than 1199 - * xdp.data (buf start + vnet hdr size). 
If xdp.data or 1200 - * data_meta were adjusted by the xdp prog then the 1201 - * headroom size has changed and so has the offset, we 1202 - * can use data_hard_start, which points at buf start + 1203 - * vnet hdr size, to calculate the new headroom and use 1204 - * it later to compute buf start in page_to_skb() 1205 - */ 1206 - headroom = xdp.data - xdp.data_hard_start - metasize; 1207 - 1208 - /* We can only create skb based on xdp_page. */ 1209 - if (unlikely(xdp_page != page)) { 1210 - rcu_read_unlock(); 1045 + if (unlikely(xdp_page != page)) 1211 1046 put_page(page); 1212 - head_skb = page_to_skb(vi, rq, xdp_page, offset, 1213 - len, PAGE_SIZE, false, 1214 - metasize, 1215 - headroom); 1216 - return head_skb; 1217 - } 1218 - break; 1047 + head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 1048 + rcu_read_unlock(); 1049 + return head_skb; 1219 1050 case XDP_TX: 1220 1051 stats->xdp_tx++; 1221 1052 xdpf = xdp_convert_buff_to_frame(&xdp); 1222 1053 if (unlikely(!xdpf)) { 1223 - if (unlikely(xdp_page != page)) 1224 - put_page(xdp_page); 1225 - goto err_xdp; 1054 + netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1055 + goto err_xdp_frags; 1226 1056 } 1227 1057 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1228 1058 if (unlikely(!err)) { 1229 1059 xdp_return_frame_rx_napi(xdpf); 1230 1060 } else if (unlikely(err < 0)) { 1231 1061 trace_xdp_exception(vi->dev, xdp_prog, act); 1232 - if (unlikely(xdp_page != page)) 1233 - put_page(xdp_page); 1234 - goto err_xdp; 1062 + goto err_xdp_frags; 1235 1063 } 1236 1064 *xdp_xmit |= VIRTIO_XDP_TX; 1237 1065 if (unlikely(xdp_page != page)) ··· 1212 1098 case XDP_REDIRECT: 1213 1099 stats->xdp_redirects++; 1214 1100 err = xdp_do_redirect(dev, &xdp, xdp_prog); 1215 - if (err) { 1216 - if (unlikely(xdp_page != page)) 1217 - put_page(xdp_page); 1218 - goto err_xdp; 1219 - } 1101 + if (err) 1102 + goto err_xdp_frags; 1220 1103 *xdp_xmit |= VIRTIO_XDP_REDIR; 1221 1104 if (unlikely(xdp_page != page)) 1222 1105 
put_page(page); ··· 1226 1115 trace_xdp_exception(vi->dev, xdp_prog, act); 1227 1116 fallthrough; 1228 1117 case XDP_DROP: 1229 - if (unlikely(xdp_page != page)) 1230 - __free_pages(xdp_page, 0); 1231 - goto err_xdp; 1118 + goto err_xdp_frags; 1232 1119 } 1120 + err_xdp_frags: 1121 + if (unlikely(xdp_page != page)) 1122 + __free_pages(xdp_page, 0); 1123 + 1124 + if (xdp_buff_has_frags(&xdp)) { 1125 + shinfo = xdp_get_shared_info_from_buff(&xdp); 1126 + for (i = 0; i < shinfo->nr_frags; i++) { 1127 + xdp_page = skb_frag_page(&shinfo->frags[i]); 1128 + put_page(xdp_page); 1129 + } 1130 + } 1131 + 1132 + goto err_xdp; 1233 1133 } 1234 1134 rcu_read_unlock(); 1235 1135 1236 1136 skip_xdp: 1237 - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, 1238 - metasize, headroom); 1137 + head_skb = page_to_skb(vi, rq, page, offset, len, truesize); 1239 1138 curr_skb = head_skb; 1240 1139 1241 1140 if (unlikely(!curr_skb)) ··· 1267 1146 page = virt_to_head_page(buf); 1268 1147 1269 1148 truesize = mergeable_ctx_to_truesize(ctx); 1270 - if (unlikely(len > truesize)) { 1149 + headroom = mergeable_ctx_to_headroom(ctx); 1150 + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1151 + room = SKB_DATA_ALIGN(headroom + tailroom); 1152 + if (unlikely(len > truesize - room)) { 1271 1153 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1272 - dev->name, len, (unsigned long)ctx); 1154 + dev->name, len, (unsigned long)(truesize - room)); 1273 1155 dev->stats.rx_length_errors++; 1274 1156 goto err_skb; 1275 1157 } ··· 1550 1426 /* To avoid internal fragmentation, if there is very likely not 1551 1427 * enough space for another buffer, add the remaining space to 1552 1428 * the current buffer. 1429 + * XDP core assumes that frame_size of xdp_buff and the length 1430 + * of the frag are PAGE_SIZE, so we disable the hole mechanism. 
1553 1431 */ 1554 - len += hole; 1432 + if (!headroom) 1433 + len += hole; 1555 1434 alloc_frag->offset += hole; 1556 1435 } 1557 1436 1558 1437 sg_init_one(rq->sg, buf, len); 1559 - ctx = mergeable_len_to_ctx(len, headroom); 1438 + ctx = mergeable_len_to_ctx(len + room, headroom); 1560 1439 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 1561 1440 if (err < 0) 1562 1441 put_page(virt_to_head_page(buf)); ··· 1735 1608 } else { 1736 1609 struct xdp_frame *frame = ptr_to_xdp(ptr); 1737 1610 1738 - bytes += frame->len; 1611 + bytes += xdp_get_frame_len(frame); 1739 1612 xdp_return_frame(frame); 1740 1613 } 1741 1614 packets++; ··· 3205 3078 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 3206 3079 struct netlink_ext_ack *extack) 3207 3080 { 3208 - unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr); 3081 + unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 3082 + sizeof(struct skb_shared_info)); 3083 + unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 3209 3084 struct virtnet_info *vi = netdev_priv(dev); 3210 3085 struct bpf_prog *old_prog; 3211 3086 u16 xdp_qp = 0, curr_qp; ··· 3230 3101 return -EINVAL; 3231 3102 } 3232 3103 3233 - if (dev->mtu > max_sz) { 3234 - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); 3235 - netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz); 3104 + if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 3105 + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 3106 + netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 3236 3107 return -EINVAL; 3237 3108 } 3238 3109