Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

vhost: rewind next_avail_head while discarding descriptors

When discarding descriptors with IN_ORDER, we should also rewind
next_avail_head; otherwise it falls out of sync with
last_avail_idx. This would cause the driver to report
"id X is not a head".

Fix this by returning the number of descriptors that are used for
each buffer via vhost_get_vq_desc_n(), so the caller can use the value
when discarding descriptors.

Fixes: 67a873df0c41 ("vhost: basic in order support")
Cc: stable@vger.kernel.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://patch.msgid.link/20251120022950.10117-1-jasowang@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Jason Wang and committed by
Jakub Kicinski
779bcdd4 0ebc27a4

+103 -36
+32 -21
drivers/vhost/net.c
··· 592 592 static int vhost_net_tx_get_vq_desc(struct vhost_net *net, 593 593 struct vhost_net_virtqueue *tnvq, 594 594 unsigned int *out_num, unsigned int *in_num, 595 - struct msghdr *msghdr, bool *busyloop_intr) 595 + struct msghdr *msghdr, bool *busyloop_intr, 596 + unsigned int *ndesc) 596 597 { 597 598 struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; 598 599 struct vhost_virtqueue *rvq = &rnvq->vq; 599 600 struct vhost_virtqueue *tvq = &tnvq->vq; 600 601 601 - int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), 602 - out_num, in_num, NULL, NULL); 602 + int r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), 603 + out_num, in_num, NULL, NULL, ndesc); 603 604 604 605 if (r == tvq->num && tvq->busyloop_timeout) { 605 606 /* Flush batched packets first */ ··· 611 610 612 611 vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false); 613 612 614 - r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), 615 - out_num, in_num, NULL, NULL); 613 + r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), 614 + out_num, in_num, NULL, NULL, ndesc); 616 615 } 617 616 618 617 return r; ··· 643 642 struct vhost_net_virtqueue *nvq, 644 643 struct msghdr *msg, 645 644 unsigned int *out, unsigned int *in, 646 - size_t *len, bool *busyloop_intr) 645 + size_t *len, bool *busyloop_intr, 646 + unsigned int *ndesc) 647 647 { 648 648 struct vhost_virtqueue *vq = &nvq->vq; 649 649 int ret; 650 650 651 - ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr); 651 + ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, 652 + busyloop_intr, ndesc); 652 653 653 654 if (ret < 0 || ret == vq->num) 654 655 return ret; ··· 769 766 int sent_pkts = 0; 770 767 bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); 771 768 bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); 769 + unsigned int ndesc = 0; 772 770 773 771 do { 774 772 bool busyloop_intr = false; ··· 778 774 vhost_tx_batch(net, nvq, sock, &msg); 779 775 780 776 head = 
get_tx_bufs(net, nvq, &msg, &out, &in, &len, 781 - &busyloop_intr); 777 + &busyloop_intr, &ndesc); 782 778 /* On error, stop handling until the next kick. */ 783 779 if (unlikely(head < 0)) 784 780 break; ··· 810 806 goto done; 811 807 } else if (unlikely(err != -ENOSPC)) { 812 808 vhost_tx_batch(net, nvq, sock, &msg); 813 - vhost_discard_vq_desc(vq, 1); 809 + vhost_discard_vq_desc(vq, 1, ndesc); 814 810 vhost_net_enable_vq(net, vq); 815 811 break; 816 812 } ··· 833 829 err = sock->ops->sendmsg(sock, &msg, len); 834 830 if (unlikely(err < 0)) { 835 831 if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { 836 - vhost_discard_vq_desc(vq, 1); 832 + vhost_discard_vq_desc(vq, 1, ndesc); 837 833 vhost_net_enable_vq(net, vq); 838 834 break; 839 835 } ··· 872 868 int err; 873 869 struct vhost_net_ubuf_ref *ubufs; 874 870 struct ubuf_info_msgzc *ubuf; 871 + unsigned int ndesc = 0; 875 872 bool zcopy_used; 876 873 int sent_pkts = 0; 877 874 ··· 884 879 885 880 busyloop_intr = false; 886 881 head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, 887 - &busyloop_intr); 882 + &busyloop_intr, &ndesc); 888 883 /* On error, stop handling until the next kick. 
*/ 889 884 if (unlikely(head < 0)) 890 885 break; ··· 946 941 vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; 947 942 } 948 943 if (retry) { 949 - vhost_discard_vq_desc(vq, 1); 944 + vhost_discard_vq_desc(vq, 1, ndesc); 950 945 vhost_net_enable_vq(net, vq); 951 946 break; 952 947 } ··· 1050 1045 unsigned *iovcount, 1051 1046 struct vhost_log *log, 1052 1047 unsigned *log_num, 1053 - unsigned int quota) 1048 + unsigned int quota, 1049 + unsigned int *ndesc) 1054 1050 { 1055 1051 struct vhost_virtqueue *vq = &nvq->vq; 1056 1052 bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); 1057 - unsigned int out, in; 1053 + unsigned int out, in, desc_num, n = 0; 1058 1054 int seg = 0; 1059 1055 int headcount = 0; 1060 1056 unsigned d; ··· 1070 1064 r = -ENOBUFS; 1071 1065 goto err; 1072 1066 } 1073 - r = vhost_get_vq_desc(vq, vq->iov + seg, 1074 - ARRAY_SIZE(vq->iov) - seg, &out, 1075 - &in, log, log_num); 1067 + r = vhost_get_vq_desc_n(vq, vq->iov + seg, 1068 + ARRAY_SIZE(vq->iov) - seg, &out, 1069 + &in, log, log_num, &desc_num); 1076 1070 if (unlikely(r < 0)) 1077 1071 goto err; 1078 1072 ··· 1099 1093 ++headcount; 1100 1094 datalen -= len; 1101 1095 seg += in; 1096 + n += desc_num; 1102 1097 } 1103 1098 1104 1099 *iovcount = seg; ··· 1120 1113 nheads[0] = headcount; 1121 1114 } 1122 1115 1116 + *ndesc = n; 1117 + 1123 1118 return headcount; 1124 1119 err: 1125 - vhost_discard_vq_desc(vq, headcount); 1120 + vhost_discard_vq_desc(vq, headcount, n); 1126 1121 return r; 1127 1122 } 1128 1123 ··· 1160 1151 struct iov_iter fixup; 1161 1152 __virtio16 num_buffers; 1162 1153 int recv_pkts = 0; 1154 + unsigned int ndesc; 1163 1155 1164 1156 mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX); 1165 1157 sock = vhost_vq_get_backend(vq); ··· 1192 1182 headcount = get_rx_bufs(nvq, vq->heads + count, 1193 1183 vq->nheads + count, 1194 1184 vhost_len, &in, vq_log, &log, 1195 - likely(mergeable) ? UIO_MAXIOV : 1); 1185 + likely(mergeable) ? 
UIO_MAXIOV : 1, 1186 + &ndesc); 1196 1187 /* On error, stop handling until the next kick. */ 1197 1188 if (unlikely(headcount < 0)) 1198 1189 goto out; ··· 1239 1228 if (unlikely(err != sock_len)) { 1240 1229 pr_debug("Discarded rx packet: " 1241 1230 " len %d, expected %zd\n", err, sock_len); 1242 - vhost_discard_vq_desc(vq, headcount); 1231 + vhost_discard_vq_desc(vq, headcount, ndesc); 1243 1232 continue; 1244 1233 } 1245 1234 /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ ··· 1263 1252 copy_to_iter(&num_buffers, sizeof num_buffers, 1264 1253 &fixup) != sizeof num_buffers) { 1265 1254 vq_err(vq, "Failed num_buffers write"); 1266 - vhost_discard_vq_desc(vq, headcount); 1255 + vhost_discard_vq_desc(vq, headcount, ndesc); 1267 1256 goto out; 1268 1257 } 1269 1258 nvq->done_idx += headcount;
+62 -14
drivers/vhost/vhost.c
··· 2792 2792 return 0; 2793 2793 } 2794 2794 2795 - /* This looks in the virtqueue and for the first available buffer, and converts 2796 - * it to an iovec for convenient access. Since descriptors consist of some 2797 - * number of output then some number of input descriptors, it's actually two 2798 - * iovecs, but we pack them into one and note how many of each there were. 2795 + /** 2796 + * vhost_get_vq_desc_n - Fetch the next available descriptor chain and build iovecs 2797 + * @vq: target virtqueue 2798 + * @iov: array that receives the scatter/gather segments 2799 + * @iov_size: capacity of @iov in elements 2800 + * @out_num: the number of output segments 2801 + * @in_num: the number of input segments 2802 + * @log: optional array to record addr/len for each writable segment; NULL if unused 2803 + * @log_num: optional output; number of entries written to @log when provided 2804 + * @ndesc: optional output; number of descriptors consumed from the available ring 2805 + * (useful for rollback via vhost_discard_vq_desc) 2799 2806 * 2800 - * This function returns the descriptor number found, or vq->num (which is 2801 - * never a valid descriptor number) if none was found. A negative code is 2802 - * returned on error. */ 2803 - int vhost_get_vq_desc(struct vhost_virtqueue *vq, 2804 - struct iovec iov[], unsigned int iov_size, 2805 - unsigned int *out_num, unsigned int *in_num, 2806 - struct vhost_log *log, unsigned int *log_num) 2807 + * Extracts one available descriptor chain from @vq and translates guest addresses 2808 + * into host iovecs. 2809 + * 2810 + * On success, advances @vq->last_avail_idx by 1 and @vq->next_avail_head by the 2811 + * number of descriptors consumed (also stored via @ndesc when non-NULL). 
2812 + * 2813 + * Return: 2814 + * - head index in [0, @vq->num) on success; 2815 + * - @vq->num if no descriptor is currently available; 2816 + * - negative errno on failure 2817 + */ 2818 + int vhost_get_vq_desc_n(struct vhost_virtqueue *vq, 2819 + struct iovec iov[], unsigned int iov_size, 2820 + unsigned int *out_num, unsigned int *in_num, 2821 + struct vhost_log *log, unsigned int *log_num, 2822 + unsigned int *ndesc) 2807 2823 { 2808 2824 bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); 2809 2825 struct vring_desc desc; ··· 2937 2921 vq->last_avail_idx++; 2938 2922 vq->next_avail_head += c; 2939 2923 2924 + if (ndesc) 2925 + *ndesc = c; 2926 + 2940 2927 /* Assume notifications from guest are disabled at this point, 2941 2928 * if they aren't we would need to update avail_event index. */ 2942 2929 BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); 2943 2930 return head; 2944 2931 } 2932 + EXPORT_SYMBOL_GPL(vhost_get_vq_desc_n); 2933 + 2934 + /* This looks in the virtqueue and for the first available buffer, and converts 2935 + * it to an iovec for convenient access. Since descriptors consist of some 2936 + * number of output then some number of input descriptors, it's actually two 2937 + * iovecs, but we pack them into one and note how many of each there were. 2938 + * 2939 + * This function returns the descriptor number found, or vq->num (which is 2940 + * never a valid descriptor number) if none was found. A negative code is 2941 + * returned on error. 2942 + */ 2943 + int vhost_get_vq_desc(struct vhost_virtqueue *vq, 2944 + struct iovec iov[], unsigned int iov_size, 2945 + unsigned int *out_num, unsigned int *in_num, 2946 + struct vhost_log *log, unsigned int *log_num) 2947 + { 2948 + return vhost_get_vq_desc_n(vq, iov, iov_size, out_num, in_num, 2949 + log, log_num, NULL); 2950 + } 2945 2951 EXPORT_SYMBOL_GPL(vhost_get_vq_desc); 2946 2952 2947 - /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. 
*/ 2948 - void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) 2953 + /** 2954 + * vhost_discard_vq_desc - Reverse the effect of vhost_get_vq_desc_n() 2955 + * @vq: target virtqueue 2956 + * @nbufs: number of buffers to roll back 2957 + * @ndesc: number of descriptors to roll back 2958 + * 2959 + * Rewinds the internal consumer cursors after a failed attempt to use buffers 2960 + * returned by vhost_get_vq_desc_n(). 2961 + */ 2962 + void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int nbufs, 2963 + unsigned int ndesc) 2949 2964 { 2950 - vq->last_avail_idx -= n; 2965 + vq->next_avail_head -= ndesc; 2966 + vq->last_avail_idx -= nbufs; 2951 2967 } 2952 2968 EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); 2953 2969
+9 -1
drivers/vhost/vhost.h
··· 230 230 struct iovec iov[], unsigned int iov_size, 231 231 unsigned int *out_num, unsigned int *in_num, 232 232 struct vhost_log *log, unsigned int *log_num); 233 - void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); 233 + 234 + int vhost_get_vq_desc_n(struct vhost_virtqueue *vq, 235 + struct iovec iov[], unsigned int iov_size, 236 + unsigned int *out_num, unsigned int *in_num, 237 + struct vhost_log *log, unsigned int *log_num, 238 + unsigned int *ndesc); 239 + 240 + void vhost_discard_vq_desc(struct vhost_virtqueue *, int nbuf, 241 + unsigned int ndesc); 234 242 235 243 bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work); 236 244 bool vhost_vq_has_work(struct vhost_virtqueue *vq);