virtio-fs: improved request latencies when Virtio queue is full

Currently, when the Virtio queue is full, a work item is scheduled
to run 1ms later and retry adding the request to the queue.
This is a long time on the scale at which a virtio-fs device can
operate: with a DPU, the baseline latency is around 30-40us
without going to a remote server (4k, QD=1).

This patch changes the retry behavior to refill the Virtio queue
immediately when a completion has been received.

This reduces the 99.9th percentile latencies in our tests by
60x and slightly increases the overall throughput, when using a
workload IO depth 2x the size of the Virtio queue and a
DPU-powered virtio-fs device (NVIDIA BlueField DPU).

Signed-off-by: Peter-Jan Gootzen <pgootzen@nvidia.com>
Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Reviewed-by: Yoray Zack <yorayz@nvidia.com>
Message-Id: <20240517190435.152096-3-pgootzen@nvidia.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

authored by

Peter-Jan Gootzen and committed by

Michael S. Tsirkin 2 years ago 106e4df1 2106e1f4

+19 -15

1 changed file

expand all

fuse

virtio_fs.c

+19 -15

fs/fuse/virtio_fs.c

··· 51 51 struct work_struct done_work; 52 52 struct list_head queued_reqs; 53 53 struct list_head end_reqs; /* End these requests */ 54 - struct delayed_work dispatch_work; 54 + struct work_struct dispatch_work; 55 55 struct fuse_dev *fud; 56 56 bool connected; 57 57 long in_flight; ··· 233 233 } 234 234 235 235 flush_work(&fsvq->done_work); 236 - flush_delayed_work(&fsvq->dispatch_work); 236 + flush_work(&fsvq->dispatch_work); 237 237 } 238 238 239 239 static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) ··· 408 408 dec_in_flight_req(fsvq); 409 409 } 410 410 } while (!virtqueue_enable_cb(vq)); 411 + 412 + if (!list_empty(&fsvq->queued_reqs)) 413 + schedule_work(&fsvq->dispatch_work); 414 + 411 415 spin_unlock(&fsvq->lock); 412 416 } 413 417 ··· 419 415 { 420 416 struct fuse_req *req; 421 417 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 422 - dispatch_work.work); 418 + dispatch_work); 423 419 int ret; 424 420 425 421 pr_debug("virtio-fs: worker %s called.\n", __func__); ··· 454 450 if (ret == -ENOSPC) { 455 451 spin_lock(&fsvq->lock); 456 452 list_add_tail(&req->list, &fsvq->queued_reqs); 457 - schedule_delayed_work(&fsvq->dispatch_work, 458 - msecs_to_jiffies(1)); 459 453 spin_unlock(&fsvq->lock); 460 454 return; 461 455 } ··· 500 498 pr_debug("virtio-fs: Could not queue FORGET: err=%d. 
Will try later\n", 501 499 ret); 502 500 list_add_tail(&forget->list, &fsvq->queued_reqs); 503 - schedule_delayed_work(&fsvq->dispatch_work, 504 - msecs_to_jiffies(1)); 505 501 if (!in_flight) 506 502 inc_in_flight_req(fsvq); 507 503 /* Queue is full */ ··· 531 531 { 532 532 struct virtio_fs_forget *forget; 533 533 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 534 - dispatch_work.work); 534 + dispatch_work); 535 535 pr_debug("virtio-fs: worker %s called.\n", __func__); 536 536 while (1) { 537 537 spin_lock(&fsvq->lock); ··· 709 709 virtio_fs_request_complete(req, fsvq); 710 710 } 711 711 } 712 + 713 + /* Try to push previously queued requests, as the queue might no longer be full */ 714 + spin_lock(&fsvq->lock); 715 + if (!list_empty(&fsvq->queued_reqs)) 716 + schedule_work(&fsvq->dispatch_work); 717 + spin_unlock(&fsvq->lock); 712 718 } 713 719 714 720 static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs) ··· 776 770 777 771 if (vq_type == VQ_REQUEST) { 778 772 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 779 - INIT_DELAYED_WORK(&fsvq->dispatch_work, 780 - virtio_fs_request_dispatch_work); 773 + INIT_WORK(&fsvq->dispatch_work, 774 + virtio_fs_request_dispatch_work); 781 775 } else { 782 776 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 783 - INIT_DELAYED_WORK(&fsvq->dispatch_work, 784 - virtio_fs_hiprio_dispatch_work); 777 + INIT_WORK(&fsvq->dispatch_work, 778 + virtio_fs_hiprio_dispatch_work); 785 779 } 786 780 } 787 781 ··· 1381 1375 spin_lock(&fsvq->lock); 1382 1376 list_add_tail(&req->list, &fsvq->queued_reqs); 1383 1377 inc_in_flight_req(fsvq); 1384 - schedule_delayed_work(&fsvq->dispatch_work, 1385 - msecs_to_jiffies(1)); 1386 1378 spin_unlock(&fsvq->lock); 1387 1379 return; 1388 1380 } ··· 1390 1386 /* Can't end request in submission context. 
Use a worker */ 1391 1387 spin_lock(&fsvq->lock); 1392 1388 list_add_tail(&req->list, &fsvq->end_reqs); 1393 - schedule_delayed_work(&fsvq->dispatch_work, 0); 1389 + schedule_work(&fsvq->dispatch_work); 1394 1390 spin_unlock(&fsvq->lock); 1395 1391 return; 1396 1392 }

Configure Feed

Configure Feed