Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio bug fixes from Michael Tsirkin:
"A bunch of fixes all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
tools/virtio: use canonical ftrace path
vhost_vdpa: support PACKED when setting-getting vring_base
vhost: support PACKED when setting-getting vring_base
vhost: Fix worker hangs due to missed wake up calls
vhost: Fix crash during early vhost_transport_send_pkt calls
vhost_net: revert upend_idx only on retriable error
vhost_vdpa: tell vqs about the negotiated features
vdpa/mlx5: Fix hang when cvq commands are triggered during device unregister
tools/virtio: Add .gitignore for ringtest
tools/virtio: Fix arm64 ringtest compilation error
vduse: avoid empty string for dev name
vhost: use kzalloc() instead of kmalloc() followed by memset()

+120 -65
+1 -1
drivers/vdpa/mlx5/net/mlx5_vnet.c
··· 3349 3349 mlx5_vdpa_remove_debugfs(ndev->debugfs); 3350 3350 ndev->debugfs = NULL; 3351 3351 unregister_link_notifier(ndev); 3352 + _vdpa_unregister_device(dev); 3352 3353 wq = mvdev->wq; 3353 3354 mvdev->wq = NULL; 3354 3355 destroy_workqueue(wq); 3355 - _vdpa_unregister_device(dev); 3356 3356 mgtdev->ndev = NULL; 3357 3357 } 3358 3358
+3
drivers/vdpa/vdpa_user/vduse_dev.c
··· 1685 1685 if (config->vq_num > 0xffff) 1686 1686 return false; 1687 1687 1688 + if (!config->name[0]) 1689 + return false; 1690 + 1688 1691 if (!device_is_allowed(config->device_id)) 1689 1692 return false; 1690 1693
+8 -3
drivers/vhost/net.c
··· 935 935 936 936 err = sock->ops->sendmsg(sock, &msg, len); 937 937 if (unlikely(err < 0)) { 938 + bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS; 939 + 938 940 if (zcopy_used) { 939 941 if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) 940 942 vhost_net_ubuf_put(ubufs); 941 - nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) 942 - % UIO_MAXIOV; 943 + if (retry) 944 + nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) 945 + % UIO_MAXIOV; 946 + else 947 + vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; 943 948 } 944 - if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { 949 + if (retry) { 945 950 vhost_discard_vq_desc(vq, 1); 946 951 vhost_net_enable_vq(net, vq); 947 952 break;
+30 -4
drivers/vhost/vdpa.c
··· 407 407 { 408 408 struct vdpa_device *vdpa = v->vdpa; 409 409 const struct vdpa_config_ops *ops = vdpa->config; 410 + struct vhost_dev *d = &v->vdev; 411 + u64 actual_features; 410 412 u64 features; 413 + int i; 411 414 412 415 /* 413 416 * It's not allowed to change the features after they have ··· 424 421 425 422 if (vdpa_set_features(vdpa, features)) 426 423 return -EINVAL; 424 + 425 + /* let the vqs know what has been configured */ 426 + actual_features = ops->get_driver_features(vdpa); 427 + for (i = 0; i < d->nvqs; ++i) { 428 + struct vhost_virtqueue *vq = d->vqs[i]; 429 + 430 + mutex_lock(&vq->mutex); 431 + vq->acked_features = actual_features; 432 + mutex_unlock(&vq->mutex); 433 + } 427 434 428 435 return 0; 429 436 } ··· 607 594 if (r) 608 595 return r; 609 596 610 - vq->last_avail_idx = vq_state.split.avail_index; 597 + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { 598 + vq->last_avail_idx = vq_state.packed.last_avail_idx | 599 + (vq_state.packed.last_avail_counter << 15); 600 + vq->last_used_idx = vq_state.packed.last_used_idx | 601 + (vq_state.packed.last_used_counter << 15); 602 + } else { 603 + vq->last_avail_idx = vq_state.split.avail_index; 604 + } 611 605 break; 612 606 } 613 607 ··· 632 612 break; 633 613 634 614 case VHOST_SET_VRING_BASE: 635 - vq_state.split.avail_index = vq->last_avail_idx; 636 - if (ops->set_vq_state(vdpa, idx, &vq_state)) 637 - r = -EINVAL; 615 + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { 616 + vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; 617 + vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); 618 + vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff; 619 + vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000); 620 + } else { 621 + vq_state.split.avail_index = vq->last_avail_idx; 622 + } 623 + r = ops->set_vq_state(vdpa, idx, &vq_state); 638 624 break; 639 625 640 626 case VHOST_SET_VRING_CALL:
+34 -41
drivers/vhost/vhost.c
··· 235 235 { 236 236 struct vhost_flush_struct flush; 237 237 238 - if (dev->worker) { 238 + if (dev->worker.vtsk) { 239 239 init_completion(&flush.wait_event); 240 240 vhost_work_init(&flush.work, vhost_flush_work); 241 241 ··· 247 247 248 248 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) 249 249 { 250 - if (!dev->worker) 250 + if (!dev->worker.vtsk) 251 251 return; 252 252 253 253 if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { ··· 255 255 * sure it was not in the list. 256 256 * test_and_set_bit() implies a memory barrier. 257 257 */ 258 - llist_add(&work->node, &dev->worker->work_list); 259 - vhost_task_wake(dev->worker->vtsk); 258 + llist_add(&work->node, &dev->worker.work_list); 259 + vhost_task_wake(dev->worker.vtsk); 260 260 } 261 261 } 262 262 EXPORT_SYMBOL_GPL(vhost_work_queue); ··· 264 264 /* A lockless hint for busy polling code to exit the loop */ 265 265 bool vhost_has_work(struct vhost_dev *dev) 266 266 { 267 - return dev->worker && !llist_empty(&dev->worker->work_list); 267 + return !llist_empty(&dev->worker.work_list); 268 268 } 269 269 EXPORT_SYMBOL_GPL(vhost_has_work); 270 270 ··· 341 341 342 342 node = llist_del_all(&worker->work_list); 343 343 if (node) { 344 + __set_current_state(TASK_RUNNING); 345 + 344 346 node = llist_reverse_order(node); 345 347 /* make sure flag is seen after deletion */ 346 348 smp_wmb(); ··· 458 456 dev->umem = NULL; 459 457 dev->iotlb = NULL; 460 458 dev->mm = NULL; 461 - dev->worker = NULL; 459 + memset(&dev->worker, 0, sizeof(dev->worker)); 460 + init_llist_head(&dev->worker.work_list); 462 461 dev->iov_limit = iov_limit; 463 462 dev->weight = weight; 464 463 dev->byte_weight = byte_weight; ··· 533 530 534 531 static void vhost_worker_free(struct vhost_dev *dev) 535 532 { 536 - struct vhost_worker *worker = dev->worker; 537 - 538 - if (!worker) 533 + if (!dev->worker.vtsk) 539 534 return; 540 535 541 - dev->worker = NULL; 542 - WARN_ON(!llist_empty(&worker->work_list)); 543 - 
vhost_task_stop(worker->vtsk); 544 - kfree(worker); 536 + WARN_ON(!llist_empty(&dev->worker.work_list)); 537 + vhost_task_stop(dev->worker.vtsk); 538 + dev->worker.kcov_handle = 0; 539 + dev->worker.vtsk = NULL; 545 540 } 546 541 547 542 static int vhost_worker_create(struct vhost_dev *dev) 548 543 { 549 - struct vhost_worker *worker; 550 544 struct vhost_task *vtsk; 551 545 char name[TASK_COMM_LEN]; 552 - int ret; 553 546 554 - worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); 555 - if (!worker) 556 - return -ENOMEM; 557 - 558 - dev->worker = worker; 559 - worker->kcov_handle = kcov_common_handle(); 560 - init_llist_head(&worker->work_list); 561 547 snprintf(name, sizeof(name), "vhost-%d", current->pid); 562 548 563 - vtsk = vhost_task_create(vhost_worker, worker, name); 564 - if (!vtsk) { 565 - ret = -ENOMEM; 566 - goto free_worker; 567 - } 549 + vtsk = vhost_task_create(vhost_worker, &dev->worker, name); 550 + if (!vtsk) 551 + return -ENOMEM; 568 552 569 - worker->vtsk = vtsk; 553 + dev->worker.kcov_handle = kcov_common_handle(); 554 + dev->worker.vtsk = vtsk; 570 555 vhost_task_start(vtsk); 571 556 return 0; 572 - 573 - free_worker: 574 - kfree(worker); 575 - dev->worker = NULL; 576 - return ret; 577 557 } 578 558 579 559 /* Caller should have device mutex */ ··· 1600 1614 r = -EFAULT; 1601 1615 break; 1602 1616 } 1603 - if (s.num > 0xffff) { 1604 - r = -EINVAL; 1605 - break; 1617 + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { 1618 + vq->last_avail_idx = s.num & 0xffff; 1619 + vq->last_used_idx = (s.num >> 16) & 0xffff; 1620 + } else { 1621 + if (s.num > 0xffff) { 1622 + r = -EINVAL; 1623 + break; 1624 + } 1625 + vq->last_avail_idx = s.num; 1606 1626 } 1607 - vq->last_avail_idx = s.num; 1608 1627 /* Forget the cached index value. 
*/ 1609 1628 vq->avail_idx = vq->last_avail_idx; 1610 1629 break; 1611 1630 case VHOST_GET_VRING_BASE: 1612 1631 s.index = idx; 1613 - s.num = vq->last_avail_idx; 1632 + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) 1633 + s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); 1634 + else 1635 + s.num = vq->last_avail_idx; 1614 1636 if (copy_to_user(argp, &s, sizeof s)) 1615 1637 r = -EFAULT; 1616 1638 break; ··· 2557 2563 /* Create a new message. */ 2558 2564 struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) 2559 2565 { 2560 - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); 2566 + /* Make sure all padding within the structure is initialized. */ 2567 + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); 2561 2568 if (!node) 2562 2569 return NULL; 2563 2570 2564 - /* Make sure all padding within the structure is initialized. */ 2565 - memset(&node->msg, 0, sizeof node->msg); 2566 2571 node->vq = vq; 2567 2572 node->msg.type = type; 2568 2573 return node;
+7 -3
drivers/vhost/vhost.h
··· 92 92 /* The routine to call when the Guest pings us, or timeout. */ 93 93 vhost_work_fn_t handle_kick; 94 94 95 - /* Last available index we saw. */ 95 + /* Last available index we saw. 96 + * Values are limited to 0x7fff, and the high bit is used as 97 + * a wrap counter when using VIRTIO_F_RING_PACKED. */ 96 98 u16 last_avail_idx; 97 99 98 100 /* Caches available index value from user. */ 99 101 u16 avail_idx; 100 102 101 - /* Last index we used. */ 103 + /* Last index we used. 104 + * Values are limited to 0x7fff, and the high bit is used as 105 + * a wrap counter when using VIRTIO_F_RING_PACKED. */ 102 106 u16 last_used_idx; 103 107 104 108 /* Used flags */ ··· 158 154 struct vhost_virtqueue **vqs; 159 155 int nvqs; 160 156 struct eventfd_ctx *log_ctx; 161 - struct vhost_worker *worker; 157 + struct vhost_worker worker; 162 158 struct vhost_iotlb *umem; 163 159 struct vhost_iotlb *iotlb; 164 160 spinlock_t iotlb_lock;
+10 -8
kernel/vhost_task.c
··· 28 28 for (;;) { 29 29 bool did_work; 30 30 31 - /* mb paired w/ vhost_task_stop */ 32 - if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) 33 - break; 34 - 35 31 if (!dead && signal_pending(current)) { 36 32 struct ksignal ksig; 37 33 /* ··· 44 48 clear_thread_flag(TIF_SIGPENDING); 45 49 } 46 50 47 - did_work = vtsk->fn(vtsk->data); 48 - if (!did_work) { 49 - set_current_state(TASK_INTERRUPTIBLE); 50 - schedule(); 51 + /* mb paired w/ vhost_task_stop */ 52 + set_current_state(TASK_INTERRUPTIBLE); 53 + 54 + if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) { 55 + __set_current_state(TASK_RUNNING); 56 + break; 51 57 } 58 + 59 + did_work = vtsk->fn(vtsk->data); 60 + if (!did_work) 61 + schedule(); 52 62 } 53 63 54 64 complete(&vtsk->exited);
+7
tools/virtio/ringtest/.gitignore
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + /noring 3 + /ptr_ring 4 + /ring 5 + /virtio_ring_0_9 6 + /virtio_ring_inorder 7 + /virtio_ring_poll
+11
tools/virtio/ringtest/main.h
··· 8 8 #ifndef MAIN_H 9 9 #define MAIN_H 10 10 11 + #include <assert.h> 11 12 #include <stdbool.h> 12 13 13 14 extern int param; ··· 96 95 #define cpu_relax() asm ("rep; nop" ::: "memory") 97 96 #elif defined(__s390x__) 98 97 #define cpu_relax() barrier() 98 + #elif defined(__aarch64__) 99 + #define cpu_relax() asm ("yield" ::: "memory") 99 100 #else 100 101 #define cpu_relax() assert(0) 101 102 #endif ··· 115 112 116 113 #if defined(__x86_64__) || defined(__i386__) 117 114 #define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") 115 + #elif defined(__aarch64__) 116 + #define smp_mb() asm volatile("dmb ish" ::: "memory") 118 117 #else 119 118 /* 120 119 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized ··· 141 136 142 137 #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) 143 138 #define smp_wmb() barrier() 139 + #elif defined(__aarch64__) 140 + #define smp_wmb() asm volatile("dmb ishst" ::: "memory") 144 141 #else 145 142 #define smp_wmb() smp_release() 143 + #endif 144 + 145 + #ifndef __always_inline 146 + #define __always_inline inline __attribute__((always_inline)) 146 147 #endif 147 148 148 149 static __always_inline
+1 -1
tools/virtio/virtio-trace/README
··· 95 95 96 96 1) Enable ftrace in the guest 97 97 <Example> 98 - # echo 1 > /sys/kernel/debug/tracing/events/sched/enable 98 + # echo 1 > /sys/kernel/tracing/events/sched/enable 99 99 100 100 2) Run trace agent in the guest 101 101 This agent must be operated as root.
+8 -4
tools/virtio/virtio-trace/trace-agent.c
··· 18 18 #define PIPE_DEF_BUFS 16 19 19 #define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS) 20 20 #define PIPE_MAX_SIZE (1024*1024) 21 - #define READ_PATH_FMT \ 22 - "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw" 21 + #define TRACEFS "/sys/kernel/tracing" 22 + #define DEBUGFS "/sys/kernel/debug/tracing" 23 + #define READ_PATH_FMT "%s/per_cpu/cpu%d/trace_pipe_raw" 23 24 #define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d" 24 25 #define CTL_PATH "/dev/virtio-ports/agent-ctl-path" 25 26 ··· 121 120 if (this_is_write_path) 122 121 /* write(output) path */ 123 122 ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num); 124 - else 123 + else { 125 124 /* read(input) path */ 126 - ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num); 125 + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num); 126 + if (ret > 0 && access(buf, F_OK) != 0) 127 + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num); 128 + } 127 129 128 130 if (ret <= 0) { 129 131 pr_err("Failed to generate %s path(CPU#%d):%d\n",