Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

- vdpa/mlx5: support for resumable vqs

- virtio_scsi: mq_poll support

- virtio_pmem: support SHMEM_REGION

- virtio_balloon: stay awake while adjusting balloon

- virtio: support for no-reset virtio PCI PM

- Fixes, cleanups

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
vdpa/mlx5: Add mkey leak detection
vdpa/mlx5: Introduce reference counting to mrs
vdpa/mlx5: Use vq suspend/resume during .set_map
vdpa/mlx5: Mark vq state for modification in hw vq
vdpa/mlx5: Mark vq addrs for modification in hw vq
vdpa/mlx5: Introduce per vq and device resume
vdpa/mlx5: Allow modifying multiple vq fields in one modify command
vdpa/mlx5: Expose resumable vq capability
vdpa: Block vq property changes in DRIVER_OK
vdpa: Track device suspended state
scsi: virtio_scsi: Add mq_poll support
virtio_pmem: support feature SHMEM_REGION
virtio_balloon: stay awake while adjusting balloon
vdpa: Remove usage of the deprecated ida_simple_xx() API
virtio: Add support for no-reset virtio PCI PM
virtio_net: fix missing dma unmap for resize
vhost-vdpa: account iommu allocations
vdpa: Fix an error handling path in eni_vdpa_probe()

+480 -67
+32 -4
drivers/nvdimm/virtio_pmem.c
··· 29 29 return 0; 30 30 }; 31 31 32 + static int virtio_pmem_validate(struct virtio_device *vdev) 33 + { 34 + struct virtio_shm_region shm_reg; 35 + 36 + if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION) && 37 + !virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID) 38 + ) { 39 + dev_notice(&vdev->dev, "failed to get shared memory region %d\n", 40 + VIRTIO_PMEM_SHMEM_REGION_ID); 41 + __virtio_clear_bit(vdev, VIRTIO_PMEM_F_SHMEM_REGION); 42 + } 43 + return 0; 44 + } 45 + 32 46 static int virtio_pmem_probe(struct virtio_device *vdev) 33 47 { 34 48 struct nd_region_desc ndr_desc = {}; 35 49 struct nd_region *nd_region; 36 50 struct virtio_pmem *vpmem; 37 51 struct resource res; 52 + struct virtio_shm_region shm_reg; 38 53 int err = 0; 39 54 40 55 if (!vdev->config->get) { ··· 72 57 goto out_err; 73 58 } 74 59 75 - virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, 76 - start, &vpmem->start); 77 - virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, 78 - size, &vpmem->size); 60 + if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION)) { 61 + virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID); 62 + vpmem->start = shm_reg.addr; 63 + vpmem->size = shm_reg.len; 64 + } else { 65 + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, 66 + start, &vpmem->start); 67 + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, 68 + size, &vpmem->size); 69 + } 79 70 80 71 res.start = vpmem->start; 81 72 res.end = vpmem->start + vpmem->size - 1; ··· 143 122 virtio_reset_device(vdev); 144 123 } 145 124 125 + static unsigned int features[] = { 126 + VIRTIO_PMEM_F_SHMEM_REGION, 127 + }; 128 + 146 129 static struct virtio_driver virtio_pmem_driver = { 130 + .feature_table = features, 131 + .feature_table_size = ARRAY_SIZE(features), 147 132 .driver.name = KBUILD_MODNAME, 148 133 .driver.owner = THIS_MODULE, 149 134 .id_table = id_table, 135 + .validate = virtio_pmem_validate, 150 136 .probe = virtio_pmem_probe, 151 137 .remove = 
virtio_pmem_remove, 152 138 };
+73 -5
drivers/scsi/virtio_scsi.c
··· 37 37 #define VIRTIO_SCSI_EVENT_LEN 8 38 38 #define VIRTIO_SCSI_VQ_BASE 2 39 39 40 + static unsigned int virtscsi_poll_queues; 41 + module_param(virtscsi_poll_queues, uint, 0644); 42 + MODULE_PARM_DESC(virtscsi_poll_queues, 43 + "The number of dedicated virtqueues for polling I/O"); 44 + 40 45 /* Command queue element */ 41 46 struct virtio_scsi_cmd { 42 47 struct scsi_cmnd *sc; ··· 81 76 struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; 82 77 83 78 u32 num_queues; 79 + int io_queues[HCTX_MAX_TYPES]; 84 80 85 81 struct hlist_node node; 86 82 ··· 728 722 static void virtscsi_map_queues(struct Scsi_Host *shost) 729 723 { 730 724 struct virtio_scsi *vscsi = shost_priv(shost); 731 - struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; 725 + int i, qoff; 732 726 733 - blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2); 727 + for (i = 0, qoff = 0; i < shost->nr_maps; i++) { 728 + struct blk_mq_queue_map *map = &shost->tag_set.map[i]; 729 + 730 + map->nr_queues = vscsi->io_queues[i]; 731 + map->queue_offset = qoff; 732 + qoff += map->nr_queues; 733 + 734 + if (map->nr_queues == 0) 735 + continue; 736 + 737 + /* 738 + * Regular queues have interrupts and hence CPU affinity is 739 + * defined by the core virtio code, but polling queues have 740 + * no interrupts so we let the block layer assign CPU affinity. 
741 + */ 742 + if (i == HCTX_TYPE_POLL) 743 + blk_mq_map_queues(map); 744 + else 745 + blk_mq_virtio_map_queues(map, vscsi->vdev, 2); 746 + } 747 + } 748 + 749 + static int virtscsi_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) 750 + { 751 + struct virtio_scsi *vscsi = shost_priv(shost); 752 + struct virtio_scsi_vq *virtscsi_vq = &vscsi->req_vqs[queue_num]; 753 + unsigned long flags; 754 + unsigned int len; 755 + int found = 0; 756 + void *buf; 757 + 758 + spin_lock_irqsave(&virtscsi_vq->vq_lock, flags); 759 + 760 + while ((buf = virtqueue_get_buf(virtscsi_vq->vq, &len)) != NULL) { 761 + virtscsi_complete_cmd(vscsi, buf); 762 + found++; 763 + } 764 + 765 + spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags); 766 + 767 + return found; 734 768 } 735 769 736 770 static void virtscsi_commit_rqs(struct Scsi_Host *shost, u16 hwq) ··· 797 751 .this_id = -1, 798 752 .cmd_size = sizeof(struct virtio_scsi_cmd), 799 753 .queuecommand = virtscsi_queuecommand, 754 + .mq_poll = virtscsi_mq_poll, 800 755 .commit_rqs = virtscsi_commit_rqs, 801 756 .change_queue_depth = virtscsi_change_queue_depth, 802 757 .eh_abort_handler = virtscsi_abort, ··· 842 795 { 843 796 int err; 844 797 u32 i; 845 - u32 num_vqs; 798 + u32 num_vqs, num_poll_vqs, num_req_vqs; 846 799 vq_callback_t **callbacks; 847 800 const char **names; 848 801 struct virtqueue **vqs; 849 802 struct irq_affinity desc = { .pre_vectors = 2 }; 850 803 851 - num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; 804 + num_req_vqs = vscsi->num_queues; 805 + num_vqs = num_req_vqs + VIRTIO_SCSI_VQ_BASE; 852 806 vqs = kmalloc_array(num_vqs, sizeof(struct virtqueue *), GFP_KERNEL); 853 807 callbacks = kmalloc_array(num_vqs, sizeof(vq_callback_t *), 854 808 GFP_KERNEL); ··· 860 812 goto out; 861 813 } 862 814 815 + num_poll_vqs = min_t(unsigned int, virtscsi_poll_queues, 816 + num_req_vqs - 1); 817 + vscsi->io_queues[HCTX_TYPE_DEFAULT] = num_req_vqs - num_poll_vqs; 818 + vscsi->io_queues[HCTX_TYPE_READ] = 0; 819 + 
vscsi->io_queues[HCTX_TYPE_POLL] = num_poll_vqs; 820 + 821 + dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n", 822 + vscsi->io_queues[HCTX_TYPE_DEFAULT], 823 + vscsi->io_queues[HCTX_TYPE_READ], 824 + vscsi->io_queues[HCTX_TYPE_POLL]); 825 + 863 826 callbacks[0] = virtscsi_ctrl_done; 864 827 callbacks[1] = virtscsi_event_done; 865 828 names[0] = "control"; 866 829 names[1] = "event"; 867 - for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++) { 830 + for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs - num_poll_vqs; i++) { 868 831 callbacks[i] = virtscsi_req_done; 869 832 names[i] = "request"; 833 + } 834 + 835 + for (; i < num_vqs; i++) { 836 + callbacks[i] = NULL; 837 + names[i] = "request_poll"; 870 838 } 871 839 872 840 /* Discover virtqueues and write information to configuration. */ ··· 938 874 939 875 sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1; 940 876 shost->sg_tablesize = sg_elems; 877 + shost->nr_maps = 1; 941 878 vscsi = shost_priv(shost); 942 879 vscsi->vdev = vdev; 943 880 vscsi->num_queues = num_queues; ··· 947 882 err = virtscsi_init(vdev, vscsi); 948 883 if (err) 949 884 goto virtscsi_init_failed; 885 + 886 + if (vscsi->io_queues[HCTX_TYPE_POLL]) 887 + shost->nr_maps = HCTX_TYPE_POLL + 1; 950 888 951 889 shost->can_queue = virtqueue_get_vring_size(vscsi->req_vqs[0].vq); 952 890
+4 -2
drivers/vdpa/alibaba/eni_vdpa.c
··· 497 497 if (!eni_vdpa->vring) { 498 498 ret = -ENOMEM; 499 499 ENI_ERR(pdev, "failed to allocate virtqueues\n"); 500 - goto err; 500 + goto err_remove_vp_legacy; 501 501 } 502 502 503 503 for (i = 0; i < eni_vdpa->queues; i++) { ··· 509 509 ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues); 510 510 if (ret) { 511 511 ENI_ERR(pdev, "failed to register to vdpa bus\n"); 512 - goto err; 512 + goto err_remove_vp_legacy; 513 513 } 514 514 515 515 return 0; 516 516 517 + err_remove_vp_legacy: 518 + vp_legacy_remove(&eni_vdpa->ldev); 517 519 err: 518 520 put_device(&eni_vdpa->vdpa.dev); 519 521 return ret;
+8 -2
drivers/vdpa/mlx5/core/mlx5_vdpa.h
··· 35 35 struct vhost_iotlb *iotlb; 36 36 37 37 bool user_mr; 38 + 39 + refcount_t refcount; 40 + struct list_head mr_list; 38 41 }; 39 42 40 43 struct mlx5_vdpa_resources { ··· 96 93 u32 generation; 97 94 98 95 struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; 96 + struct list_head mr_list_head; 99 97 /* serialize mr access */ 100 98 struct mutex mr_mtx; 101 99 struct mlx5_control_vq cvq; ··· 122 118 struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, 123 119 struct vhost_iotlb *iotlb); 124 120 void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev); 125 - void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, 126 - struct mlx5_vdpa_mr *mr); 121 + void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, 122 + struct mlx5_vdpa_mr *mr); 123 + void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, 124 + struct mlx5_vdpa_mr *mr); 127 125 void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, 128 126 struct mlx5_vdpa_mr *mr, 129 127 unsigned int asid);
+57 -16
drivers/vdpa/mlx5/core/mr.c
··· 498 498 499 499 static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) 500 500 { 501 + if (WARN_ON(!mr)) 502 + return; 503 + 501 504 if (mr->user_mr) 502 505 destroy_user_mr(mvdev, mr); 503 506 else 504 507 destroy_dma_mr(mvdev, mr); 505 508 506 509 vhost_iotlb_free(mr->iotlb); 510 + 511 + list_del(&mr->mr_list); 512 + 513 + kfree(mr); 507 514 } 508 515 509 - void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, 510 - struct mlx5_vdpa_mr *mr) 516 + static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, 517 + struct mlx5_vdpa_mr *mr) 511 518 { 512 519 if (!mr) 513 520 return; 514 521 522 + if (refcount_dec_and_test(&mr->refcount)) 523 + _mlx5_vdpa_destroy_mr(mvdev, mr); 524 + } 525 + 526 + void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, 527 + struct mlx5_vdpa_mr *mr) 528 + { 515 529 mutex_lock(&mvdev->mr_mtx); 516 - 517 - _mlx5_vdpa_destroy_mr(mvdev, mr); 518 - 519 - for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) { 520 - if (mvdev->mr[i] == mr) 521 - mvdev->mr[i] = NULL; 522 - } 523 - 530 + _mlx5_vdpa_put_mr(mvdev, mr); 524 531 mutex_unlock(&mvdev->mr_mtx); 532 + } 525 533 526 - kfree(mr); 534 + static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, 535 + struct mlx5_vdpa_mr *mr) 536 + { 537 + if (!mr) 538 + return; 539 + 540 + refcount_inc(&mr->refcount); 541 + } 542 + 543 + void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, 544 + struct mlx5_vdpa_mr *mr) 545 + { 546 + mutex_lock(&mvdev->mr_mtx); 547 + _mlx5_vdpa_get_mr(mvdev, mr); 548 + mutex_unlock(&mvdev->mr_mtx); 527 549 } 528 550 529 551 void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, ··· 556 534 557 535 mutex_lock(&mvdev->mr_mtx); 558 536 537 + _mlx5_vdpa_put_mr(mvdev, old_mr); 559 538 mvdev->mr[asid] = new_mr; 560 - if (old_mr) { 561 - _mlx5_vdpa_destroy_mr(mvdev, old_mr); 562 - kfree(old_mr); 539 + 540 + mutex_unlock(&mvdev->mr_mtx); 541 + } 542 + 543 + static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev) 544 + { 545 + struct mlx5_vdpa_mr *mr; 
546 + 547 + mutex_lock(&mvdev->mr_mtx); 548 + 549 + list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) { 550 + 551 + mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: " 552 + "mr: %p, mkey: 0x%x, refcount: %u\n", 553 + mr, mr->mkey, refcount_read(&mr->refcount)); 563 554 } 564 555 565 556 mutex_unlock(&mvdev->mr_mtx); ··· 582 547 void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) 583 548 { 584 549 for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) 585 - mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[i]); 550 + mlx5_vdpa_update_mr(mvdev, NULL, i); 586 551 587 552 prune_iotlb(mvdev->cvq.iotlb); 553 + 554 + mlx5_vdpa_show_mr_leaks(mvdev); 588 555 } 589 556 590 557 static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, ··· 612 575 err = dup_iotlb(mr->iotlb, iotlb); 613 576 if (err) 614 577 goto err_iotlb; 578 + 579 + list_add_tail(&mr->mr_list, &mvdev->mr_list_head); 615 580 616 581 return 0; 617 582 ··· 645 606 646 607 if (err) 647 608 goto out_err; 609 + 610 + refcount_set(&mr->refcount, 1); 648 611 649 612 return mr; 650 613 ··· 692 651 if (asid >= MLX5_VDPA_NUM_AS) 693 652 return -EINVAL; 694 653 695 - mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[asid]); 654 + mlx5_vdpa_update_mr(mvdev, NULL, asid); 696 655 697 656 if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 698 657 if (mlx5_vdpa_create_dma_mr(mvdev))
+188 -21
drivers/vdpa/mlx5/net/mlx5_vnet.c
··· 120 120 u16 avail_idx; 121 121 u16 used_idx; 122 122 int fw_state; 123 + 124 + u64 modified_fields; 125 + 126 + struct mlx5_vdpa_mr *vq_mr; 127 + struct mlx5_vdpa_mr *desc_mr; 128 + 123 129 struct msi_map map; 124 130 125 131 /* keep last in the struct */ ··· 949 943 kfree(in); 950 944 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 951 945 946 + mlx5_vdpa_get_mr(mvdev, vq_mr); 947 + mvq->vq_mr = vq_mr; 948 + 949 + if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { 950 + mlx5_vdpa_get_mr(mvdev, vq_desc_mr); 951 + mvq->desc_mr = vq_desc_mr; 952 + } 953 + 952 954 return 0; 953 955 954 956 err_cmd: ··· 983 969 } 984 970 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 985 971 umems_destroy(ndev, mvq); 972 + 973 + mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr); 974 + mvq->vq_mr = NULL; 975 + 976 + mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr); 977 + mvq->desc_mr = NULL; 986 978 } 987 979 988 980 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) ··· 1187 1167 return err; 1188 1168 } 1189 1169 1190 - static bool is_valid_state_change(int oldstate, int newstate) 1170 + static bool is_resumable(struct mlx5_vdpa_net *ndev) 1171 + { 1172 + return ndev->mvdev.vdev.config->resume; 1173 + } 1174 + 1175 + static bool is_valid_state_change(int oldstate, int newstate, bool resumable) 1191 1176 { 1192 1177 switch (oldstate) { 1193 1178 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: ··· 1200 1175 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1201 1176 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1202 1177 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1178 + return resumable ? 
newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false; 1203 1179 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: 1204 1180 default: 1205 1181 return false; 1206 1182 } 1207 1183 } 1208 1184 1209 - static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) 1185 + static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) 1186 + { 1187 + /* Only state is always modifiable */ 1188 + if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE) 1189 + return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT || 1190 + mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1191 + 1192 + return true; 1193 + } 1194 + 1195 + static int modify_virtqueue(struct mlx5_vdpa_net *ndev, 1196 + struct mlx5_vdpa_virtqueue *mvq, 1197 + int state) 1210 1198 { 1211 1199 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); 1212 1200 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; 1201 + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 1202 + struct mlx5_vdpa_mr *desc_mr = NULL; 1203 + struct mlx5_vdpa_mr *vq_mr = NULL; 1204 + bool state_change = false; 1213 1205 void *obj_context; 1214 1206 void *cmd_hdr; 1207 + void *vq_ctx; 1215 1208 void *in; 1216 1209 int err; 1217 1210 1218 1211 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) 1219 1212 return 0; 1220 1213 1221 - if (!is_valid_state_change(mvq->fw_state, state)) 1214 + if (!modifiable_virtqueue_fields(mvq)) 1222 1215 return -EINVAL; 1223 1216 1224 1217 in = kzalloc(inlen, GFP_KERNEL); ··· 1251 1208 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1252 1209 1253 1210 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); 1254 - MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, 1255 - MLX5_VIRTQ_MODIFY_MASK_STATE); 1256 - MLX5_SET(virtio_net_q_object, obj_context, state, state); 1211 + vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 1212 + 1213 + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) { 
1214 + if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { 1215 + err = -EINVAL; 1216 + goto done; 1217 + } 1218 + 1219 + MLX5_SET(virtio_net_q_object, obj_context, state, state); 1220 + state_change = true; 1221 + } 1222 + 1223 + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { 1224 + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 1225 + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 1226 + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 1227 + } 1228 + 1229 + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX) 1230 + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 1231 + 1232 + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX) 1233 + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 1234 + 1235 + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { 1236 + vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 1237 + 1238 + if (vq_mr) 1239 + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); 1240 + else 1241 + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; 1242 + } 1243 + 1244 + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { 1245 + desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 1246 + 1247 + if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) 1248 + MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey); 1249 + else 1250 + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 1251 + } 1252 + 1253 + MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); 1257 1254 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 1258 - kfree(in); 1259 - if (!err) 1255 + if (err) 1256 + goto done; 1257 + 1258 + if (state_change) 1260 1259 mvq->fw_state = state; 1261 1260 1261 + if (mvq->modified_fields & 
MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { 1262 + mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); 1263 + mlx5_vdpa_get_mr(mvdev, vq_mr); 1264 + mvq->vq_mr = vq_mr; 1265 + } 1266 + 1267 + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { 1268 + mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); 1269 + mlx5_vdpa_get_mr(mvdev, desc_mr); 1270 + mvq->desc_mr = desc_mr; 1271 + } 1272 + 1273 + mvq->modified_fields = 0; 1274 + 1275 + done: 1276 + kfree(in); 1262 1277 return err; 1278 + } 1279 + 1280 + static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev, 1281 + struct mlx5_vdpa_virtqueue *mvq, 1282 + unsigned int state) 1283 + { 1284 + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; 1285 + return modify_virtqueue(ndev, mvq, state); 1263 1286 } 1264 1287 1265 1288 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) ··· 1456 1347 goto err_vq; 1457 1348 1458 1349 if (mvq->ready) { 1459 - err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1350 + err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1460 1351 if (err) { 1461 1352 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", 1462 1353 idx, err); ··· 1491 1382 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1492 1383 return; 1493 1384 1494 - if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1385 + if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1495 1386 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); 1496 1387 1497 1388 if (query_virtqueue(ndev, mvq, &attr)) { ··· 1510 1401 suspend_vq(ndev, &ndev->vqs[i]); 1511 1402 } 1512 1403 1404 + static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1405 + { 1406 + if (!mvq->initialized || !is_resumable(ndev)) 1407 + return; 1408 + 1409 + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND) 1410 + return; 1411 + 1412 + if (modify_virtqueue_state(ndev, mvq, 
MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)) 1413 + mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index); 1414 + } 1415 + 1416 + static void resume_vqs(struct mlx5_vdpa_net *ndev) 1417 + { 1418 + for (int i = 0; i < ndev->mvdev.max_vqs; i++) 1419 + resume_vq(ndev, &ndev->vqs[i]); 1420 + } 1421 + 1513 1422 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1514 1423 { 1515 1424 if (!mvq->initialized) 1516 1425 return; 1517 1426 1518 1427 suspend_vq(ndev, mvq); 1428 + mvq->modified_fields = 0; 1519 1429 destroy_virtqueue(ndev, mvq); 1520 1430 dealloc_vector(ndev, mvq); 1521 1431 counter_set_dealloc(ndev, mvq); ··· 2266 2138 mvq->desc_addr = desc_area; 2267 2139 mvq->device_addr = device_area; 2268 2140 mvq->driver_addr = driver_area; 2141 + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS; 2269 2142 return 0; 2270 2143 } 2271 2144 ··· 2336 2207 if (!ready) { 2337 2208 suspend_vq(ndev, mvq); 2338 2209 } else { 2339 - err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 2210 + err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 2340 2211 if (err) { 2341 2212 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); 2342 2213 ready = false; ··· 2384 2255 2385 2256 mvq->used_idx = state->split.avail_index; 2386 2257 mvq->avail_idx = state->split.avail_index; 2258 + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | 2259 + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX; 2387 2260 return 0; 2388 2261 } 2389 2262 ··· 2834 2703 unsigned int asid) 2835 2704 { 2836 2705 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2706 + bool teardown = !is_resumable(ndev); 2837 2707 int err; 2838 2708 2839 2709 suspend_vqs(ndev); 2840 - err = save_channels_info(ndev); 2841 - if (err) 2842 - return err; 2710 + if (teardown) { 2711 + err = save_channels_info(ndev); 2712 + if (err) 2713 + return err; 2843 2714 2844 - teardown_driver(ndev); 2715 + 
teardown_driver(ndev); 2716 + } 2845 2717 2846 2718 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 2719 + 2720 + for (int i = 0; i < ndev->cur_num_vqs; i++) 2721 + ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | 2722 + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 2847 2723 2848 2724 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2849 2725 return 0; 2850 2726 2851 - restore_channels_info(ndev); 2852 - err = setup_driver(mvdev); 2853 - if (err) 2854 - return err; 2727 + if (teardown) { 2728 + restore_channels_info(ndev); 2729 + err = setup_driver(mvdev); 2730 + if (err) 2731 + return err; 2732 + } 2733 + 2734 + resume_vqs(ndev); 2855 2735 2856 2736 return 0; 2857 2737 } ··· 2946 2804 { 2947 2805 int i; 2948 2806 2949 - for (i = 0; i < ndev->mvdev.max_vqs; i++) 2807 + for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2950 2808 ndev->vqs[i].ready = false; 2809 + ndev->vqs[i].modified_fields = 0; 2810 + } 2951 2811 2952 2812 ndev->mvdev.cvq.ready = false; 2953 2813 } ··· 3126 2982 return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); 3127 2983 3128 2984 out_err: 3129 - mlx5_vdpa_destroy_mr(mvdev, new_mr); 2985 + mlx5_vdpa_put_mr(mvdev, new_mr); 3130 2986 return err; 3131 2987 } 3132 2988 ··· 3373 3229 return 0; 3374 3230 } 3375 3231 3232 + static int mlx5_vdpa_resume(struct vdpa_device *vdev) 3233 + { 3234 + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3235 + struct mlx5_vdpa_net *ndev; 3236 + 3237 + ndev = to_mlx5_vdpa_ndev(mvdev); 3238 + 3239 + mlx5_vdpa_info(mvdev, "resuming device\n"); 3240 + 3241 + down_write(&ndev->reslock); 3242 + mvdev->suspended = false; 3243 + resume_vqs(ndev); 3244 + register_link_notifier(ndev); 3245 + up_write(&ndev->reslock); 3246 + return 0; 3247 + } 3248 + 3376 3249 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3377 3250 unsigned int asid) 3378 3251 { ··· 3446 3285 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3447 3286 .free = mlx5_vdpa_free, 3448 3287 .suspend = mlx5_vdpa_suspend, 3288 + 
.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */ 3449 3289 }; 3450 3290 3451 3291 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) ··· 3722 3560 if (err) 3723 3561 goto err_mpfs; 3724 3562 3563 + INIT_LIST_HEAD(&mvdev->mr_list_head); 3564 + 3725 3565 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3726 3566 err = mlx5_vdpa_create_dma_mr(mvdev); 3727 3567 if (err) ··· 3819 3655 3820 3656 if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported)) 3821 3657 mgtdev->vdpa_ops.get_vq_desc_group = NULL; 3658 + 3659 + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported)) 3660 + mgtdev->vdpa_ops.resume = NULL; 3822 3661 3823 3662 err = vdpa_mgmtdev_register(&mgtdev->mgtdev); 3824 3663 if (err)
+2 -2
drivers/vdpa/vdpa.c
··· 131 131 if (ops->free) 132 132 ops->free(vdev); 133 133 134 - ida_simple_remove(&vdpa_index_ida, vdev->index); 134 + ida_free(&vdpa_index_ida, vdev->index); 135 135 kfree(vdev->driver_override); 136 136 kfree(vdev); 137 137 } ··· 205 205 return vdev; 206 206 207 207 err_name: 208 - ida_simple_remove(&vdpa_index_ida, vdev->index); 208 + ida_free(&vdpa_index_ida, vdev->index); 209 209 err_ida: 210 210 kfree(vdev); 211 211 err:
+23 -3
drivers/vhost/vdpa.c
··· 59 59 int in_batch; 60 60 struct vdpa_iova_range range; 61 61 u32 batch_asid; 62 + bool suspended; 62 63 }; 63 64 64 65 static DEFINE_IDA(vhost_vdpa_ida); ··· 232 231 { 233 232 struct vdpa_device *vdpa = v->vdpa; 234 233 u32 flags = 0; 234 + 235 + v->suspended = false; 235 236 236 237 if (v->vdev.vqs) { 237 238 flags |= !vhost_backend_has_feature(v->vdev.vqs[0], ··· 593 590 { 594 591 struct vdpa_device *vdpa = v->vdpa; 595 592 const struct vdpa_config_ops *ops = vdpa->config; 593 + int ret; 596 594 597 595 if (!ops->suspend) 598 596 return -EOPNOTSUPP; 599 597 600 - return ops->suspend(vdpa); 598 + ret = ops->suspend(vdpa); 599 + if (!ret) 600 + v->suspended = true; 601 + 602 + return ret; 601 603 } 602 604 603 605 /* After a successful return of this ioctl the device resumes processing ··· 613 605 { 614 606 struct vdpa_device *vdpa = v->vdpa; 615 607 const struct vdpa_config_ops *ops = vdpa->config; 608 + int ret; 616 609 617 610 if (!ops->resume) 618 611 return -EOPNOTSUPP; 619 612 620 - return ops->resume(vdpa); 613 + ret = ops->resume(vdpa); 614 + if (!ret) 615 + v->suspended = false; 616 + 617 + return ret; 621 618 } 622 619 623 620 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, ··· 703 690 704 691 switch (cmd) { 705 692 case VHOST_SET_VRING_ADDR: 693 + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) 694 + return -EINVAL; 695 + 706 696 if (ops->set_vq_address(vdpa, idx, 707 697 (u64)(uintptr_t)vq->desc, 708 698 (u64)(uintptr_t)vq->avail, ··· 714 698 break; 715 699 716 700 case VHOST_SET_VRING_BASE: 701 + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) 702 + return -EINVAL; 703 + 717 704 if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { 718 705 vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; 719 706 vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); ··· 987 968 r = ops->set_map(vdpa, asid, iotlb); 988 969 } else { 989 970 r = 
iommu_map(v->domain, iova, pa, size, 990 - perm_to_iommu_flags(perm), GFP_KERNEL); 971 + perm_to_iommu_flags(perm), 972 + GFP_KERNEL_ACCOUNT); 991 973 } 992 974 if (r) { 993 975 vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
+47 -10
drivers/virtio/virtio_balloon.c
··· 119 119 /* Free page reporting device */ 120 120 struct virtqueue *reporting_vq; 121 121 struct page_reporting_dev_info pr_dev_info; 122 + 123 + /* State for keeping the wakeup_source active while adjusting the balloon */ 124 + spinlock_t adjustment_lock; 125 + bool adjustment_signal_pending; 126 + bool adjustment_in_progress; 122 127 }; 123 128 124 129 static const struct virtio_device_id id_table[] = { ··· 442 437 queue_work(vb->balloon_wq, &vb->report_free_page_work); 443 438 } 444 439 440 + static void start_update_balloon_size(struct virtio_balloon *vb) 441 + { 442 + unsigned long flags; 443 + 444 + spin_lock_irqsave(&vb->adjustment_lock, flags); 445 + vb->adjustment_signal_pending = true; 446 + if (!vb->adjustment_in_progress) { 447 + vb->adjustment_in_progress = true; 448 + pm_stay_awake(vb->vdev->dev.parent); 449 + } 450 + spin_unlock_irqrestore(&vb->adjustment_lock, flags); 451 + 452 + queue_work(system_freezable_wq, &vb->update_balloon_size_work); 453 + } 454 + 455 + static void end_update_balloon_size(struct virtio_balloon *vb) 456 + { 457 + spin_lock_irq(&vb->adjustment_lock); 458 + if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { 459 + vb->adjustment_in_progress = false; 460 + pm_relax(vb->vdev->dev.parent); 461 + } 462 + spin_unlock_irq(&vb->adjustment_lock); 463 + } 464 + 445 465 static void virtballoon_changed(struct virtio_device *vdev) 446 466 { 447 467 struct virtio_balloon *vb = vdev->priv; ··· 474 444 475 445 spin_lock_irqsave(&vb->stop_update_lock, flags); 476 446 if (!vb->stop_update) { 477 - queue_work(system_freezable_wq, 478 - &vb->update_balloon_size_work); 447 + start_update_balloon_size(vb); 479 448 virtio_balloon_queue_free_page_work(vb); 480 449 } 481 450 spin_unlock_irqrestore(&vb->stop_update_lock, flags); ··· 505 476 506 477 vb = container_of(work, struct virtio_balloon, 507 478 update_balloon_size_work); 479 + 480 + spin_lock_irq(&vb->adjustment_lock); 481 + vb->adjustment_signal_pending = false; 482 + 
spin_unlock_irq(&vb->adjustment_lock); 483 + 508 484 diff = towards_target(vb); 509 485 510 - if (!diff) 511 - return; 512 - 513 - if (diff > 0) 514 - diff -= fill_balloon(vb, diff); 515 - else 516 - diff += leak_balloon(vb, -diff); 517 - update_balloon_size(vb); 486 + if (diff) { 487 + if (diff > 0) 488 + diff -= fill_balloon(vb, diff); 489 + else 490 + diff += leak_balloon(vb, -diff); 491 + update_balloon_size(vb); 492 + } 518 493 519 494 if (diff) 520 495 queue_work(system_freezable_wq, work); 496 + else 497 + end_update_balloon_size(vb); 521 498 } 522 499 523 500 static int init_vqs(struct virtio_balloon *vb) ··· 1026 991 if (err) 1027 992 goto out_unregister_oom; 1028 993 } 994 + 995 + spin_lock_init(&vb->adjustment_lock); 1029 996 1030 997 virtio_device_ready(vdev); 1031 998
+33 -1
drivers/virtio/virtio_pci_common.c
··· 495 495 return virtio_device_restore(&vp_dev->vdev); 496 496 } 497 497 498 + static bool vp_supports_pm_no_reset(struct device *dev) 499 + { 500 + struct pci_dev *pci_dev = to_pci_dev(dev); 501 + u16 pmcsr; 502 + 503 + if (!pci_dev->pm_cap) 504 + return false; 505 + 506 + pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr); 507 + if (PCI_POSSIBLE_ERROR(pmcsr)) { 508 + dev_err(dev, "Unable to query pmcsr"); 509 + return false; 510 + } 511 + 512 + return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; 513 + } 514 + 515 + static int virtio_pci_suspend(struct device *dev) 516 + { 517 + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); 518 + } 519 + 520 + static int virtio_pci_resume(struct device *dev) 521 + { 522 + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); 523 + } 524 + 498 525 static const struct dev_pm_ops virtio_pci_pm_ops = { 499 - SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) 526 + .suspend = virtio_pci_suspend, 527 + .resume = virtio_pci_resume, 528 + .freeze = virtio_pci_freeze, 529 + .thaw = virtio_pci_restore, 530 + .poweroff = virtio_pci_freeze, 531 + .restore = virtio_pci_restore, 500 532 }; 501 533 #endif 502 534
+2 -1
include/linux/mlx5/mlx5_ifc.h
··· 1241 1241 1242 1242 u8 reserved_at_c0[0x13]; 1243 1243 u8 desc_group_mkey_supported[0x1]; 1244 - u8 reserved_at_d4[0xc]; 1244 + u8 freeze_to_rdy_supported[0x1]; 1245 + u8 reserved_at_d5[0xb]; 1245 1246 1246 1247 u8 reserved_at_e0[0x20]; 1247 1248
+4
include/linux/mlx5/mlx5_ifc_vdpa.h
··· 145 145 MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, 146 146 MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, 147 147 MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, 148 + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS = (u64)1 << 6, 149 + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX = (u64)1 << 7, 150 + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8, 151 + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11, 148 152 MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, 149 153 }; 150 154
+7
include/uapi/linux/virtio_pmem.h
··· 14 14 #include <linux/virtio_ids.h> 15 15 #include <linux/virtio_config.h> 16 16 17 + /* Feature bits */ 18 + /* guest physical address range will be indicated as shared memory region 0 */ 19 + #define VIRTIO_PMEM_F_SHMEM_REGION 0 20 + 21 + /* shmid of the shared memory region corresponding to the pmem */ 22 + #define VIRTIO_PMEM_SHMEM_REGION_ID 0 23 + 17 24 struct virtio_pmem_config { 18 25 __le64 start; 19 26 __le64 size;