Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
"vhost, virtio and vdpa features, fixes, and cleanups:

- mac vlan filter and stats support in mlx5 vdpa

- irq hardening in virtio

- performance improvements in virtio crypto

- polling i/o support in virtio blk

- ASID support in vhost

- fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (64 commits)
vdpa: ifcvf: set pci driver data in probe
vdpa/mlx5: Add RX MAC VLAN filter support
vdpa/mlx5: Remove flow counter from steering
vhost: rename vhost_work_dev_flush
vhost-test: drop flush after vhost_dev_cleanup
vhost-scsi: drop flush after vhost_dev_cleanup
vhost_vsock: simplify vhost_vsock_flush()
vhost_test: remove vhost_test_flush_vq()
vhost_net: get rid of vhost_net_flush_vq() and extra flush calls
vhost: flush dev once during vhost_dev_stop
vhost: get rid of vhost_poll_flush() wrapper
vhost-vdpa: return -EFAULT on copy_to_user() failure
vdpasim: Off by one in vdpasim_set_group_asid()
virtio: Directly use ida_alloc()/free()
virtio: use WARN_ON() to warning illegal status value
virtio: harden vring IRQ
virtio: allow to unbreak virtqueue
virtio-ccw: implement synchronize_cbs()
virtio-mmio: implement synchronize_cbs()
virtio-pci: implement synchronize_cbs()
...

+1964 -591
+203 -21
drivers/block/virtio_blk.c
··· 37 37 "0 for no limit. " 38 38 "Values > nr_cpu_ids truncated to nr_cpu_ids."); 39 39 40 + static unsigned int poll_queues; 41 + module_param(poll_queues, uint, 0644); 42 + MODULE_PARM_DESC(poll_queues, "The number of dedicated virtqueues for polling I/O"); 43 + 40 44 static int major; 41 45 static DEFINE_IDA(vd_index_ida); 42 46 ··· 78 74 79 75 /* num of vqs */ 80 76 int num_vqs; 77 + int io_queues[HCTX_MAX_TYPES]; 81 78 struct virtio_blk_vq *vqs; 82 79 }; 83 80 ··· 101 96 } 102 97 } 103 98 104 - static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, 105 - struct scatterlist *data_sg, bool have_data) 99 + static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) 106 100 { 107 101 struct scatterlist hdr, status, *sgs[3]; 108 102 unsigned int num_out = 0, num_in = 0; ··· 109 105 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 110 106 sgs[num_out++] = &hdr; 111 107 112 - if (have_data) { 108 + if (vbr->sg_table.nents) { 113 109 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 114 - sgs[num_out++] = data_sg; 110 + sgs[num_out++] = vbr->sg_table.sgl; 115 111 else 116 - sgs[num_out + num_in++] = data_sg; 112 + sgs[num_out + num_in++] = vbr->sg_table.sgl; 117 113 } 118 114 119 115 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); ··· 303 299 virtqueue_notify(vq->vq); 304 300 } 305 301 306 - static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 307 - const struct blk_mq_queue_data *bd) 302 + static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, 303 + struct virtio_blk *vblk, 304 + struct request *req, 305 + struct virtblk_req *vbr) 308 306 { 309 - struct virtio_blk *vblk = hctx->queue->queuedata; 310 - struct request *req = bd->rq; 311 - struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 312 - unsigned long flags; 313 - int num; 314 - int qid = hctx->queue_num; 315 - bool notify = false; 316 307 blk_status_t status; 317 - int err; 318 308 319 309 status = 
virtblk_setup_cmd(vblk->vdev, req, vbr); 320 310 if (unlikely(status)) ··· 316 318 317 319 blk_mq_start_request(req); 318 320 319 - num = virtblk_map_data(hctx, req, vbr); 320 - if (unlikely(num < 0)) { 321 + vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); 322 + if (unlikely(vbr->sg_table.nents < 0)) { 321 323 virtblk_cleanup_cmd(req); 322 324 return BLK_STS_RESOURCE; 323 325 } 324 326 327 + return BLK_STS_OK; 328 + } 329 + 330 + static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 331 + const struct blk_mq_queue_data *bd) 332 + { 333 + struct virtio_blk *vblk = hctx->queue->queuedata; 334 + struct request *req = bd->rq; 335 + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 336 + unsigned long flags; 337 + int qid = hctx->queue_num; 338 + bool notify = false; 339 + blk_status_t status; 340 + int err; 341 + 342 + status = virtblk_prep_rq(hctx, vblk, req, vbr); 343 + if (unlikely(status)) 344 + return status; 345 + 325 346 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 326 - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); 347 + err = virtblk_add_req(vblk->vqs[qid].vq, vbr); 327 348 if (err) { 328 349 virtqueue_kick(vblk->vqs[qid].vq); 329 350 /* Don't stop the queue if -ENOMEM: we may have failed to ··· 370 353 if (notify) 371 354 virtqueue_notify(vblk->vqs[qid].vq); 372 355 return BLK_STS_OK; 356 + } 357 + 358 + static bool virtblk_prep_rq_batch(struct request *req) 359 + { 360 + struct virtio_blk *vblk = req->mq_hctx->queue->queuedata; 361 + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 362 + 363 + req->mq_hctx->tags->rqs[req->tag] = req; 364 + 365 + return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK; 366 + } 367 + 368 + static bool virtblk_add_req_batch(struct virtio_blk_vq *vq, 369 + struct request **rqlist, 370 + struct request **requeue_list) 371 + { 372 + unsigned long flags; 373 + int err; 374 + bool kick; 375 + 376 + spin_lock_irqsave(&vq->lock, flags); 377 + 378 + while (!rq_list_empty(*rqlist)) 
{ 379 + struct request *req = rq_list_pop(rqlist); 380 + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 381 + 382 + err = virtblk_add_req(vq->vq, vbr); 383 + if (err) { 384 + virtblk_unmap_data(req, vbr); 385 + virtblk_cleanup_cmd(req); 386 + rq_list_add(requeue_list, req); 387 + } 388 + } 389 + 390 + kick = virtqueue_kick_prepare(vq->vq); 391 + spin_unlock_irqrestore(&vq->lock, flags); 392 + 393 + return kick; 394 + } 395 + 396 + static void virtio_queue_rqs(struct request **rqlist) 397 + { 398 + struct request *req, *next, *prev = NULL; 399 + struct request *requeue_list = NULL; 400 + 401 + rq_list_for_each_safe(rqlist, req, next) { 402 + struct virtio_blk_vq *vq = req->mq_hctx->driver_data; 403 + bool kick; 404 + 405 + if (!virtblk_prep_rq_batch(req)) { 406 + rq_list_move(rqlist, &requeue_list, req, prev); 407 + req = prev; 408 + if (!req) 409 + continue; 410 + } 411 + 412 + if (!next || req->mq_hctx != next->mq_hctx) { 413 + req->rq_next = NULL; 414 + kick = virtblk_add_req_batch(vq, rqlist, &requeue_list); 415 + if (kick) 416 + virtqueue_notify(vq->vq); 417 + 418 + *rqlist = next; 419 + prev = NULL; 420 + } else 421 + prev = req; 422 + } 423 + 424 + *rqlist = requeue_list; 373 425 } 374 426 375 427 /* return id (s/n) string for *disk to *id_str ··· 598 512 const char **names; 599 513 struct virtqueue **vqs; 600 514 unsigned short num_vqs; 515 + unsigned int num_poll_vqs; 601 516 struct virtio_device *vdev = vblk->vdev; 602 517 struct irq_affinity desc = { 0, }; 603 518 ··· 607 520 &num_vqs); 608 521 if (err) 609 522 num_vqs = 1; 523 + 610 524 if (!err && !num_vqs) { 611 525 dev_err(&vdev->dev, "MQ advertised but zero queues reported\n"); 612 526 return -EINVAL; ··· 616 528 num_vqs = min_t(unsigned int, 617 529 min_not_zero(num_request_queues, nr_cpu_ids), 618 530 num_vqs); 531 + 532 + num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1); 533 + 534 + vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs; 535 + vblk->io_queues[HCTX_TYPE_READ] = 
0; 536 + vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs; 537 + 538 + dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n", 539 + vblk->io_queues[HCTX_TYPE_DEFAULT], 540 + vblk->io_queues[HCTX_TYPE_READ], 541 + vblk->io_queues[HCTX_TYPE_POLL]); 619 542 620 543 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 621 544 if (!vblk->vqs) ··· 640 541 goto out; 641 542 } 642 543 643 - for (i = 0; i < num_vqs; i++) { 544 + for (i = 0; i < num_vqs - num_poll_vqs; i++) { 644 545 callbacks[i] = virtblk_done; 645 546 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); 547 + names[i] = vblk->vqs[i].name; 548 + } 549 + 550 + for (; i < num_vqs; i++) { 551 + callbacks[i] = NULL; 552 + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%d", i); 646 553 names[i] = vblk->vqs[i].name; 647 554 } 648 555 ··· 797 692 static int virtblk_map_queues(struct blk_mq_tag_set *set) 798 693 { 799 694 struct virtio_blk *vblk = set->driver_data; 695 + int i, qoff; 800 696 801 - return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], 802 - vblk->vdev, 0); 697 + for (i = 0, qoff = 0; i < set->nr_maps; i++) { 698 + struct blk_mq_queue_map *map = &set->map[i]; 699 + 700 + map->nr_queues = vblk->io_queues[i]; 701 + map->queue_offset = qoff; 702 + qoff += map->nr_queues; 703 + 704 + if (map->nr_queues == 0) 705 + continue; 706 + 707 + /* 708 + * Regular queues have interrupts and hence CPU affinity is 709 + * defined by the core virtio code, but polling queues have 710 + * no interrupts so we let the block layer assign CPU affinity. 
711 + */ 712 + if (i == HCTX_TYPE_POLL) 713 + blk_mq_map_queues(&set->map[i]); 714 + else 715 + blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0); 716 + } 717 + 718 + return 0; 719 + } 720 + 721 + static void virtblk_complete_batch(struct io_comp_batch *iob) 722 + { 723 + struct request *req; 724 + 725 + rq_list_for_each(&iob->req_list, req) { 726 + virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); 727 + virtblk_cleanup_cmd(req); 728 + } 729 + blk_mq_end_request_batch(iob); 730 + } 731 + 732 + static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) 733 + { 734 + struct virtio_blk *vblk = hctx->queue->queuedata; 735 + struct virtio_blk_vq *vq = hctx->driver_data; 736 + struct virtblk_req *vbr; 737 + unsigned long flags; 738 + unsigned int len; 739 + int found = 0; 740 + 741 + spin_lock_irqsave(&vq->lock, flags); 742 + 743 + while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { 744 + struct request *req = blk_mq_rq_from_pdu(vbr); 745 + 746 + found++; 747 + if (!blk_mq_add_to_batch(req, iob, vbr->status, 748 + virtblk_complete_batch)) 749 + blk_mq_complete_request(req); 750 + } 751 + 752 + if (found) 753 + blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 754 + 755 + spin_unlock_irqrestore(&vq->lock, flags); 756 + 757 + return found; 758 + } 759 + 760 + static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 761 + unsigned int hctx_idx) 762 + { 763 + struct virtio_blk *vblk = data; 764 + struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx]; 765 + 766 + WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags); 767 + hctx->driver_data = vq; 768 + return 0; 803 769 } 804 770 805 771 static const struct blk_mq_ops virtio_mq_ops = { 806 772 .queue_rq = virtio_queue_rq, 773 + .queue_rqs = virtio_queue_rqs, 807 774 .commit_rqs = virtio_commit_rqs, 775 + .init_hctx = virtblk_init_hctx, 808 776 .complete = virtblk_request_done, 809 777 .map_queues = virtblk_map_queues, 778 + .poll = virtblk_poll, 810 779 }; 811 780 812 781 static unsigned 
int virtblk_queue_depth; ··· 957 778 sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; 958 779 vblk->tag_set.driver_data = vblk; 959 780 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 781 + vblk->tag_set.nr_maps = 1; 782 + if (vblk->io_queues[HCTX_TYPE_POLL]) 783 + vblk->tag_set.nr_maps = 3; 960 784 961 785 err = blk_mq_alloc_tag_set(&vblk->tag_set); 962 786 if (err)
+50 -45
drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
··· 90 90 } 91 91 92 92 akcipher_req = vc_akcipher_req->akcipher_req; 93 - if (vc_akcipher_req->opcode != VIRTIO_CRYPTO_AKCIPHER_VERIFY) 93 + if (vc_akcipher_req->opcode != VIRTIO_CRYPTO_AKCIPHER_VERIFY) { 94 + /* actuall length maybe less than dst buffer */ 95 + akcipher_req->dst_len = len - sizeof(vc_req->status); 94 96 sg_copy_from_buffer(akcipher_req->dst, sg_nents(akcipher_req->dst), 95 97 vc_akcipher_req->dst_buf, akcipher_req->dst_len); 98 + } 96 99 virtio_crypto_akcipher_finalize_req(vc_akcipher_req, akcipher_req, error); 97 100 } 98 101 ··· 106 103 struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; 107 104 struct virtio_crypto *vcrypto = ctx->vcrypto; 108 105 uint8_t *pkey; 109 - unsigned int inlen; 110 106 int err; 111 107 unsigned int num_out = 0, num_in = 0; 108 + struct virtio_crypto_op_ctrl_req *ctrl; 109 + struct virtio_crypto_session_input *input; 110 + struct virtio_crypto_ctrl_request *vc_ctrl_req; 112 111 113 112 pkey = kmemdup(key, keylen, GFP_ATOMIC); 114 113 if (!pkey) 115 114 return -ENOMEM; 116 115 117 - spin_lock(&vcrypto->ctrl_lock); 118 - memcpy(&vcrypto->ctrl.header, header, sizeof(vcrypto->ctrl.header)); 119 - memcpy(&vcrypto->ctrl.u, para, sizeof(vcrypto->ctrl.u)); 120 - vcrypto->input.status = cpu_to_le32(VIRTIO_CRYPTO_ERR); 116 + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); 117 + if (!vc_ctrl_req) { 118 + err = -ENOMEM; 119 + goto out; 120 + } 121 121 122 - sg_init_one(&outhdr_sg, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); 122 + ctrl = &vc_ctrl_req->ctrl; 123 + memcpy(&ctrl->header, header, sizeof(ctrl->header)); 124 + memcpy(&ctrl->u, para, sizeof(ctrl->u)); 125 + input = &vc_ctrl_req->input; 126 + input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); 127 + 128 + sg_init_one(&outhdr_sg, ctrl, sizeof(*ctrl)); 123 129 sgs[num_out++] = &outhdr_sg; 124 130 125 131 sg_init_one(&key_sg, pkey, keylen); 126 132 sgs[num_out++] = &key_sg; 127 133 128 - sg_init_one(&inhdr_sg, &vcrypto->input, sizeof(vcrypto->input)); 134 + 
sg_init_one(&inhdr_sg, input, sizeof(*input)); 129 135 sgs[num_out + num_in++] = &inhdr_sg; 130 136 131 - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); 137 + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); 132 138 if (err < 0) 133 139 goto out; 134 140 135 - virtqueue_kick(vcrypto->ctrl_vq); 136 - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && 137 - !virtqueue_is_broken(vcrypto->ctrl_vq)) 138 - cpu_relax(); 139 - 140 - if (le32_to_cpu(vcrypto->input.status) != VIRTIO_CRYPTO_OK) { 141 + if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { 142 + pr_err("virtio_crypto: Create session failed status: %u\n", 143 + le32_to_cpu(input->status)); 141 144 err = -EINVAL; 142 145 goto out; 143 146 } 144 147 145 - ctx->session_id = le64_to_cpu(vcrypto->input.session_id); 148 + ctx->session_id = le64_to_cpu(input->session_id); 146 149 ctx->session_valid = true; 147 150 err = 0; 148 151 149 152 out: 150 - spin_unlock(&vcrypto->ctrl_lock); 153 + kfree(vc_ctrl_req); 151 154 kfree_sensitive(pkey); 152 - 153 - if (err < 0) 154 - pr_err("virtio_crypto: Create session failed status: %u\n", 155 - le32_to_cpu(vcrypto->input.status)); 156 155 157 156 return err; 158 157 } ··· 164 159 struct scatterlist outhdr_sg, inhdr_sg, *sgs[2]; 165 160 struct virtio_crypto_destroy_session_req *destroy_session; 166 161 struct virtio_crypto *vcrypto = ctx->vcrypto; 167 - unsigned int num_out = 0, num_in = 0, inlen; 162 + unsigned int num_out = 0, num_in = 0; 168 163 int err; 164 + struct virtio_crypto_op_ctrl_req *ctrl; 165 + struct virtio_crypto_inhdr *ctrl_status; 166 + struct virtio_crypto_ctrl_request *vc_ctrl_req; 169 167 170 - spin_lock(&vcrypto->ctrl_lock); 171 - if (!ctx->session_valid) { 172 - err = 0; 173 - goto out; 174 - } 175 - vcrypto->ctrl_status.status = VIRTIO_CRYPTO_ERR; 176 - vcrypto->ctrl.header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION); 177 - vcrypto->ctrl.header.queue_id = 0; 168 + 
if (!ctx->session_valid) 169 + return 0; 178 170 179 - destroy_session = &vcrypto->ctrl.u.destroy_session; 171 + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); 172 + if (!vc_ctrl_req) 173 + return -ENOMEM; 174 + 175 + ctrl_status = &vc_ctrl_req->ctrl_status; 176 + ctrl_status->status = VIRTIO_CRYPTO_ERR; 177 + ctrl = &vc_ctrl_req->ctrl; 178 + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION); 179 + ctrl->header.queue_id = 0; 180 + 181 + destroy_session = &ctrl->u.destroy_session; 180 182 destroy_session->session_id = cpu_to_le64(ctx->session_id); 181 183 182 - sg_init_one(&outhdr_sg, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); 184 + sg_init_one(&outhdr_sg, ctrl, sizeof(*ctrl)); 183 185 sgs[num_out++] = &outhdr_sg; 184 186 185 - sg_init_one(&inhdr_sg, &vcrypto->ctrl_status.status, sizeof(vcrypto->ctrl_status.status)); 187 + sg_init_one(&inhdr_sg, &ctrl_status->status, sizeof(ctrl_status->status)); 186 188 sgs[num_out + num_in++] = &inhdr_sg; 187 189 188 - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); 190 + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); 189 191 if (err < 0) 190 192 goto out; 191 193 192 - virtqueue_kick(vcrypto->ctrl_vq); 193 - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && 194 - !virtqueue_is_broken(vcrypto->ctrl_vq)) 195 - cpu_relax(); 196 - 197 - if (vcrypto->ctrl_status.status != VIRTIO_CRYPTO_OK) { 194 + if (ctrl_status->status != VIRTIO_CRYPTO_OK) { 195 + pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", 196 + ctrl_status->status, destroy_session->session_id); 198 197 err = -EINVAL; 199 198 goto out; 200 199 } ··· 207 198 ctx->session_valid = false; 208 199 209 200 out: 210 - spin_unlock(&vcrypto->ctrl_lock); 211 - if (err < 0) { 212 - pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", 213 - vcrypto->ctrl_status.status, destroy_session->session_id); 214 - } 201 + 
kfree(vc_ctrl_req); 215 202 216 203 return err; 217 204 }
+16 -5
drivers/crypto/virtio/virtio_crypto_common.h
··· 13 13 #include <crypto/aead.h> 14 14 #include <crypto/aes.h> 15 15 #include <crypto/engine.h> 16 + #include <uapi/linux/virtio_crypto.h> 16 17 17 18 18 19 /* Internal representation of a data virtqueue */ ··· 66 65 /* Maximum size of per request */ 67 66 u64 max_size; 68 67 69 - /* Control VQ buffers: protected by the ctrl_lock */ 70 - struct virtio_crypto_op_ctrl_req ctrl; 71 - struct virtio_crypto_session_input input; 72 - struct virtio_crypto_inhdr ctrl_status; 73 - 74 68 unsigned long status; 75 69 atomic_t ref_count; 76 70 struct list_head list; ··· 79 83 struct virtio_crypto_sym_session_info { 80 84 /* Backend session id, which come from the host side */ 81 85 __u64 session_id; 86 + }; 87 + 88 + /* 89 + * Note: there are padding fields in request, clear them to zero before 90 + * sending to host to avoid to divulge any information. 91 + * Ex, virtio_crypto_ctrl_request::ctrl::u::destroy_session::padding[48] 92 + */ 93 + struct virtio_crypto_ctrl_request { 94 + struct virtio_crypto_op_ctrl_req ctrl; 95 + struct virtio_crypto_session_input input; 96 + struct virtio_crypto_inhdr ctrl_status; 97 + struct completion compl; 82 98 }; 83 99 84 100 struct virtio_crypto_request; ··· 142 134 void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto); 143 135 int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto); 144 136 void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto); 137 + int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[], 138 + unsigned int out_sgs, unsigned int in_sgs, 139 + struct virtio_crypto_ctrl_request *vc_ctrl_req); 145 140 146 141 #endif /* _VIRTIO_CRYPTO_COMMON_H */
+53 -2
drivers/crypto/virtio/virtio_crypto_core.c
··· 22 22 } 23 23 } 24 24 25 + static void virtio_crypto_ctrlq_callback(struct virtio_crypto_ctrl_request *vc_ctrl_req) 26 + { 27 + complete(&vc_ctrl_req->compl); 28 + } 29 + 30 + static void virtcrypto_ctrlq_callback(struct virtqueue *vq) 31 + { 32 + struct virtio_crypto *vcrypto = vq->vdev->priv; 33 + struct virtio_crypto_ctrl_request *vc_ctrl_req; 34 + unsigned long flags; 35 + unsigned int len; 36 + 37 + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); 38 + do { 39 + virtqueue_disable_cb(vq); 40 + while ((vc_ctrl_req = virtqueue_get_buf(vq, &len)) != NULL) { 41 + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); 42 + virtio_crypto_ctrlq_callback(vc_ctrl_req); 43 + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); 44 + } 45 + if (unlikely(virtqueue_is_broken(vq))) 46 + break; 47 + } while (!virtqueue_enable_cb(vq)); 48 + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); 49 + } 50 + 51 + int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[], 52 + unsigned int out_sgs, unsigned int in_sgs, 53 + struct virtio_crypto_ctrl_request *vc_ctrl_req) 54 + { 55 + int err; 56 + unsigned long flags; 57 + 58 + init_completion(&vc_ctrl_req->compl); 59 + 60 + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); 61 + err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, out_sgs, in_sgs, vc_ctrl_req, GFP_ATOMIC); 62 + if (err < 0) { 63 + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); 64 + return err; 65 + } 66 + 67 + virtqueue_kick(vcrypto->ctrl_vq); 68 + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); 69 + 70 + wait_for_completion(&vc_ctrl_req->compl); 71 + 72 + return 0; 73 + } 74 + 25 75 static void virtcrypto_dataq_callback(struct virtqueue *vq) 26 76 { 27 77 struct virtio_crypto *vcrypto = vq->vdev->priv; ··· 123 73 goto err_names; 124 74 125 75 /* Parameters for control virtqueue */ 126 - callbacks[total_vqs - 1] = NULL; 76 + callbacks[total_vqs - 1] = virtcrypto_ctrlq_callback; 127 77 names[total_vqs - 1] = "controlq"; 128 78 129 79 /* 
Allocate/initialize parameters for data virtqueues */ ··· 144 94 spin_lock_init(&vi->data_vq[i].lock); 145 95 vi->data_vq[i].vq = vqs[i]; 146 96 /* Initialize crypto engine */ 147 - vi->data_vq[i].engine = crypto_engine_alloc_init(dev, 1); 97 + vi->data_vq[i].engine = crypto_engine_alloc_init_and_set(dev, true, NULL, true, 98 + virtqueue_get_vring_size(vqs[i])); 148 99 if (!vi->data_vq[i].engine) { 149 100 ret = -ENOMEM; 150 101 goto err_engine;
+62 -76
drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
··· 118 118 int encrypt) 119 119 { 120 120 struct scatterlist outhdr, key_sg, inhdr, *sgs[3]; 121 - unsigned int tmp; 122 121 struct virtio_crypto *vcrypto = ctx->vcrypto; 123 122 int op = encrypt ? VIRTIO_CRYPTO_OP_ENCRYPT : VIRTIO_CRYPTO_OP_DECRYPT; 124 123 int err; 125 124 unsigned int num_out = 0, num_in = 0; 125 + struct virtio_crypto_op_ctrl_req *ctrl; 126 + struct virtio_crypto_session_input *input; 127 + struct virtio_crypto_sym_create_session_req *sym_create_session; 128 + struct virtio_crypto_ctrl_request *vc_ctrl_req; 126 129 127 130 /* 128 131 * Avoid to do DMA from the stack, switch to using ··· 136 133 if (!cipher_key) 137 134 return -ENOMEM; 138 135 139 - spin_lock(&vcrypto->ctrl_lock); 136 + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); 137 + if (!vc_ctrl_req) { 138 + err = -ENOMEM; 139 + goto out; 140 + } 141 + 140 142 /* Pad ctrl header */ 141 - vcrypto->ctrl.header.opcode = 142 - cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); 143 - vcrypto->ctrl.header.algo = cpu_to_le32(alg); 143 + ctrl = &vc_ctrl_req->ctrl; 144 + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); 145 + ctrl->header.algo = cpu_to_le32(alg); 144 146 /* Set the default dataqueue id to 0 */ 145 - vcrypto->ctrl.header.queue_id = 0; 147 + ctrl->header.queue_id = 0; 146 148 147 - vcrypto->input.status = cpu_to_le32(VIRTIO_CRYPTO_ERR); 149 + input = &vc_ctrl_req->input; 150 + input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); 148 151 /* Pad cipher's parameters */ 149 - vcrypto->ctrl.u.sym_create_session.op_type = 150 - cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); 151 - vcrypto->ctrl.u.sym_create_session.u.cipher.para.algo = 152 - vcrypto->ctrl.header.algo; 153 - vcrypto->ctrl.u.sym_create_session.u.cipher.para.keylen = 154 - cpu_to_le32(keylen); 155 - vcrypto->ctrl.u.sym_create_session.u.cipher.para.op = 156 - cpu_to_le32(op); 152 + sym_create_session = &ctrl->u.sym_create_session; 153 + sym_create_session->op_type = 
cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); 154 + sym_create_session->u.cipher.para.algo = ctrl->header.algo; 155 + sym_create_session->u.cipher.para.keylen = cpu_to_le32(keylen); 156 + sym_create_session->u.cipher.para.op = cpu_to_le32(op); 157 157 158 - sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); 158 + sg_init_one(&outhdr, ctrl, sizeof(*ctrl)); 159 159 sgs[num_out++] = &outhdr; 160 160 161 161 /* Set key */ ··· 166 160 sgs[num_out++] = &key_sg; 167 161 168 162 /* Return status and session id back */ 169 - sg_init_one(&inhdr, &vcrypto->input, sizeof(vcrypto->input)); 163 + sg_init_one(&inhdr, input, sizeof(*input)); 170 164 sgs[num_out + num_in++] = &inhdr; 171 165 172 - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, 173 - num_in, vcrypto, GFP_ATOMIC); 174 - if (err < 0) { 175 - spin_unlock(&vcrypto->ctrl_lock); 176 - kfree_sensitive(cipher_key); 177 - return err; 178 - } 179 - virtqueue_kick(vcrypto->ctrl_vq); 166 + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); 167 + if (err < 0) 168 + goto out; 180 169 181 - /* 182 - * Trapping into the hypervisor, so the request should be 183 - * handled immediately. 
184 - */ 185 - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && 186 - !virtqueue_is_broken(vcrypto->ctrl_vq)) 187 - cpu_relax(); 188 - 189 - if (le32_to_cpu(vcrypto->input.status) != VIRTIO_CRYPTO_OK) { 190 - spin_unlock(&vcrypto->ctrl_lock); 170 + if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { 191 171 pr_err("virtio_crypto: Create session failed status: %u\n", 192 - le32_to_cpu(vcrypto->input.status)); 193 - kfree_sensitive(cipher_key); 194 - return -EINVAL; 172 + le32_to_cpu(input->status)); 173 + err = -EINVAL; 174 + goto out; 195 175 } 196 176 197 177 if (encrypt) 198 - ctx->enc_sess_info.session_id = 199 - le64_to_cpu(vcrypto->input.session_id); 178 + ctx->enc_sess_info.session_id = le64_to_cpu(input->session_id); 200 179 else 201 - ctx->dec_sess_info.session_id = 202 - le64_to_cpu(vcrypto->input.session_id); 180 + ctx->dec_sess_info.session_id = le64_to_cpu(input->session_id); 203 181 204 - spin_unlock(&vcrypto->ctrl_lock); 205 - 182 + err = 0; 183 + out: 184 + kfree(vc_ctrl_req); 206 185 kfree_sensitive(cipher_key); 207 - return 0; 186 + return err; 208 187 } 209 188 210 189 static int virtio_crypto_alg_skcipher_close_session( ··· 197 206 int encrypt) 198 207 { 199 208 struct scatterlist outhdr, status_sg, *sgs[2]; 200 - unsigned int tmp; 201 209 struct virtio_crypto_destroy_session_req *destroy_session; 202 210 struct virtio_crypto *vcrypto = ctx->vcrypto; 203 211 int err; 204 212 unsigned int num_out = 0, num_in = 0; 213 + struct virtio_crypto_op_ctrl_req *ctrl; 214 + struct virtio_crypto_inhdr *ctrl_status; 215 + struct virtio_crypto_ctrl_request *vc_ctrl_req; 205 216 206 - spin_lock(&vcrypto->ctrl_lock); 207 - vcrypto->ctrl_status.status = VIRTIO_CRYPTO_ERR; 217 + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); 218 + if (!vc_ctrl_req) 219 + return -ENOMEM; 220 + 221 + ctrl_status = &vc_ctrl_req->ctrl_status; 222 + ctrl_status->status = VIRTIO_CRYPTO_ERR; 208 223 /* Pad ctrl header */ 209 - vcrypto->ctrl.header.opcode = 210 - 
cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); 224 + ctrl = &vc_ctrl_req->ctrl; 225 + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); 211 226 /* Set the default virtqueue id to 0 */ 212 - vcrypto->ctrl.header.queue_id = 0; 227 + ctrl->header.queue_id = 0; 213 228 214 - destroy_session = &vcrypto->ctrl.u.destroy_session; 229 + destroy_session = &ctrl->u.destroy_session; 215 230 216 231 if (encrypt) 217 - destroy_session->session_id = 218 - cpu_to_le64(ctx->enc_sess_info.session_id); 232 + destroy_session->session_id = cpu_to_le64(ctx->enc_sess_info.session_id); 219 233 else 220 - destroy_session->session_id = 221 - cpu_to_le64(ctx->dec_sess_info.session_id); 234 + destroy_session->session_id = cpu_to_le64(ctx->dec_sess_info.session_id); 222 235 223 - sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); 236 + sg_init_one(&outhdr, ctrl, sizeof(*ctrl)); 224 237 sgs[num_out++] = &outhdr; 225 238 226 239 /* Return status and session id back */ 227 - sg_init_one(&status_sg, &vcrypto->ctrl_status.status, 228 - sizeof(vcrypto->ctrl_status.status)); 240 + sg_init_one(&status_sg, &ctrl_status->status, sizeof(ctrl_status->status)); 229 241 sgs[num_out + num_in++] = &status_sg; 230 242 231 - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, 232 - num_in, vcrypto, GFP_ATOMIC); 233 - if (err < 0) { 234 - spin_unlock(&vcrypto->ctrl_lock); 235 - return err; 236 - } 237 - virtqueue_kick(vcrypto->ctrl_vq); 243 + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); 244 + if (err < 0) 245 + goto out; 238 246 239 - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && 240 - !virtqueue_is_broken(vcrypto->ctrl_vq)) 241 - cpu_relax(); 242 - 243 - if (vcrypto->ctrl_status.status != VIRTIO_CRYPTO_OK) { 244 - spin_unlock(&vcrypto->ctrl_lock); 247 + if (ctrl_status->status != VIRTIO_CRYPTO_OK) { 245 248 pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", 246 - vcrypto->ctrl_status.status, 247 - 
destroy_session->session_id); 249 + ctrl_status->status, destroy_session->session_id); 248 250 249 251 return -EINVAL; 250 252 } 251 - spin_unlock(&vcrypto->ctrl_lock); 252 253 253 - return 0; 254 + err = 0; 255 + out: 256 + kfree(vc_ctrl_req); 257 + return err; 254 258 } 255 259 256 260 static int virtio_crypto_alg_skcipher_init_sessions(
+34
drivers/s390/virtio/virtio_ccw.c
··· 62 62 unsigned int revision; /* Transport revision */ 63 63 wait_queue_head_t wait_q; 64 64 spinlock_t lock; 65 + rwlock_t irq_lock; 65 66 struct mutex io_lock; /* Serializes I/O requests */ 66 67 struct list_head virtqueues; 67 68 bool is_thinint; ··· 971 970 ccw->flags = 0; 972 971 ccw->count = sizeof(status); 973 972 ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status; 973 + /* We use ssch for setting the status which is a serializing 974 + * instruction that guarantees the memory writes have 975 + * completed before ssch. 976 + */ 974 977 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS); 975 978 /* Write failed? We assume status is unchanged. */ 976 979 if (ret) ··· 989 984 return dev_name(&vcdev->cdev->dev); 990 985 } 991 986 987 + static void virtio_ccw_synchronize_cbs(struct virtio_device *vdev) 988 + { 989 + struct virtio_ccw_device *vcdev = to_vc_device(vdev); 990 + struct airq_info *info = vcdev->airq_info; 991 + 992 + if (info) { 993 + /* 994 + * This device uses adapter interrupts: synchronize with 995 + * vring_interrupt() called by virtio_airq_handler() 996 + * via the indicator area lock. 
997 + */ 998 + write_lock_irq(&info->lock); 999 + write_unlock_irq(&info->lock); 1000 + } else { 1001 + /* This device uses classic interrupts: synchronize 1002 + * with vring_interrupt() called by 1003 + * virtio_ccw_int_handler() via the per-device 1004 + * irq_lock 1005 + */ 1006 + write_lock_irq(&vcdev->irq_lock); 1007 + write_unlock_irq(&vcdev->irq_lock); 1008 + } 1009 + } 1010 + 992 1011 static const struct virtio_config_ops virtio_ccw_config_ops = { 993 1012 .get_features = virtio_ccw_get_features, 994 1013 .finalize_features = virtio_ccw_finalize_features, ··· 1024 995 .find_vqs = virtio_ccw_find_vqs, 1025 996 .del_vqs = virtio_ccw_del_vqs, 1026 997 .bus_name = virtio_ccw_bus_name, 998 + .synchronize_cbs = virtio_ccw_synchronize_cbs, 1027 999 }; 1028 1000 1029 1001 ··· 1136 1106 vcdev->err = -EIO; 1137 1107 } 1138 1108 virtio_ccw_check_activity(vcdev, activity); 1109 + /* Interrupts are disabled here */ 1110 + read_lock(&vcdev->irq_lock); 1139 1111 for_each_set_bit(i, indicators(vcdev), 1140 1112 sizeof(*indicators(vcdev)) * BITS_PER_BYTE) { 1141 1113 /* The bit clear must happen before the vring kick. */ ··· 1146 1114 vq = virtio_ccw_vq_by_ind(vcdev, i); 1147 1115 vring_interrupt(0, vq); 1148 1116 } 1117 + read_unlock(&vcdev->irq_lock); 1149 1118 if (test_bit(0, indicators2(vcdev))) { 1150 1119 virtio_config_changed(&vcdev->vdev); 1151 1120 clear_bit(0, indicators2(vcdev)); ··· 1317 1284 init_waitqueue_head(&vcdev->wait_q); 1318 1285 INIT_LIST_HEAD(&vcdev->virtqueues); 1319 1286 spin_lock_init(&vcdev->lock); 1287 + rwlock_init(&vcdev->irq_lock); 1320 1288 mutex_init(&vcdev->io_lock); 1321 1289 1322 1290 spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+1 -1
drivers/vdpa/alibaba/eni_vdpa.c
··· 470 470 return ret; 471 471 472 472 eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa, 473 - dev, &eni_vdpa_ops, NULL, false); 473 + dev, &eni_vdpa_ops, 1, 1, NULL, false); 474 474 if (IS_ERR(eni_vdpa)) { 475 475 ENI_ERR(pdev, "failed to allocate vDPA structure\n"); 476 476 return PTR_ERR(eni_vdpa);
+15 -8
drivers/vdpa/ifcvf/ifcvf_main.c
··· 290 290 struct ifcvf_hw *vf = &adapter->vf; 291 291 int config_vector, ret; 292 292 293 - if (vf->msix_vector_status == MSIX_VECTOR_DEV_SHARED) 294 - return 0; 295 - 296 293 if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG) 297 - /* vector 0 ~ vf->nr_vring for vqs, num vf->nr_vring vector for config interrupt */ 298 294 config_vector = vf->nr_vring; 299 - 300 - if (vf->msix_vector_status == MSIX_VECTOR_SHARED_VQ_AND_CONFIG) 295 + else if (vf->msix_vector_status == MSIX_VECTOR_SHARED_VQ_AND_CONFIG) 301 296 /* vector 0 for vqs and 1 for config interrupt */ 302 297 config_vector = 1; 298 + else if (vf->msix_vector_status == MSIX_VECTOR_DEV_SHARED) 299 + /* re-use the vqs vector */ 300 + return 0; 301 + else 302 + return -EINVAL; 303 303 304 304 snprintf(vf->config_msix_name, 256, "ifcvf[%s]-config\n", 305 305 pci_name(pdev)); ··· 626 626 return vf->config_size; 627 627 } 628 628 629 + static u32 ifcvf_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx) 630 + { 631 + return 0; 632 + } 633 + 629 634 static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, 630 635 unsigned int offset, 631 636 void *buf, unsigned int len) ··· 709 704 .get_device_id = ifcvf_vdpa_get_device_id, 710 705 .get_vendor_id = ifcvf_vdpa_get_vendor_id, 711 706 .get_vq_align = ifcvf_vdpa_get_vq_align, 707 + .get_vq_group = ifcvf_vdpa_get_vq_group, 712 708 .get_config_size = ifcvf_vdpa_get_config_size, 713 709 .get_config = ifcvf_vdpa_get_config, 714 710 .set_config = ifcvf_vdpa_set_config, ··· 764 758 pdev = ifcvf_mgmt_dev->pdev; 765 759 dev = &pdev->dev; 766 760 adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, 767 - dev, &ifc_vdpa_ops, name, false); 761 + dev, &ifc_vdpa_ops, 1, 1, name, false); 768 762 if (IS_ERR(adapter)) { 769 763 IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); 770 764 return PTR_ERR(adapter); 771 765 } 772 766 773 767 ifcvf_mgmt_dev->adapter = adapter; 774 - pci_set_drvdata(pdev, ifcvf_mgmt_dev); 775 768 776 769 vf = &adapter->vf; 777 770 
vf->dev_type = get_dev_type(pdev); ··· 884 879 "Failed to initialize the management interfaces\n"); 885 880 goto err; 886 881 } 882 + 883 + pci_set_drvdata(pdev, ifcvf_mgmt_dev); 887 884 888 885 return 0; 889 886
+2
drivers/vdpa/mlx5/core/mlx5_vdpa.h
··· 61 61 struct vringh_kiov riov; 62 62 struct vringh_kiov wiov; 63 63 unsigned short head; 64 + unsigned int received_desc; 65 + unsigned int completed_desc; 64 66 }; 65 67 66 68 struct mlx5_vdpa_wq_ent {
+395 -96
drivers/vdpa/mlx5/net/mlx5_vnet.c
··· 48 48 49 49 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) 50 50 51 + #define MLX5V_UNTAGGED 0x1000 52 + 51 53 struct mlx5_vdpa_net_resources { 52 54 u32 tisn; 53 55 u32 tdn; ··· 121 119 struct mlx5_vdpa_umem umem2; 122 120 struct mlx5_vdpa_umem umem3; 123 121 122 + u32 counter_set_id; 124 123 bool initialized; 125 124 int index; 126 125 u32 virtq_id; ··· 146 143 return idx <= mvdev->max_idx; 147 144 } 148 145 146 + #define MLX5V_MACVLAN_SIZE 256 147 + 149 148 struct mlx5_vdpa_net { 150 149 struct mlx5_vdpa_dev mvdev; 151 150 struct mlx5_vdpa_net_resources res; ··· 159 154 * since memory map might change and we need to destroy and create 160 155 * resources while driver in operational. 161 156 */ 162 - struct mutex reslock; 157 + struct rw_semaphore reslock; 163 158 struct mlx5_flow_table *rxft; 164 - struct mlx5_fc *rx_counter; 165 - struct mlx5_flow_handle *rx_rule_ucast; 166 - struct mlx5_flow_handle *rx_rule_mcast; 167 159 bool setup; 168 160 u32 cur_num_vqs; 169 161 u32 rqt_size; 170 162 struct notifier_block nb; 171 163 struct vdpa_callback config_cb; 172 164 struct mlx5_vdpa_wq_ent cvq_ent; 165 + struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE]; 166 + }; 167 + 168 + struct macvlan_node { 169 + struct hlist_node hlist; 170 + struct mlx5_flow_handle *ucast_rule; 171 + struct mlx5_flow_handle *mcast_rule; 172 + u64 macvlan; 173 173 }; 174 174 175 175 static void free_resources(struct mlx5_vdpa_net *ndev); ··· 828 818 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6); 829 819 } 830 820 821 + static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 822 + { 823 + return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 824 + BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 825 + } 826 + 831 827 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 832 828 { 833 829 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); ··· 888 872 MLX5_SET(virtio_q, vq_ctx, umem_3_id, 
mvq->umem3.id); 889 873 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 890 874 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 875 + if (counters_supported(&ndev->mvdev)) 876 + MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 891 877 892 878 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 893 879 if (err) ··· 1153 1135 return err; 1154 1136 } 1155 1137 1138 + static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1139 + { 1140 + u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1141 + u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1142 + void *cmd_hdr; 1143 + int err; 1144 + 1145 + if (!counters_supported(&ndev->mvdev)) 1146 + return 0; 1147 + 1148 + cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1149 + 1150 + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1151 + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1152 + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1153 + 1154 + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1155 + if (err) 1156 + return err; 1157 + 1158 + mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1159 + 1160 + return 0; 1161 + } 1162 + 1163 + static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1164 + { 1165 + u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1166 + u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1167 + 1168 + if (!counters_supported(&ndev->mvdev)) 1169 + return; 1170 + 1171 + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1172 + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1173 + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1174 + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, 
MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1175 + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1176 + mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1177 + } 1178 + 1156 1179 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1157 1180 { 1158 1181 u16 idx = mvq->index; ··· 1221 1162 if (err) 1222 1163 goto err_connect; 1223 1164 1165 + err = counter_set_alloc(ndev, mvq); 1166 + if (err) 1167 + goto err_counter; 1168 + 1224 1169 err = create_virtqueue(ndev, mvq); 1225 1170 if (err) 1226 1171 goto err_connect; ··· 1242 1179 return 0; 1243 1180 1244 1181 err_connect: 1182 + counter_set_dealloc(ndev, mvq); 1183 + err_counter: 1245 1184 qp_destroy(ndev, &mvq->vqqp); 1246 1185 err_vqqp: 1247 1186 qp_destroy(ndev, &mvq->fwqp); ··· 1288 1223 1289 1224 suspend_vq(ndev, mvq); 1290 1225 destroy_virtqueue(ndev, mvq); 1226 + counter_set_dealloc(ndev, mvq); 1291 1227 qp_destroy(ndev, &mvq->vqqp); 1292 1228 qp_destroy(ndev, &mvq->fwqp); 1293 1229 cq_destroy(ndev, mvq->index); ··· 1413 1347 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1414 1348 } 1415 1349 1416 - static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) 1350 + #define MAX_STEERING_ENT 0x8000 1351 + #define MAX_STEERING_GROUPS 2 1352 + 1353 + static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1354 + u16 vid, bool tagged, 1355 + struct mlx5_flow_handle **ucast, 1356 + struct mlx5_flow_handle **mcast) 1417 1357 { 1418 - struct mlx5_flow_destination dest[2] = {}; 1419 - struct mlx5_flow_table_attr ft_attr = {}; 1358 + struct mlx5_flow_destination dest = {}; 1420 1359 struct mlx5_flow_act flow_act = {}; 1421 - struct mlx5_flow_namespace *ns; 1360 + struct mlx5_flow_handle *rule; 1422 1361 struct mlx5_flow_spec *spec; 1423 1362 void *headers_c; 1424 1363 void *headers_v; ··· 1436 1365 return -ENOMEM; 1437 1366 1438 1367 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1439 - ft_attr.max_fte = 2; 1440 - 
ft_attr.autogroup.max_num_groups = 2; 1441 - 1442 - ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 1443 - if (!ns) { 1444 - mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); 1445 - err = -EOPNOTSUPP; 1446 - goto err_ns; 1447 - } 1448 - 1449 - ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 1450 - if (IS_ERR(ndev->rxft)) { 1451 - err = PTR_ERR(ndev->rxft); 1452 - goto err_ns; 1453 - } 1454 - 1455 - ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1456 - if (IS_ERR(ndev->rx_counter)) { 1457 - err = PTR_ERR(ndev->rx_counter); 1458 - goto err_fc; 1459 - } 1460 - 1461 1368 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1462 - dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1463 - memset(dmac_c, 0xff, ETH_ALEN); 1464 1369 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1370 + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1465 1371 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1466 - ether_addr_copy(dmac_v, ndev->config.mac); 1467 - 1468 - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; 1469 - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1470 - dest[0].tir_num = ndev->res.tirn; 1471 - dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1472 - dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); 1473 - ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2); 1474 - 1475 - if (IS_ERR(ndev->rx_rule_ucast)) { 1476 - err = PTR_ERR(ndev->rx_rule_ucast); 1477 - ndev->rx_rule_ucast = NULL; 1478 - goto err_rule_ucast; 1372 + memset(dmac_c, 0xff, ETH_ALEN); 1373 + ether_addr_copy(dmac_v, mac); 1374 + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1375 + if (tagged) { 1376 + MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1377 + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, 
first_vid); 1378 + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid); 1479 1379 } 1380 + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1381 + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1382 + dest.tir_num = ndev->res.tirn; 1383 + rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); 1384 + if (IS_ERR(rule)) 1385 + return PTR_ERR(rule); 1386 + 1387 + *ucast = rule; 1480 1388 1481 1389 memset(dmac_c, 0, ETH_ALEN); 1482 1390 memset(dmac_v, 0, ETH_ALEN); 1483 1391 dmac_c[0] = 1; 1484 1392 dmac_v[0] = 1; 1485 - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1486 - ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1); 1487 - if (IS_ERR(ndev->rx_rule_mcast)) { 1488 - err = PTR_ERR(ndev->rx_rule_mcast); 1489 - ndev->rx_rule_mcast = NULL; 1490 - goto err_rule_mcast; 1393 + rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); 1394 + kvfree(spec); 1395 + if (IS_ERR(rule)) { 1396 + err = PTR_ERR(rule); 1397 + goto err_mcast; 1491 1398 } 1492 1399 1493 - kvfree(spec); 1400 + *mcast = rule; 1494 1401 return 0; 1495 1402 1496 - err_rule_mcast: 1497 - mlx5_del_flow_rules(ndev->rx_rule_ucast); 1498 - ndev->rx_rule_ucast = NULL; 1499 - err_rule_ucast: 1500 - mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); 1501 - err_fc: 1502 - mlx5_destroy_flow_table(ndev->rxft); 1503 - err_ns: 1504 - kvfree(spec); 1403 + err_mcast: 1404 + mlx5_del_flow_rules(*ucast); 1505 1405 return err; 1506 1406 } 1507 1407 1508 - static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) 1408 + static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1409 + struct mlx5_flow_handle *ucast, 1410 + struct mlx5_flow_handle *mcast) 1509 1411 { 1510 - if (!ndev->rx_rule_ucast) 1412 + mlx5_del_flow_rules(ucast); 1413 + mlx5_del_flow_rules(mcast); 1414 + } 1415 + 1416 + static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1417 + { 1418 + u64 val; 1419 + 1420 + if (!tagged) 1421 + vlan = MLX5V_UNTAGGED; 1422 + 1423 + val = 
(u64)vlan << 48 | 1424 + (u64)mac[0] << 40 | 1425 + (u64)mac[1] << 32 | 1426 + (u64)mac[2] << 24 | 1427 + (u64)mac[3] << 16 | 1428 + (u64)mac[4] << 8 | 1429 + (u64)mac[5]; 1430 + 1431 + return val; 1432 + } 1433 + 1434 + static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1435 + { 1436 + struct macvlan_node *pos; 1437 + u32 idx; 1438 + 1439 + idx = hash_64(value, 8); // tbd 8 1440 + hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1441 + if (pos->macvlan == value) 1442 + return pos; 1443 + } 1444 + return NULL; 1445 + } 1446 + 1447 + static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid 1448 + { 1449 + struct macvlan_node *ptr; 1450 + u64 val; 1451 + u32 idx; 1452 + int err; 1453 + 1454 + val = search_val(mac, vlan, tagged); 1455 + if (mac_vlan_lookup(ndev, val)) 1456 + return -EEXIST; 1457 + 1458 + ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1459 + if (!ptr) 1460 + return -ENOMEM; 1461 + 1462 + err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged, 1463 + &ptr->ucast_rule, &ptr->mcast_rule); 1464 + if (err) 1465 + goto err_add; 1466 + 1467 + ptr->macvlan = val; 1468 + idx = hash_64(val, 8); 1469 + hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1470 + return 0; 1471 + 1472 + err_add: 1473 + kfree(ptr); 1474 + return err; 1475 + } 1476 + 1477 + static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1478 + { 1479 + struct macvlan_node *ptr; 1480 + 1481 + ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1482 + if (!ptr) 1511 1483 return; 1512 1484 1513 - mlx5_del_flow_rules(ndev->rx_rule_mcast); 1514 - ndev->rx_rule_mcast = NULL; 1515 - mlx5_del_flow_rules(ndev->rx_rule_ucast); 1516 - ndev->rx_rule_ucast = NULL; 1517 - mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); 1485 + hlist_del(&ptr->hlist); 1486 + mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule); 1487 + kfree(ptr); 1488 + } 1489 + 1490 + 
static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1491 + { 1492 + struct macvlan_node *pos; 1493 + struct hlist_node *n; 1494 + int i; 1495 + 1496 + for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1497 + hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1498 + hlist_del(&pos->hlist); 1499 + mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule); 1500 + kfree(pos); 1501 + } 1502 + } 1503 + } 1504 + 1505 + static int setup_steering(struct mlx5_vdpa_net *ndev) 1506 + { 1507 + struct mlx5_flow_table_attr ft_attr = {}; 1508 + struct mlx5_flow_namespace *ns; 1509 + int err; 1510 + 1511 + ft_attr.max_fte = MAX_STEERING_ENT; 1512 + ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 1513 + 1514 + ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 1515 + if (!ns) { 1516 + mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); 1517 + return -EOPNOTSUPP; 1518 + } 1519 + 1520 + ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 1521 + if (IS_ERR(ndev->rxft)) { 1522 + mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); 1523 + return PTR_ERR(ndev->rxft); 1524 + } 1525 + 1526 + err = mac_vlan_add(ndev, ndev->config.mac, 0, false); 1527 + if (err) 1528 + goto err_add; 1529 + 1530 + return 0; 1531 + 1532 + err_add: 1533 + mlx5_destroy_flow_table(ndev->rxft); 1534 + return err; 1535 + } 1536 + 1537 + static void teardown_steering(struct mlx5_vdpa_net *ndev) 1538 + { 1539 + clear_mac_vlan_table(ndev); 1518 1540 mlx5_destroy_flow_table(ndev->rxft); 1519 1541 } 1520 1542 ··· 1658 1494 1659 1495 /* Need recreate the flow table entry, so that the packet could forward back 1660 1496 */ 1661 - remove_fwd_to_tir(ndev); 1497 + mac_vlan_del(ndev, ndev->config.mac, 0, false); 1662 1498 1663 - if (add_fwd_to_tir(ndev)) { 1499 + if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { 1664 1500 mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); 1665 1501 1666 1502 /* Although 
it hardly run here, we still need double check */ ··· 1684 1520 1685 1521 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 1686 1522 1687 - if (add_fwd_to_tir(ndev)) 1523 + if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 1688 1524 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 1689 1525 1690 1526 break; ··· 1786 1622 return status; 1787 1623 } 1788 1624 1625 + static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1626 + { 1627 + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1628 + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1629 + struct mlx5_control_vq *cvq = &mvdev->cvq; 1630 + __virtio16 vlan; 1631 + size_t read; 1632 + u16 id; 1633 + 1634 + switch (cmd) { 1635 + case VIRTIO_NET_CTRL_VLAN_ADD: 1636 + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1637 + if (read != sizeof(vlan)) 1638 + break; 1639 + 1640 + id = mlx5vdpa16_to_cpu(mvdev, vlan); 1641 + if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 1642 + break; 1643 + 1644 + status = VIRTIO_NET_OK; 1645 + break; 1646 + case VIRTIO_NET_CTRL_VLAN_DEL: 1647 + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1648 + if (read != sizeof(vlan)) 1649 + break; 1650 + 1651 + id = mlx5vdpa16_to_cpu(mvdev, vlan); 1652 + mac_vlan_del(ndev, ndev->config.mac, id, true); 1653 + break; 1654 + default: 1655 + break; 1656 + } 1657 + 1658 + return status; 1659 + } 1660 + 1789 1661 static void mlx5_cvq_kick_handler(struct work_struct *work) 1790 1662 { 1791 1663 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; ··· 1838 1638 ndev = to_mlx5_vdpa_ndev(mvdev); 1839 1639 cvq = &mvdev->cvq; 1840 1640 1841 - mutex_lock(&ndev->reslock); 1641 + down_write(&ndev->reslock); 1842 1642 1843 1643 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1844 1644 goto out; ··· 1859 1659 if (read != sizeof(ctrl)) 1860 1660 break; 1861 1661 1662 + cvq->received_desc++; 1862 1663 switch (ctrl.class) { 1863 1664 case 
VIRTIO_NET_CTRL_MAC: 1864 1665 status = handle_ctrl_mac(mvdev, ctrl.cmd); ··· 1867 1666 case VIRTIO_NET_CTRL_MQ: 1868 1667 status = handle_ctrl_mq(mvdev, ctrl.cmd); 1869 1668 break; 1870 - 1669 + case VIRTIO_NET_CTRL_VLAN: 1670 + status = handle_ctrl_vlan(mvdev, ctrl.cmd); 1671 + break; 1871 1672 default: 1872 1673 break; 1873 1674 } ··· 1885 1682 if (vringh_need_notify_iotlb(&cvq->vring)) 1886 1683 vringh_notify(&cvq->vring); 1887 1684 1685 + cvq->completed_desc++; 1888 1686 queue_work(mvdev->wq, &wqent->work); 1889 1687 break; 1890 1688 } 1891 1689 1892 1690 out: 1893 - mutex_unlock(&ndev->reslock); 1691 + up_write(&ndev->reslock); 1894 1692 } 1895 1693 1896 1694 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) ··· 2092 1888 return PAGE_SIZE; 2093 1889 } 2094 1890 1891 + static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx) 1892 + { 1893 + return 0; 1894 + } 1895 + 2095 1896 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, 2096 1897 MLX5_VIRTIO_NET_F_CSUM = 1 << 10, 2097 1898 MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11, ··· 2134 1925 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2135 1926 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2136 1927 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 1928 + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2137 1929 2138 1930 return mlx_vdpa_features; 2139 1931 } ··· 2395 2185 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2396 2186 int err; 2397 2187 2398 - WARN_ON(!mutex_is_locked(&ndev->reslock)); 2188 + WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2399 2189 2400 2190 if (ndev->setup) { 2401 2191 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); ··· 2420 2210 goto err_tir; 2421 2211 } 2422 2212 2423 - err = add_fwd_to_tir(ndev); 2213 + err = setup_steering(ndev); 2424 2214 if (err) { 2425 - mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n"); 2215 + mlx5_vdpa_warn(mvdev, "setup_steering\n"); 2426 2216 goto err_fwd; 2427 2217 } 2428 2218 ndev->setup = true; ··· 2443 2233 
static void teardown_driver(struct mlx5_vdpa_net *ndev) 2444 2234 { 2445 2235 2446 - WARN_ON(!mutex_is_locked(&ndev->reslock)); 2236 + WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2447 2237 2448 2238 if (!ndev->setup) 2449 2239 return; 2450 2240 2451 - remove_fwd_to_tir(ndev); 2241 + teardown_steering(ndev); 2452 2242 destroy_tir(ndev); 2453 2243 destroy_rqt(ndev); 2454 2244 teardown_virtqueues(ndev); ··· 2473 2263 2474 2264 print_status(mvdev, status, true); 2475 2265 2476 - mutex_lock(&ndev->reslock); 2266 + down_write(&ndev->reslock); 2477 2267 2478 2268 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2479 2269 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { ··· 2489 2279 } 2490 2280 2491 2281 ndev->mvdev.status = status; 2492 - mutex_unlock(&ndev->reslock); 2282 + up_write(&ndev->reslock); 2493 2283 return; 2494 2284 2495 2285 err_setup: 2496 2286 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2497 2287 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 2498 2288 err_clear: 2499 - mutex_unlock(&ndev->reslock); 2289 + up_write(&ndev->reslock); 2500 2290 } 2501 2291 2502 2292 static int mlx5_vdpa_reset(struct vdpa_device *vdev) ··· 2507 2297 print_status(mvdev, 0, true); 2508 2298 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2509 2299 2510 - mutex_lock(&ndev->reslock); 2300 + down_write(&ndev->reslock); 2511 2301 teardown_driver(ndev); 2512 2302 clear_vqs_ready(ndev); 2513 2303 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2514 2304 ndev->mvdev.status = 0; 2515 2305 ndev->cur_num_vqs = 0; 2306 + ndev->mvdev.cvq.received_desc = 0; 2307 + ndev->mvdev.cvq.completed_desc = 0; 2516 2308 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 2517 2309 ndev->mvdev.actual_features = 0; 2518 2310 ++mvdev->generation; ··· 2522 2310 if (mlx5_vdpa_create_mr(mvdev, NULL)) 2523 2311 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2524 2312 } 2525 - mutex_unlock(&ndev->reslock); 2313 + up_write(&ndev->reslock); 2526 2314 2527 2315 return 0; 2528 2316 } ··· 2555 2343 
return mvdev->generation; 2556 2344 } 2557 2345 2558 - static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) 2346 + static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 2347 + struct vhost_iotlb *iotlb) 2559 2348 { 2560 2349 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2561 2350 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2562 2351 bool change_map; 2563 2352 int err; 2564 2353 2565 - mutex_lock(&ndev->reslock); 2354 + down_write(&ndev->reslock); 2566 2355 2567 2356 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); 2568 2357 if (err) { ··· 2575 2362 err = mlx5_vdpa_change_map(mvdev, iotlb); 2576 2363 2577 2364 err: 2578 - mutex_unlock(&ndev->reslock); 2365 + up_write(&ndev->reslock); 2579 2366 return err; 2580 2367 } 2581 2368 ··· 2594 2381 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 2595 2382 } 2596 2383 mlx5_vdpa_free_resources(&ndev->mvdev); 2597 - mutex_destroy(&ndev->reslock); 2598 2384 kfree(ndev->event_cbs); 2599 2385 kfree(ndev->vqs); 2600 2386 } ··· 2634 2422 return mvdev->actual_features; 2635 2423 } 2636 2424 2425 + static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 2426 + u64 *received_desc, u64 *completed_desc) 2427 + { 2428 + u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 2429 + u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 2430 + void *cmd_hdr; 2431 + void *ctx; 2432 + int err; 2433 + 2434 + if (!counters_supported(&ndev->mvdev)) 2435 + return -EOPNOTSUPP; 2436 + 2437 + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 2438 + return -EAGAIN; 2439 + 2440 + cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 2441 + 2442 + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 2443 + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 2444 + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 2445 + 
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 2446 + 2447 + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 2448 + if (err) 2449 + return err; 2450 + 2451 + ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 2452 + *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 2453 + *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 2454 + return 0; 2455 + } 2456 + 2457 + static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 2458 + struct sk_buff *msg, 2459 + struct netlink_ext_ack *extack) 2460 + { 2461 + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2462 + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2463 + struct mlx5_vdpa_virtqueue *mvq; 2464 + struct mlx5_control_vq *cvq; 2465 + u64 received_desc; 2466 + u64 completed_desc; 2467 + int err = 0; 2468 + 2469 + down_read(&ndev->reslock); 2470 + if (!is_index_valid(mvdev, idx)) { 2471 + NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 2472 + err = -EINVAL; 2473 + goto out_err; 2474 + } 2475 + 2476 + if (idx == ctrl_vq_idx(mvdev)) { 2477 + cvq = &mvdev->cvq; 2478 + received_desc = cvq->received_desc; 2479 + completed_desc = cvq->completed_desc; 2480 + goto out; 2481 + } 2482 + 2483 + mvq = &ndev->vqs[idx]; 2484 + err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 2485 + if (err) { 2486 + NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 2487 + goto out_err; 2488 + } 2489 + 2490 + out: 2491 + err = -EMSGSIZE; 2492 + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 2493 + goto out_err; 2494 + 2495 + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 2496 + VDPA_ATTR_PAD)) 2497 + goto out_err; 2498 + 2499 + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 2500 + goto out_err; 2501 + 2502 + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 2503 + VDPA_ATTR_PAD)) 2504 
+ goto out_err; 2505 + 2506 + err = 0; 2507 + out_err: 2508 + up_read(&ndev->reslock); 2509 + return err; 2510 + } 2511 + 2637 2512 static const struct vdpa_config_ops mlx5_vdpa_ops = { 2638 2513 .set_vq_address = mlx5_vdpa_set_vq_address, 2639 2514 .set_vq_num = mlx5_vdpa_set_vq_num, ··· 2730 2431 .get_vq_ready = mlx5_vdpa_get_vq_ready, 2731 2432 .set_vq_state = mlx5_vdpa_set_vq_state, 2732 2433 .get_vq_state = mlx5_vdpa_get_vq_state, 2434 + .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 2733 2435 .get_vq_notification = mlx5_get_vq_notification, 2734 2436 .get_vq_irq = mlx5_get_vq_irq, 2735 2437 .get_vq_align = mlx5_vdpa_get_vq_align, 2438 + .get_vq_group = mlx5_vdpa_get_vq_group, 2736 2439 .get_device_features = mlx5_vdpa_get_device_features, 2737 2440 .set_driver_features = mlx5_vdpa_set_driver_features, 2738 2441 .get_driver_features = mlx5_vdpa_get_driver_features, ··· 2970 2669 } 2971 2670 2972 2671 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, 2973 - name, false); 2672 + 1, 1, name, false); 2974 2673 if (IS_ERR(ndev)) 2975 2674 return PTR_ERR(ndev); 2976 2675 ··· 2987 2686 } 2988 2687 2989 2688 init_mvqs(ndev); 2990 - mutex_init(&ndev->reslock); 2689 + init_rwsem(&ndev->reslock); 2991 2690 config = &ndev->config; 2992 2691 2993 2692 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { 2994 2693 err = config_func_mtu(mdev, add_config->net.mtu); 2995 2694 if (err) 2996 - goto err_mtu; 2695 + goto err_alloc; 2997 2696 } 2998 2697 2999 2698 err = query_mtu(mdev, &mtu); 3000 2699 if (err) 3001 - goto err_mtu; 2700 + goto err_alloc; 3002 2701 3003 2702 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); 3004 2703 ··· 3012 2711 } else { 3013 2712 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); 3014 2713 if (err) 3015 - goto err_mtu; 2714 + goto err_alloc; 3016 2715 } 3017 2716 3018 2717 if (!is_zero_ether_addr(config->mac)) { 3019 2718 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); 3020 
2719 err = mlx5_mpfs_add_mac(pfmdev, config->mac); 3021 2720 if (err) 3022 - goto err_mtu; 2721 + goto err_alloc; 3023 2722 3024 2723 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC); 3025 2724 } ··· 3069 2768 err_mpfs: 3070 2769 if (!is_zero_ether_addr(config->mac)) 3071 2770 mlx5_mpfs_del_mac(pfmdev, config->mac); 3072 - err_mtu: 3073 - mutex_destroy(&ndev->reslock); 3074 2771 err_alloc: 3075 2772 put_device(&mvdev->vdev.dev); 3076 2773 return err;
+214 -43
drivers/vdpa/vdpa.c
··· 18 18 19 19 static LIST_HEAD(mdev_head); 20 20 /* A global mutex that protects vdpa management device and device level operations. */ 21 - static DEFINE_MUTEX(vdpa_dev_mutex); 21 + static DECLARE_RWSEM(vdpa_dev_lock); 22 22 static DEFINE_IDA(vdpa_index_ida); 23 23 24 24 void vdpa_set_status(struct vdpa_device *vdev, u8 status) 25 25 { 26 - mutex_lock(&vdev->cf_mutex); 26 + down_write(&vdev->cf_lock); 27 27 vdev->config->set_status(vdev, status); 28 - mutex_unlock(&vdev->cf_mutex); 28 + up_write(&vdev->cf_lock); 29 29 } 30 30 EXPORT_SYMBOL(vdpa_set_status); 31 31 ··· 148 148 ops->free(vdev); 149 149 150 150 ida_simple_remove(&vdpa_index_ida, vdev->index); 151 - mutex_destroy(&vdev->cf_mutex); 152 151 kfree(vdev->driver_override); 153 152 kfree(vdev); 154 153 } ··· 158 159 * initialized but before registered. 159 160 * @parent: the parent device 160 161 * @config: the bus operations that is supported by this device 162 + * @ngroups: number of groups supported by this device 163 + * @nas: number of address spaces supported by this device 161 164 * @size: size of the parent structure that contains private data 162 165 * @name: name of the vdpa device; optional. 
163 166 * @use_va: indicate whether virtual address must be used by this device ··· 172 171 */ 173 172 struct vdpa_device *__vdpa_alloc_device(struct device *parent, 174 173 const struct vdpa_config_ops *config, 174 + unsigned int ngroups, unsigned int nas, 175 175 size_t size, const char *name, 176 176 bool use_va) 177 177 { ··· 205 203 vdev->config = config; 206 204 vdev->features_valid = false; 207 205 vdev->use_va = use_va; 206 + vdev->ngroups = ngroups; 207 + vdev->nas = nas; 208 208 209 209 if (name) 210 210 err = dev_set_name(&vdev->dev, "%s", name); ··· 215 211 if (err) 216 212 goto err_name; 217 213 218 - mutex_init(&vdev->cf_mutex); 214 + init_rwsem(&vdev->cf_lock); 219 215 device_initialize(&vdev->dev); 220 216 221 217 return vdev; ··· 242 238 243 239 vdev->nvqs = nvqs; 244 240 245 - lockdep_assert_held(&vdpa_dev_mutex); 241 + lockdep_assert_held(&vdpa_dev_lock); 246 242 dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match); 247 243 if (dev) { 248 244 put_device(dev); ··· 282 278 { 283 279 int err; 284 280 285 - mutex_lock(&vdpa_dev_mutex); 281 + down_write(&vdpa_dev_lock); 286 282 err = __vdpa_register_device(vdev, nvqs); 287 - mutex_unlock(&vdpa_dev_mutex); 283 + up_write(&vdpa_dev_lock); 288 284 return err; 289 285 } 290 286 EXPORT_SYMBOL_GPL(vdpa_register_device); ··· 297 293 */ 298 294 void _vdpa_unregister_device(struct vdpa_device *vdev) 299 295 { 300 - lockdep_assert_held(&vdpa_dev_mutex); 296 + lockdep_assert_held(&vdpa_dev_lock); 301 297 WARN_ON(!vdev->mdev); 302 298 device_unregister(&vdev->dev); 303 299 } ··· 309 305 */ 310 306 void vdpa_unregister_device(struct vdpa_device *vdev) 311 307 { 312 - mutex_lock(&vdpa_dev_mutex); 308 + down_write(&vdpa_dev_lock); 313 309 device_unregister(&vdev->dev); 314 - mutex_unlock(&vdpa_dev_mutex); 310 + up_write(&vdpa_dev_lock); 315 311 } 316 312 EXPORT_SYMBOL_GPL(vdpa_unregister_device); 317 313 ··· 356 352 return -EINVAL; 357 353 358 354 INIT_LIST_HEAD(&mdev->list); 359 - 
mutex_lock(&vdpa_dev_mutex); 355 + down_write(&vdpa_dev_lock); 360 356 list_add_tail(&mdev->list, &mdev_head); 361 - mutex_unlock(&vdpa_dev_mutex); 357 + up_write(&vdpa_dev_lock); 362 358 return 0; 363 359 } 364 360 EXPORT_SYMBOL_GPL(vdpa_mgmtdev_register); ··· 375 371 376 372 void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev) 377 373 { 378 - mutex_lock(&vdpa_dev_mutex); 374 + down_write(&vdpa_dev_lock); 379 375 380 376 list_del(&mdev->list); 381 377 382 378 /* Filter out all the entries belong to this management device and delete it. */ 383 379 bus_for_each_dev(&vdpa_bus, NULL, mdev, vdpa_match_remove); 384 380 385 - mutex_unlock(&vdpa_dev_mutex); 381 + up_write(&vdpa_dev_lock); 386 382 } 387 383 EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); 388 384 ··· 411 407 void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, 412 408 void *buf, unsigned int len) 413 409 { 414 - mutex_lock(&vdev->cf_mutex); 410 + down_read(&vdev->cf_lock); 415 411 vdpa_get_config_unlocked(vdev, offset, buf, len); 416 - mutex_unlock(&vdev->cf_mutex); 412 + up_read(&vdev->cf_lock); 417 413 } 418 414 EXPORT_SYMBOL_GPL(vdpa_get_config); 419 415 ··· 427 423 void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, 428 424 const void *buf, unsigned int length) 429 425 { 430 - mutex_lock(&vdev->cf_mutex); 426 + down_write(&vdev->cf_lock); 431 427 vdev->config->set_config(vdev, offset, buf, length); 432 - mutex_unlock(&vdev->cf_mutex); 428 + up_write(&vdev->cf_lock); 433 429 } 434 430 EXPORT_SYMBOL_GPL(vdpa_set_config); 435 431 ··· 536 532 if (!msg) 537 533 return -ENOMEM; 538 534 539 - mutex_lock(&vdpa_dev_mutex); 535 + down_read(&vdpa_dev_lock); 540 536 mdev = vdpa_mgmtdev_get_from_attr(info->attrs); 541 537 if (IS_ERR(mdev)) { 542 - mutex_unlock(&vdpa_dev_mutex); 538 + up_read(&vdpa_dev_lock); 543 539 NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified mgmt device"); 544 540 err = PTR_ERR(mdev); 545 541 goto out; 546 542 } 547 543 548 544 err = 
vdpa_mgmtdev_fill(mdev, msg, info->snd_portid, info->snd_seq, 0); 549 - mutex_unlock(&vdpa_dev_mutex); 545 + up_read(&vdpa_dev_lock); 550 546 if (err) 551 547 goto out; 552 548 err = genlmsg_reply(msg, info); ··· 565 561 int idx = 0; 566 562 int err; 567 563 568 - mutex_lock(&vdpa_dev_mutex); 564 + down_read(&vdpa_dev_lock); 569 565 list_for_each_entry(mdev, &mdev_head, list) { 570 566 if (idx < start) { 571 567 idx++; ··· 578 574 idx++; 579 575 } 580 576 out: 581 - mutex_unlock(&vdpa_dev_mutex); 577 + up_read(&vdpa_dev_lock); 582 578 cb->args[0] = idx; 583 579 return msg->len; 584 580 } ··· 631 627 !netlink_capable(skb, CAP_NET_ADMIN)) 632 628 return -EPERM; 633 629 634 - mutex_lock(&vdpa_dev_mutex); 630 + down_write(&vdpa_dev_lock); 635 631 mdev = vdpa_mgmtdev_get_from_attr(info->attrs); 636 632 if (IS_ERR(mdev)) { 637 633 NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified management device"); ··· 647 643 648 644 err = mdev->ops->dev_add(mdev, name, &config); 649 645 err: 650 - mutex_unlock(&vdpa_dev_mutex); 646 + up_write(&vdpa_dev_lock); 651 647 return err; 652 648 } 653 649 ··· 663 659 return -EINVAL; 664 660 name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); 665 661 666 - mutex_lock(&vdpa_dev_mutex); 662 + down_write(&vdpa_dev_lock); 667 663 dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match); 668 664 if (!dev) { 669 665 NL_SET_ERR_MSG_MOD(info->extack, "device not found"); ··· 681 677 mdev_err: 682 678 put_device(dev); 683 679 dev_err: 684 - mutex_unlock(&vdpa_dev_mutex); 680 + up_write(&vdpa_dev_lock); 685 681 return err; 686 682 } 687 683 ··· 747 743 if (!msg) 748 744 return -ENOMEM; 749 745 750 - mutex_lock(&vdpa_dev_mutex); 746 + down_read(&vdpa_dev_lock); 751 747 dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); 752 748 if (!dev) { 753 749 NL_SET_ERR_MSG_MOD(info->extack, "device not found"); ··· 760 756 goto mdev_err; 761 757 } 762 758 err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack); 
763 - if (!err) 764 - err = genlmsg_reply(msg, info); 759 + if (err) 760 + goto mdev_err; 761 + 762 + err = genlmsg_reply(msg, info); 763 + put_device(dev); 764 + up_read(&vdpa_dev_lock); 765 + return err; 766 + 765 767 mdev_err: 766 768 put_device(dev); 767 769 err: 768 - mutex_unlock(&vdpa_dev_mutex); 769 - if (err) 770 - nlmsg_free(msg); 770 + up_read(&vdpa_dev_lock); 771 + nlmsg_free(msg); 771 772 return err; 772 773 } 773 774 ··· 813 804 info.start_idx = cb->args[0]; 814 805 info.idx = 0; 815 806 816 - mutex_lock(&vdpa_dev_mutex); 807 + down_read(&vdpa_dev_lock); 817 808 bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_dump); 818 - mutex_unlock(&vdpa_dev_mutex); 809 + up_read(&vdpa_dev_lock); 819 810 cb->args[0] = info.idx; 820 811 return msg->len; 821 812 } ··· 870 861 u8 status; 871 862 int err; 872 863 873 - mutex_lock(&vdev->cf_mutex); 864 + down_read(&vdev->cf_lock); 874 865 status = vdev->config->get_status(vdev); 875 866 if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { 876 867 NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed"); ··· 907 898 if (err) 908 899 goto msg_err; 909 900 910 - mutex_unlock(&vdev->cf_mutex); 901 + up_read(&vdev->cf_lock); 911 902 genlmsg_end(msg, hdr); 912 903 return 0; 913 904 914 905 msg_err: 915 906 genlmsg_cancel(msg, hdr); 916 907 out: 917 - mutex_unlock(&vdev->cf_mutex); 908 + up_read(&vdev->cf_lock); 909 + return err; 910 + } 911 + 912 + static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg, 913 + struct genl_info *info, u32 index) 914 + { 915 + struct virtio_net_config config = {}; 916 + u64 features; 917 + u16 max_vqp; 918 + u8 status; 919 + int err; 920 + 921 + status = vdev->config->get_status(vdev); 922 + if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { 923 + NL_SET_ERR_MSG_MOD(info->extack, "feature negotiation not complete"); 924 + return -EAGAIN; 925 + } 926 + vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); 927 + 928 + max_vqp = le16_to_cpu(config.max_virtqueue_pairs); 
929 + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp)) 930 + return -EMSGSIZE; 931 + 932 + features = vdev->config->get_driver_features(vdev); 933 + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, 934 + features, VDPA_ATTR_PAD)) 935 + return -EMSGSIZE; 936 + 937 + if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index)) 938 + return -EMSGSIZE; 939 + 940 + err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack); 941 + if (err) 942 + return err; 943 + 944 + return 0; 945 + } 946 + 947 + static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, 948 + struct genl_info *info, u32 index) 949 + { 950 + int err; 951 + 952 + down_read(&vdev->cf_lock); 953 + if (!vdev->config->get_vendor_vq_stats) { 954 + err = -EOPNOTSUPP; 955 + goto out; 956 + } 957 + 958 + err = vdpa_fill_stats_rec(vdev, msg, info, index); 959 + out: 960 + up_read(&vdev->cf_lock); 961 + return err; 962 + } 963 + 964 + static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev, 965 + struct sk_buff *msg, 966 + struct genl_info *info, u32 index) 967 + { 968 + u32 device_id; 969 + void *hdr; 970 + int err; 971 + u32 portid = info->snd_portid; 972 + u32 seq = info->snd_seq; 973 + u32 flags = 0; 974 + 975 + hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, 976 + VDPA_CMD_DEV_VSTATS_GET); 977 + if (!hdr) 978 + return -EMSGSIZE; 979 + 980 + if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { 981 + err = -EMSGSIZE; 982 + goto undo_msg; 983 + } 984 + 985 + device_id = vdev->config->get_device_id(vdev); 986 + if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { 987 + err = -EMSGSIZE; 988 + goto undo_msg; 989 + } 990 + 991 + switch (device_id) { 992 + case VIRTIO_ID_NET: 993 + if (index > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { 994 + NL_SET_ERR_MSG_MOD(info->extack, "queue index excceeds max value"); 995 + err = -ERANGE; 996 + break; 997 + } 998 + 999 + err = vendor_stats_fill(vdev, msg, info, index); 1000 + break; 1001 + default: 
1002 + err = -EOPNOTSUPP; 1003 + break; 1004 + } 1005 + genlmsg_end(msg, hdr); 1006 + 1007 + return err; 1008 + 1009 + undo_msg: 1010 + genlmsg_cancel(msg, hdr); 918 1011 return err; 919 1012 } 920 1013 ··· 1035 924 if (!msg) 1036 925 return -ENOMEM; 1037 926 1038 - mutex_lock(&vdpa_dev_mutex); 927 + down_read(&vdpa_dev_lock); 1039 928 dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); 1040 929 if (!dev) { 1041 930 NL_SET_ERR_MSG_MOD(info->extack, "device not found"); ··· 1056 945 mdev_err: 1057 946 put_device(dev); 1058 947 dev_err: 1059 - mutex_unlock(&vdpa_dev_mutex); 948 + up_read(&vdpa_dev_lock); 1060 949 if (err) 1061 950 nlmsg_free(msg); 1062 951 return err; ··· 1094 983 info.start_idx = cb->args[0]; 1095 984 info.idx = 0; 1096 985 1097 - mutex_lock(&vdpa_dev_mutex); 986 + down_read(&vdpa_dev_lock); 1098 987 bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump); 1099 - mutex_unlock(&vdpa_dev_mutex); 988 + up_read(&vdpa_dev_lock); 1100 989 cb->args[0] = info.idx; 1101 990 return msg->len; 991 + } 992 + 993 + static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb, 994 + struct genl_info *info) 995 + { 996 + struct vdpa_device *vdev; 997 + struct sk_buff *msg; 998 + const char *devname; 999 + struct device *dev; 1000 + u32 index; 1001 + int err; 1002 + 1003 + if (!info->attrs[VDPA_ATTR_DEV_NAME]) 1004 + return -EINVAL; 1005 + 1006 + if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]) 1007 + return -EINVAL; 1008 + 1009 + devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); 1010 + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1011 + if (!msg) 1012 + return -ENOMEM; 1013 + 1014 + index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]); 1015 + down_read(&vdpa_dev_lock); 1016 + dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); 1017 + if (!dev) { 1018 + NL_SET_ERR_MSG_MOD(info->extack, "device not found"); 1019 + err = -ENODEV; 1020 + goto dev_err; 1021 + } 1022 + vdev = container_of(dev, struct vdpa_device, dev); 1023 + 
if (!vdev->mdev) { 1024 + NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); 1025 + err = -EINVAL; 1026 + goto mdev_err; 1027 + } 1028 + err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index); 1029 + if (err) 1030 + goto mdev_err; 1031 + 1032 + err = genlmsg_reply(msg, info); 1033 + 1034 + put_device(dev); 1035 + up_read(&vdpa_dev_lock); 1036 + 1037 + return err; 1038 + 1039 + mdev_err: 1040 + put_device(dev); 1041 + dev_err: 1042 + nlmsg_free(msg); 1043 + up_read(&vdpa_dev_lock); 1044 + return err; 1102 1045 } 1103 1046 1104 1047 static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { ··· 1194 1029 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1195 1030 .doit = vdpa_nl_cmd_dev_config_get_doit, 1196 1031 .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, 1032 + }, 1033 + { 1034 + .cmd = VDPA_CMD_DEV_VSTATS_GET, 1035 + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1036 + .doit = vdpa_nl_cmd_dev_stats_get_doit, 1037 + .flags = GENL_ADMIN_PERM, 1197 1038 }, 1198 1039 }; 1199 1040
+87 -20
drivers/vdpa/vdpa_sim/vdpa_sim.c
··· 96 96 { 97 97 int i; 98 98 99 - for (i = 0; i < vdpasim->dev_attr.nvqs; i++) 100 - vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]); 101 - 102 99 spin_lock(&vdpasim->iommu_lock); 103 - vhost_iotlb_reset(vdpasim->iommu); 100 + 101 + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { 102 + vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]); 103 + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], 104 + &vdpasim->iommu_lock); 105 + } 106 + 107 + for (i = 0; i < vdpasim->dev_attr.nas; i++) 108 + vhost_iotlb_reset(&vdpasim->iommu[i]); 109 + 104 110 spin_unlock(&vdpasim->iommu_lock); 105 111 106 112 vdpasim->features = 0; ··· 151 145 dma_addr = iova_dma_addr(&vdpasim->iova, iova); 152 146 153 147 spin_lock(&vdpasim->iommu_lock); 154 - ret = vhost_iotlb_add_range(vdpasim->iommu, (u64)dma_addr, 148 + ret = vhost_iotlb_add_range(&vdpasim->iommu[0], (u64)dma_addr, 155 149 (u64)dma_addr + size - 1, (u64)paddr, perm); 156 150 spin_unlock(&vdpasim->iommu_lock); 157 151 ··· 167 161 size_t size) 168 162 { 169 163 spin_lock(&vdpasim->iommu_lock); 170 - vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr, 164 + vhost_iotlb_del_range(&vdpasim->iommu[0], (u64)dma_addr, 171 165 (u64)dma_addr + size - 1); 172 166 spin_unlock(&vdpasim->iommu_lock); 173 167 ··· 257 251 ops = &vdpasim_config_ops; 258 252 259 253 vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, 254 + dev_attr->ngroups, dev_attr->nas, 260 255 dev_attr->name, false); 261 256 if (IS_ERR(vdpasim)) { 262 257 ret = PTR_ERR(vdpasim); ··· 285 278 if (!vdpasim->vqs) 286 279 goto err_iommu; 287 280 288 - vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0); 281 + vdpasim->iommu = kmalloc_array(vdpasim->dev_attr.nas, 282 + sizeof(*vdpasim->iommu), GFP_KERNEL); 289 283 if (!vdpasim->iommu) 290 284 goto err_iommu; 285 + 286 + for (i = 0; i < vdpasim->dev_attr.nas; i++) 287 + vhost_iotlb_init(&vdpasim->iommu[i], 0, 0); 291 288 292 289 vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL); 293 290 if 
(!vdpasim->buffer) 294 291 goto err_iommu; 295 292 296 293 for (i = 0; i < dev_attr->nvqs; i++) 297 - vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu, 294 + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], 298 295 &vdpasim->iommu_lock); 299 296 300 297 ret = iova_cache_get(); ··· 364 353 { 365 354 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 366 355 struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; 356 + bool old_ready; 367 357 368 358 spin_lock(&vdpasim->lock); 359 + old_ready = vq->ready; 369 360 vq->ready = ready; 370 - if (vq->ready) 361 + if (vq->ready && !old_ready) { 371 362 vdpasim_queue_ready(vdpasim, idx); 363 + } 372 364 spin_unlock(&vdpasim->lock); 373 365 } 374 366 ··· 411 397 static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) 412 398 { 413 399 return VDPASIM_QUEUE_ALIGN; 400 + } 401 + 402 + static u32 vdpasim_get_vq_group(struct vdpa_device *vdpa, u16 idx) 403 + { 404 + /* RX and TX belongs to group 0, CVQ belongs to group 1 */ 405 + if (idx == 2) 406 + return 1; 407 + else 408 + return 0; 414 409 } 415 410 416 411 static u64 vdpasim_get_device_features(struct vdpa_device *vdpa) ··· 557 534 return range; 558 535 } 559 536 560 - static int vdpasim_set_map(struct vdpa_device *vdpa, 537 + static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group, 538 + unsigned int asid) 539 + { 540 + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 541 + struct vhost_iotlb *iommu; 542 + int i; 543 + 544 + if (group > vdpasim->dev_attr.ngroups) 545 + return -EINVAL; 546 + 547 + if (asid >= vdpasim->dev_attr.nas) 548 + return -EINVAL; 549 + 550 + iommu = &vdpasim->iommu[asid]; 551 + 552 + spin_lock(&vdpasim->lock); 553 + 554 + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) 555 + if (vdpasim_get_vq_group(vdpa, i) == group) 556 + vringh_set_iotlb(&vdpasim->vqs[i].vring, iommu, 557 + &vdpasim->iommu_lock); 558 + 559 + spin_unlock(&vdpasim->lock); 560 + 561 + return 0; 562 + } 563 + 564 + static int vdpasim_set_map(struct vdpa_device 
*vdpa, unsigned int asid, 561 565 struct vhost_iotlb *iotlb) 562 566 { 563 567 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 564 568 struct vhost_iotlb_map *map; 569 + struct vhost_iotlb *iommu; 565 570 u64 start = 0ULL, last = 0ULL - 1; 566 571 int ret; 567 572 573 + if (asid >= vdpasim->dev_attr.nas) 574 + return -EINVAL; 575 + 568 576 spin_lock(&vdpasim->iommu_lock); 569 - vhost_iotlb_reset(vdpasim->iommu); 577 + 578 + iommu = &vdpasim->iommu[asid]; 579 + vhost_iotlb_reset(iommu); 570 580 571 581 for (map = vhost_iotlb_itree_first(iotlb, start, last); map; 572 582 map = vhost_iotlb_itree_next(map, start, last)) { 573 - ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, 583 + ret = vhost_iotlb_add_range(iommu, map->start, 574 584 map->last, map->addr, map->perm); 575 585 if (ret) 576 586 goto err; ··· 612 556 return 0; 613 557 614 558 err: 615 - vhost_iotlb_reset(vdpasim->iommu); 559 + vhost_iotlb_reset(iommu); 616 560 spin_unlock(&vdpasim->iommu_lock); 617 561 return ret; 618 562 } 619 563 620 - static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, 564 + static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, 565 + u64 iova, u64 size, 621 566 u64 pa, u32 perm, void *opaque) 622 567 { 623 568 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 624 569 int ret; 625 570 571 + if (asid >= vdpasim->dev_attr.nas) 572 + return -EINVAL; 573 + 626 574 spin_lock(&vdpasim->iommu_lock); 627 - ret = vhost_iotlb_add_range_ctx(vdpasim->iommu, iova, iova + size - 1, 628 - pa, perm, opaque); 575 + ret = vhost_iotlb_add_range_ctx(&vdpasim->iommu[asid], iova, 576 + iova + size - 1, pa, perm, opaque); 629 577 spin_unlock(&vdpasim->iommu_lock); 630 578 631 579 return ret; 632 580 } 633 581 634 - static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) 582 + static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid, 583 + u64 iova, u64 size) 635 584 { 636 585 struct vdpasim *vdpasim = vdpa_to_sim(vdpa); 637 586 587 + if 
(asid >= vdpasim->dev_attr.nas) 588 + return -EINVAL; 589 + 638 590 spin_lock(&vdpasim->iommu_lock); 639 - vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); 591 + vhost_iotlb_del_range(&vdpasim->iommu[asid], iova, iova + size - 1); 640 592 spin_unlock(&vdpasim->iommu_lock); 641 593 642 594 return 0; ··· 668 604 } 669 605 670 606 kvfree(vdpasim->buffer); 671 - if (vdpasim->iommu) 672 - vhost_iotlb_free(vdpasim->iommu); 607 + vhost_iotlb_free(vdpasim->iommu); 673 608 kfree(vdpasim->vqs); 674 609 kfree(vdpasim->config); 675 610 } ··· 683 620 .set_vq_state = vdpasim_set_vq_state, 684 621 .get_vq_state = vdpasim_get_vq_state, 685 622 .get_vq_align = vdpasim_get_vq_align, 623 + .get_vq_group = vdpasim_get_vq_group, 686 624 .get_device_features = vdpasim_get_device_features, 687 625 .set_driver_features = vdpasim_set_driver_features, 688 626 .get_driver_features = vdpasim_get_driver_features, ··· 699 635 .set_config = vdpasim_set_config, 700 636 .get_generation = vdpasim_get_generation, 701 637 .get_iova_range = vdpasim_get_iova_range, 638 + .set_group_asid = vdpasim_set_group_asid, 702 639 .dma_map = vdpasim_dma_map, 703 640 .dma_unmap = vdpasim_dma_unmap, 704 641 .free = vdpasim_free, ··· 715 650 .set_vq_state = vdpasim_set_vq_state, 716 651 .get_vq_state = vdpasim_get_vq_state, 717 652 .get_vq_align = vdpasim_get_vq_align, 653 + .get_vq_group = vdpasim_get_vq_group, 718 654 .get_device_features = vdpasim_get_device_features, 719 655 .set_driver_features = vdpasim_set_driver_features, 720 656 .get_driver_features = vdpasim_get_driver_features, ··· 731 665 .set_config = vdpasim_set_config, 732 666 .get_generation = vdpasim_get_generation, 733 667 .get_iova_range = vdpasim_get_iova_range, 668 + .set_group_asid = vdpasim_set_group_asid, 734 669 .set_map = vdpasim_set_map, 735 670 .free = vdpasim_free, 736 671 };
+3
drivers/vdpa/vdpa_sim/vdpa_sim.h
··· 41 41 size_t buffer_size; 42 42 int nvqs; 43 43 u32 id; 44 + u32 ngroups; 45 + u32 nas; 44 46 45 47 work_func_t work_fn; 46 48 void (*get_config)(struct vdpasim *vdpasim, void *config); ··· 65 63 u32 status; 66 64 u32 generation; 67 65 u64 features; 66 + u32 groups; 68 67 /* spinlock to synchronize iommu table */ 69 68 spinlock_t iommu_lock; 70 69 };
+135 -34
drivers/vdpa/vdpa_sim/vdpa_sim_net.c
··· 26 26 #define DRV_LICENSE "GPL v2" 27 27 28 28 #define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ 29 - (1ULL << VIRTIO_NET_F_MAC)) 29 + (1ULL << VIRTIO_NET_F_MAC) | \ 30 + (1ULL << VIRTIO_NET_F_MTU) | \ 31 + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ 32 + (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR)) 30 33 31 - #define VDPASIM_NET_VQ_NUM 2 34 + /* 3 virtqueues, 2 address spaces, 2 virtqueue groups */ 35 + #define VDPASIM_NET_VQ_NUM 3 36 + #define VDPASIM_NET_AS_NUM 2 37 + #define VDPASIM_NET_GROUP_NUM 2 38 + 39 + static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len) 40 + { 41 + /* Make sure data is wrote before advancing index */ 42 + smp_wmb(); 43 + 44 + vringh_complete_iotlb(&vq->vring, vq->head, len); 45 + 46 + /* Make sure used is visible before rasing the interrupt. */ 47 + smp_wmb(); 48 + 49 + local_bh_disable(); 50 + if (vringh_need_notify_iotlb(&vq->vring) > 0) 51 + vringh_notify(&vq->vring); 52 + local_bh_enable(); 53 + } 54 + 55 + static bool receive_filter(struct vdpasim *vdpasim, size_t len) 56 + { 57 + bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1); 58 + size_t hdr_len = modern ? 
sizeof(struct virtio_net_hdr_v1) : 59 + sizeof(struct virtio_net_hdr); 60 + struct virtio_net_config *vio_config = vdpasim->config; 61 + 62 + if (len < ETH_ALEN + hdr_len) 63 + return false; 64 + 65 + if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN)) 66 + return true; 67 + 68 + return false; 69 + } 70 + 71 + static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim, 72 + u8 cmd) 73 + { 74 + struct virtio_net_config *vio_config = vdpasim->config; 75 + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; 76 + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 77 + size_t read; 78 + 79 + switch (cmd) { 80 + case VIRTIO_NET_CTRL_MAC_ADDR_SET: 81 + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, 82 + vio_config->mac, ETH_ALEN); 83 + if (read == ETH_ALEN) 84 + status = VIRTIO_NET_OK; 85 + break; 86 + default: 87 + break; 88 + } 89 + 90 + return status; 91 + } 92 + 93 + static void vdpasim_handle_cvq(struct vdpasim *vdpasim) 94 + { 95 + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; 96 + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 97 + struct virtio_net_ctrl_hdr ctrl; 98 + size_t read, write; 99 + int err; 100 + 101 + if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ))) 102 + return; 103 + 104 + if (!cvq->ready) 105 + return; 106 + 107 + while (true) { 108 + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov, 109 + &cvq->out_iov, 110 + &cvq->head, GFP_ATOMIC); 111 + if (err <= 0) 112 + break; 113 + 114 + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl, 115 + sizeof(ctrl)); 116 + if (read != sizeof(ctrl)) 117 + break; 118 + 119 + switch (ctrl.class) { 120 + case VIRTIO_NET_CTRL_MAC: 121 + status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd); 122 + break; 123 + default: 124 + break; 125 + } 126 + 127 + /* Make sure data is wrote before advancing index */ 128 + smp_wmb(); 129 + 130 + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov, 131 + &status, sizeof(status)); 132 + vringh_complete_iotlb(&cvq->vring, 
cvq->head, write); 133 + vringh_kiov_cleanup(&cvq->in_iov); 134 + vringh_kiov_cleanup(&cvq->out_iov); 135 + 136 + /* Make sure used is visible before rasing the interrupt. */ 137 + smp_wmb(); 138 + 139 + local_bh_disable(); 140 + if (cvq->cb) 141 + cvq->cb(cvq->private); 142 + local_bh_enable(); 143 + } 144 + } 32 145 33 146 static void vdpasim_net_work(struct work_struct *work) 34 147 { ··· 149 36 struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; 150 37 struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; 151 38 ssize_t read, write; 152 - size_t total_write; 153 39 int pkts = 0; 154 40 int err; 155 41 ··· 157 45 if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) 158 46 goto out; 159 47 48 + vdpasim_handle_cvq(vdpasim); 49 + 160 50 if (!txq->ready || !rxq->ready) 161 51 goto out; 162 52 163 53 while (true) { 164 - total_write = 0; 165 54 err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, 166 55 &txq->head, GFP_ATOMIC); 167 56 if (err <= 0) 168 57 break; 169 58 59 + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, 60 + vdpasim->buffer, 61 + PAGE_SIZE); 62 + 63 + if (!receive_filter(vdpasim, read)) { 64 + vdpasim_net_complete(txq, 0); 65 + continue; 66 + } 67 + 170 68 err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, 171 69 &rxq->head, GFP_ATOMIC); 172 70 if (err <= 0) { 173 - vringh_complete_iotlb(&txq->vring, txq->head, 0); 71 + vdpasim_net_complete(txq, 0); 174 72 break; 175 73 } 176 74 177 - while (true) { 178 - read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, 179 - vdpasim->buffer, 180 - PAGE_SIZE); 181 - if (read <= 0) 182 - break; 75 + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, 76 + vdpasim->buffer, read); 77 + if (write <= 0) 78 + break; 183 79 184 - write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, 185 - vdpasim->buffer, read); 186 - if (write <= 0) 187 - break; 188 - 189 - total_write += write; 190 - } 191 - 192 - /* Make sure data is wrote before advancing index */ 193 - smp_wmb(); 194 - 195 - 
vringh_complete_iotlb(&txq->vring, txq->head, 0); 196 - vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); 197 - 198 - /* Make sure used is visible before rasing the interrupt. */ 199 - smp_wmb(); 200 - 201 - local_bh_disable(); 202 - if (vringh_need_notify_iotlb(&txq->vring) > 0) 203 - vringh_notify(&txq->vring); 204 - if (vringh_need_notify_iotlb(&rxq->vring) > 0) 205 - vringh_notify(&rxq->vring); 206 - local_bh_enable(); 80 + vdpasim_net_complete(txq, 0); 81 + vdpasim_net_complete(rxq, write); 207 82 208 83 if (++pkts > 4) { 209 84 schedule_work(&vdpasim->work); ··· 244 145 dev_attr.id = VIRTIO_ID_NET; 245 146 dev_attr.supported_features = VDPASIM_NET_FEATURES; 246 147 dev_attr.nvqs = VDPASIM_NET_VQ_NUM; 148 + dev_attr.ngroups = VDPASIM_NET_GROUP_NUM; 149 + dev_attr.nas = VDPASIM_NET_AS_NUM; 247 150 dev_attr.config_size = sizeof(struct virtio_net_config); 248 151 dev_attr.get_config = vdpasim_net_get_config; 249 152 dev_attr.work_fn = vdpasim_net_work;
+2 -1
drivers/vdpa/vdpa_user/vduse_dev.c
··· 693 693 } 694 694 695 695 static int vduse_vdpa_set_map(struct vdpa_device *vdpa, 696 + unsigned int asid, 696 697 struct vhost_iotlb *iotlb) 697 698 { 698 699 struct vduse_dev *dev = vdpa_to_vduse(vdpa); ··· 1496 1495 return -EEXIST; 1497 1496 1498 1497 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, 1499 - &vduse_vdpa_config_ops, name, true); 1498 + &vduse_vdpa_config_ops, 1, 1, name, true); 1500 1499 if (IS_ERR(vdev)) 1501 1500 return PTR_ERR(vdev); 1502 1501
+129 -32
drivers/vdpa/virtio_pci/vp_vdpa.c
··· 32 32 33 33 struct vp_vdpa { 34 34 struct vdpa_device vdpa; 35 - struct virtio_pci_modern_device mdev; 35 + struct virtio_pci_modern_device *mdev; 36 36 struct vp_vring *vring; 37 37 struct vdpa_callback config_cb; 38 38 char msix_name[VP_VDPA_NAME_SIZE]; 39 39 int config_irq; 40 40 int queues; 41 41 int vectors; 42 + }; 43 + 44 + struct vp_vdpa_mgmtdev { 45 + struct vdpa_mgmt_dev mgtdev; 46 + struct virtio_pci_modern_device *mdev; 47 + struct vp_vdpa *vp_vdpa; 42 48 }; 43 49 44 50 static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) ··· 56 50 { 57 51 struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); 58 52 59 - return &vp_vdpa->mdev; 53 + return vp_vdpa->mdev; 54 + } 55 + 56 + static struct virtio_pci_modern_device *vp_vdpa_to_mdev(struct vp_vdpa *vp_vdpa) 57 + { 58 + return vp_vdpa->mdev; 60 59 } 61 60 62 61 static u64 vp_vdpa_get_device_features(struct vdpa_device *vdpa) ··· 107 96 108 97 static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa) 109 98 { 110 - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; 99 + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); 111 100 struct pci_dev *pdev = mdev->pci_dev; 112 101 int i; 113 102 ··· 154 143 155 144 static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) 156 145 { 157 - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; 146 + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); 158 147 struct pci_dev *pdev = mdev->pci_dev; 159 148 int i, ret, irq; 160 149 int queues = vp_vdpa->queues; ··· 209 198 static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) 210 199 { 211 200 struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); 212 - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; 201 + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); 213 202 u8 s = vp_vdpa_get_status(vdpa); 214 203 215 204 if (status & VIRTIO_CONFIG_S_DRIVER_OK && ··· 223 212 static int vp_vdpa_reset(struct vdpa_device *vdpa) 224 213 { 225 214 struct vp_vdpa *vp_vdpa = 
vdpa_to_vp(vdpa); 226 - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; 215 + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); 227 216 u8 s = vp_vdpa_get_status(vdpa); 228 217 229 218 vp_modern_set_status(mdev, 0); ··· 383 372 void *buf, unsigned int len) 384 373 { 385 374 struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); 386 - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; 375 + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); 387 376 u8 old, new; 388 377 u8 *p; 389 378 int i; ··· 403 392 unsigned int len) 404 393 { 405 394 struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); 406 - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; 395 + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); 407 396 const u8 *p = buf; 408 397 int i; 409 398 ··· 423 412 vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid) 424 413 { 425 414 struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); 426 - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; 415 + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); 427 416 struct vdpa_notification_area notify; 428 417 429 418 notify.addr = vp_vdpa->vring[qid].notify_pa; ··· 465 454 pci_free_irq_vectors(data); 466 455 } 467 456 468 - static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) 457 + static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 458 + const struct vdpa_dev_set_config *add_config) 469 459 { 470 - struct virtio_pci_modern_device *mdev; 460 + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = 461 + container_of(v_mdev, struct vp_vdpa_mgmtdev, mgtdev); 462 + 463 + struct virtio_pci_modern_device *mdev = vp_vdpa_mgtdev->mdev; 464 + struct pci_dev *pdev = mdev->pci_dev; 471 465 struct device *dev = &pdev->dev; 472 - struct vp_vdpa *vp_vdpa; 466 + struct vp_vdpa *vp_vdpa = NULL; 473 467 int ret, i; 474 468 475 - ret = pcim_enable_device(pdev); 476 - if (ret) 477 - return ret; 478 - 479 469 vp_vdpa = vdpa_alloc_device(struct 
vp_vdpa, vdpa, 480 - dev, &vp_vdpa_ops, NULL, false); 470 + dev, &vp_vdpa_ops, 1, 1, name, false); 471 + 481 472 if (IS_ERR(vp_vdpa)) { 482 473 dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); 483 474 return PTR_ERR(vp_vdpa); 484 475 } 485 476 486 - mdev = &vp_vdpa->mdev; 487 - mdev->pci_dev = pdev; 488 - 489 - ret = vp_modern_probe(mdev); 490 - if (ret) { 491 - dev_err(&pdev->dev, "Failed to probe modern PCI device\n"); 492 - goto err; 493 - } 494 - 495 - pci_set_master(pdev); 496 - pci_set_drvdata(pdev, vp_vdpa); 477 + vp_vdpa_mgtdev->vp_vdpa = vp_vdpa; 497 478 498 479 vp_vdpa->vdpa.dma_dev = &pdev->dev; 499 480 vp_vdpa->queues = vp_modern_get_num_queues(mdev); 481 + vp_vdpa->mdev = mdev; 500 482 501 483 ret = devm_add_action_or_reset(dev, vp_vdpa_free_irq_vectors, pdev); 502 484 if (ret) { ··· 520 516 } 521 517 vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; 522 518 523 - ret = vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues); 519 + vp_vdpa->vdpa.mdev = &vp_vdpa_mgtdev->mgtdev; 520 + ret = _vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues); 524 521 if (ret) { 525 522 dev_err(&pdev->dev, "Failed to register to vdpa bus\n"); 526 523 goto err; ··· 534 529 return ret; 535 530 } 536 531 532 + static void vp_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, 533 + struct vdpa_device *dev) 534 + { 535 + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = 536 + container_of(v_mdev, struct vp_vdpa_mgmtdev, mgtdev); 537 + 538 + struct vp_vdpa *vp_vdpa = vp_vdpa_mgtdev->vp_vdpa; 539 + 540 + _vdpa_unregister_device(&vp_vdpa->vdpa); 541 + vp_vdpa_mgtdev->vp_vdpa = NULL; 542 + } 543 + 544 + static const struct vdpa_mgmtdev_ops vp_vdpa_mdev_ops = { 545 + .dev_add = vp_vdpa_dev_add, 546 + .dev_del = vp_vdpa_dev_del, 547 + }; 548 + 549 + static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) 550 + { 551 + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = NULL; 552 + struct vdpa_mgmt_dev *mgtdev; 553 + struct device *dev = &pdev->dev; 554 + struct 
virtio_pci_modern_device *mdev = NULL; 555 + struct virtio_device_id *mdev_id = NULL; 556 + int err; 557 + 558 + vp_vdpa_mgtdev = kzalloc(sizeof(*vp_vdpa_mgtdev), GFP_KERNEL); 559 + if (!vp_vdpa_mgtdev) 560 + return -ENOMEM; 561 + 562 + mgtdev = &vp_vdpa_mgtdev->mgtdev; 563 + mgtdev->ops = &vp_vdpa_mdev_ops; 564 + mgtdev->device = dev; 565 + 566 + mdev = kzalloc(sizeof(struct virtio_pci_modern_device), GFP_KERNEL); 567 + if (!mdev) { 568 + err = -ENOMEM; 569 + goto mdev_err; 570 + } 571 + 572 + mdev_id = kzalloc(sizeof(struct virtio_device_id), GFP_KERNEL); 573 + if (!mdev_id) { 574 + err = -ENOMEM; 575 + goto mdev_id_err; 576 + } 577 + 578 + vp_vdpa_mgtdev->mdev = mdev; 579 + mdev->pci_dev = pdev; 580 + 581 + err = pcim_enable_device(pdev); 582 + if (err) { 583 + goto probe_err; 584 + } 585 + 586 + err = vp_modern_probe(mdev); 587 + if (err) { 588 + dev_err(&pdev->dev, "Failed to probe modern PCI device\n"); 589 + goto probe_err; 590 + } 591 + 592 + mdev_id->device = mdev->id.device; 593 + mdev_id->vendor = mdev->id.vendor; 594 + mgtdev->id_table = mdev_id; 595 + mgtdev->max_supported_vqs = vp_modern_get_num_queues(mdev); 596 + mgtdev->supported_features = vp_modern_get_features(mdev); 597 + pci_set_master(pdev); 598 + pci_set_drvdata(pdev, vp_vdpa_mgtdev); 599 + 600 + err = vdpa_mgmtdev_register(mgtdev); 601 + if (err) { 602 + dev_err(&pdev->dev, "Failed to register vdpa mgmtdev device\n"); 603 + goto register_err; 604 + } 605 + 606 + return 0; 607 + 608 + register_err: 609 + vp_modern_remove(vp_vdpa_mgtdev->mdev); 610 + probe_err: 611 + kfree(mdev_id); 612 + mdev_id_err: 613 + kfree(mdev); 614 + mdev_err: 615 + kfree(vp_vdpa_mgtdev); 616 + return err; 617 + } 618 + 537 619 static void vp_vdpa_remove(struct pci_dev *pdev) 538 620 { 539 - struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev); 621 + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = pci_get_drvdata(pdev); 622 + struct virtio_pci_modern_device *mdev = NULL; 540 623 541 - vp_modern_remove(&vp_vdpa->mdev); 542 - 
vdpa_unregister_device(&vp_vdpa->vdpa); 624 + mdev = vp_vdpa_mgtdev->mdev; 625 + vp_modern_remove(mdev); 626 + vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev); 627 + kfree(&vp_vdpa_mgtdev->mgtdev.id_table); 628 + kfree(mdev); 629 + kfree(vp_vdpa_mgtdev); 543 630 } 544 631 545 632 static struct pci_driver vp_vdpa_driver = {
+18 -5
drivers/vhost/iotlb.c
··· 126 126 EXPORT_SYMBOL_GPL(vhost_iotlb_del_range); 127 127 128 128 /** 129 + * vhost_iotlb_init - initialize a vhost IOTLB 130 + * @iotlb: the IOTLB that needs to be initialized 131 + * @limit: maximum number of IOTLB entries 132 + * @flags: VHOST_IOTLB_FLAG_XXX 133 + */ 134 + void vhost_iotlb_init(struct vhost_iotlb *iotlb, unsigned int limit, 135 + unsigned int flags) 136 + { 137 + iotlb->root = RB_ROOT_CACHED; 138 + iotlb->limit = limit; 139 + iotlb->nmaps = 0; 140 + iotlb->flags = flags; 141 + INIT_LIST_HEAD(&iotlb->list); 142 + } 143 + EXPORT_SYMBOL_GPL(vhost_iotlb_init); 144 + 145 + /** 129 146 * vhost_iotlb_alloc - add a new vhost IOTLB 130 147 * @limit: maximum number of IOTLB entries 131 148 * @flags: VHOST_IOTLB_FLAG_XXX ··· 156 139 if (!iotlb) 157 140 return NULL; 158 141 159 - iotlb->root = RB_ROOT_CACHED; 160 - iotlb->limit = limit; 161 - iotlb->nmaps = 0; 162 - iotlb->flags = flags; 163 - INIT_LIST_HEAD(&iotlb->list); 142 + vhost_iotlb_init(iotlb, limit, flags); 164 143 165 144 return iotlb; 166 145 }
+2 -9
drivers/vhost/net.c
··· 1374 1374 *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); 1375 1375 } 1376 1376 1377 - static void vhost_net_flush_vq(struct vhost_net *n, int index) 1378 - { 1379 - vhost_poll_flush(n->poll + index); 1380 - vhost_poll_flush(&n->vqs[index].vq.poll); 1381 - } 1382 - 1383 1377 static void vhost_net_flush(struct vhost_net *n) 1384 1378 { 1385 - vhost_net_flush_vq(n, VHOST_NET_VQ_TX); 1386 - vhost_net_flush_vq(n, VHOST_NET_VQ_RX); 1379 + vhost_dev_flush(&n->dev); 1387 1380 if (n->vqs[VHOST_NET_VQ_TX].ubufs) { 1388 1381 mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); 1389 1382 n->tx_flush = true; ··· 1565 1572 } 1566 1573 1567 1574 if (oldsock) { 1568 - vhost_net_flush_vq(n, index); 1575 + vhost_dev_flush(&n->dev); 1569 1576 sockfd_put(oldsock); 1570 1577 } 1571 1578
+1 -3
drivers/vhost/scsi.c
··· 1436 1436 kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); 1437 1437 1438 1438 /* Flush both the vhost poll and vhost work */ 1439 - vhost_work_dev_flush(&vs->dev); 1439 + vhost_dev_flush(&vs->dev); 1440 1440 1441 1441 /* Wait for all reqs issued before the flush to be finished */ 1442 1442 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) ··· 1827 1827 vhost_scsi_clear_endpoint(vs, &t); 1828 1828 vhost_dev_stop(&vs->dev); 1829 1829 vhost_dev_cleanup(&vs->dev); 1830 - /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */ 1831 - vhost_scsi_flush(vs); 1832 1830 kfree(vs->dev.vqs); 1833 1831 kvfree(vs); 1834 1832 return 0;
+3 -11
drivers/vhost/test.c
··· 144 144 *privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ); 145 145 } 146 146 147 - static void vhost_test_flush_vq(struct vhost_test *n, int index) 148 - { 149 - vhost_poll_flush(&n->vqs[index].poll); 150 - } 151 - 152 147 static void vhost_test_flush(struct vhost_test *n) 153 148 { 154 - vhost_test_flush_vq(n, VHOST_TEST_VQ); 149 + vhost_dev_flush(&n->dev); 155 150 } 156 151 157 152 static int vhost_test_release(struct inode *inode, struct file *f) ··· 158 163 vhost_test_flush(n); 159 164 vhost_dev_stop(&n->dev); 160 165 vhost_dev_cleanup(&n->dev); 161 - /* We do an extra flush before freeing memory, 162 - * since jobs can re-queue themselves. */ 163 - vhost_test_flush(n); 164 166 kfree(n->dev.vqs); 165 167 kfree(n); 166 168 return 0; ··· 202 210 goto err; 203 211 204 212 if (oldpriv) { 205 - vhost_test_flush_vq(n, index); 213 + vhost_test_flush(n); 206 214 } 207 215 } 208 216 ··· 295 303 mutex_unlock(&vq->mutex); 296 304 297 305 if (enable) { 298 - vhost_test_flush_vq(n, index); 306 + vhost_test_flush(n); 299 307 } 300 308 301 309 mutex_unlock(&n->dev.mutex);
+216 -55
drivers/vhost/vdpa.c
··· 28 28 enum { 29 29 VHOST_VDPA_BACKEND_FEATURES = 30 30 (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | 31 - (1ULL << VHOST_BACKEND_F_IOTLB_BATCH), 31 + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) | 32 + (1ULL << VHOST_BACKEND_F_IOTLB_ASID), 32 33 }; 33 34 34 35 #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) 36 + 37 + #define VHOST_VDPA_IOTLB_BUCKETS 16 38 + 39 + struct vhost_vdpa_as { 40 + struct hlist_node hash_link; 41 + struct vhost_iotlb iotlb; 42 + u32 id; 43 + }; 35 44 36 45 struct vhost_vdpa { 37 46 struct vhost_dev vdev; ··· 48 39 struct vhost_virtqueue *vqs; 49 40 struct completion completion; 50 41 struct vdpa_device *vdpa; 42 + struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; 51 43 struct device dev; 52 44 struct cdev cdev; 53 45 atomic_t opened; ··· 58 48 struct eventfd_ctx *config_ctx; 59 49 int in_batch; 60 50 struct vdpa_iova_range range; 51 + u32 batch_asid; 61 52 }; 62 53 63 54 static DEFINE_IDA(vhost_vdpa_ida); 64 55 65 56 static dev_t vhost_vdpa_major; 57 + 58 + static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) 59 + { 60 + struct vhost_vdpa_as *as = container_of(iotlb, struct 61 + vhost_vdpa_as, iotlb); 62 + return as->id; 63 + } 64 + 65 + static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) 66 + { 67 + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; 68 + struct vhost_vdpa_as *as; 69 + 70 + hlist_for_each_entry(as, head, hash_link) 71 + if (as->id == asid) 72 + return as; 73 + 74 + return NULL; 75 + } 76 + 77 + static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid) 78 + { 79 + struct vhost_vdpa_as *as = asid_to_as(v, asid); 80 + 81 + if (!as) 82 + return NULL; 83 + 84 + return &as->iotlb; 85 + } 86 + 87 + static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) 88 + { 89 + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; 90 + struct vhost_vdpa_as *as; 91 + 92 + if (asid_to_as(v, asid)) 93 + return NULL; 94 + 95 + if (asid >= v->vdpa->nas) 96 + return 
NULL; 97 + 98 + as = kmalloc(sizeof(*as), GFP_KERNEL); 99 + if (!as) 100 + return NULL; 101 + 102 + vhost_iotlb_init(&as->iotlb, 0, 0); 103 + as->id = asid; 104 + hlist_add_head(&as->hash_link, head); 105 + 106 + return as; 107 + } 108 + 109 + static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, 110 + u32 asid) 111 + { 112 + struct vhost_vdpa_as *as = asid_to_as(v, asid); 113 + 114 + if (as) 115 + return as; 116 + 117 + return vhost_vdpa_alloc_as(v, asid); 118 + } 119 + 120 + static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) 121 + { 122 + struct vhost_vdpa_as *as = asid_to_as(v, asid); 123 + 124 + if (!as) 125 + return -EINVAL; 126 + 127 + hlist_del(&as->hash_link); 128 + vhost_iotlb_reset(&as->iotlb); 129 + kfree(as); 130 + 131 + return 0; 132 + } 66 133 67 134 static void handle_vq_kick(struct vhost_work *work) 68 135 { ··· 498 411 return -EFAULT; 499 412 ops->set_vq_ready(vdpa, idx, s.num); 500 413 return 0; 414 + case VHOST_VDPA_GET_VRING_GROUP: 415 + s.index = idx; 416 + s.num = ops->get_vq_group(vdpa, idx); 417 + if (s.num >= vdpa->ngroups) 418 + return -EIO; 419 + else if (copy_to_user(argp, &s, sizeof(s))) 420 + return -EFAULT; 421 + return 0; 422 + case VHOST_VDPA_SET_GROUP_ASID: 423 + if (copy_from_user(&s, argp, sizeof(s))) 424 + return -EFAULT; 425 + if (s.num >= vdpa->nas) 426 + return -EINVAL; 427 + if (!ops->set_group_asid) 428 + return -EOPNOTSUPP; 429 + return ops->set_group_asid(vdpa, idx, s.num); 501 430 case VHOST_GET_VRING_BASE: 502 431 r = ops->get_vq_state(v->vdpa, idx, &vq_state); 503 432 if (r) ··· 608 505 case VHOST_VDPA_GET_VRING_NUM: 609 506 r = vhost_vdpa_get_vring_num(v, argp); 610 507 break; 508 + case VHOST_VDPA_GET_GROUP_NUM: 509 + if (copy_to_user(argp, &v->vdpa->ngroups, 510 + sizeof(v->vdpa->ngroups))) 511 + r = -EFAULT; 512 + break; 513 + case VHOST_VDPA_GET_AS_NUM: 514 + if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas))) 515 + r = -EFAULT; 516 + break; 611 517 case 
VHOST_SET_LOG_BASE: 612 518 case VHOST_SET_LOG_FD: 613 519 r = -ENOIOCTLCMD; ··· 649 537 return r; 650 538 } 651 539 652 - static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last) 540 + static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, 541 + struct vhost_iotlb *iotlb, 542 + u64 start, u64 last) 653 543 { 654 544 struct vhost_dev *dev = &v->vdev; 655 - struct vhost_iotlb *iotlb = dev->iotlb; 656 545 struct vhost_iotlb_map *map; 657 546 struct page *page; 658 547 unsigned long pfn, pinned; ··· 672 559 } 673 560 } 674 561 675 - static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last) 562 + static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, 563 + struct vhost_iotlb *iotlb, 564 + u64 start, u64 last) 676 565 { 677 - struct vhost_dev *dev = &v->vdev; 678 - struct vhost_iotlb *iotlb = dev->iotlb; 679 566 struct vhost_iotlb_map *map; 680 567 struct vdpa_map_file *map_file; 681 568 ··· 687 574 } 688 575 } 689 576 690 - static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) 577 + static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, 578 + struct vhost_iotlb *iotlb, 579 + u64 start, u64 last) 691 580 { 692 581 struct vdpa_device *vdpa = v->vdpa; 693 582 694 583 if (vdpa->use_va) 695 - return vhost_vdpa_va_unmap(v, start, last); 584 + return vhost_vdpa_va_unmap(v, iotlb, start, last); 696 585 697 - return vhost_vdpa_pa_unmap(v, start, last); 698 - } 699 - 700 - static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) 701 - { 702 - struct vhost_dev *dev = &v->vdev; 703 - 704 - vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1); 705 - kfree(dev->iotlb); 706 - dev->iotlb = NULL; 586 + return vhost_vdpa_pa_unmap(v, iotlb, start, last); 707 587 } 708 588 709 589 static int perm_to_iommu_flags(u32 perm) ··· 721 615 return flags | IOMMU_CACHE; 722 616 } 723 617 724 - static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, 725 - u64 size, u64 pa, u32 perm, void *opaque) 618 + static int vhost_vdpa_map(struct vhost_vdpa *v, 
struct vhost_iotlb *iotlb, 619 + u64 iova, u64 size, u64 pa, u32 perm, void *opaque) 726 620 { 727 621 struct vhost_dev *dev = &v->vdev; 728 622 struct vdpa_device *vdpa = v->vdpa; 729 623 const struct vdpa_config_ops *ops = vdpa->config; 624 + u32 asid = iotlb_to_asid(iotlb); 730 625 int r = 0; 731 626 732 - r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1, 627 + r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, 733 628 pa, perm, opaque); 734 629 if (r) 735 630 return r; 736 631 737 632 if (ops->dma_map) { 738 - r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); 633 + r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque); 739 634 } else if (ops->set_map) { 740 635 if (!v->in_batch) 741 - r = ops->set_map(vdpa, dev->iotlb); 636 + r = ops->set_map(vdpa, asid, iotlb); 742 637 } else { 743 638 r = iommu_map(v->domain, iova, pa, size, 744 639 perm_to_iommu_flags(perm)); 745 640 } 746 641 if (r) { 747 - vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); 642 + vhost_iotlb_del_range(iotlb, iova, iova + size - 1); 748 643 return r; 749 644 } 750 645 ··· 755 648 return 0; 756 649 } 757 650 758 - static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) 651 + static void vhost_vdpa_unmap(struct vhost_vdpa *v, 652 + struct vhost_iotlb *iotlb, 653 + u64 iova, u64 size) 759 654 { 760 - struct vhost_dev *dev = &v->vdev; 761 655 struct vdpa_device *vdpa = v->vdpa; 762 656 const struct vdpa_config_ops *ops = vdpa->config; 657 + u32 asid = iotlb_to_asid(iotlb); 763 658 764 - vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); 659 + vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1); 765 660 766 661 if (ops->dma_map) { 767 - ops->dma_unmap(vdpa, iova, size); 662 + ops->dma_unmap(vdpa, asid, iova, size); 768 663 } else if (ops->set_map) { 769 664 if (!v->in_batch) 770 - ops->set_map(vdpa, dev->iotlb); 665 + ops->set_map(vdpa, asid, iotlb); 771 666 } else { 772 667 iommu_unmap(v->domain, iova, size); 773 668 } 669 + 670 + /* If 
we are in the middle of batch processing, delay the free 671 + * of AS until BATCH_END. 672 + */ 673 + if (!v->in_batch && !iotlb->nmaps) 674 + vhost_vdpa_remove_as(v, asid); 774 675 } 775 676 776 677 static int vhost_vdpa_va_map(struct vhost_vdpa *v, 678 + struct vhost_iotlb *iotlb, 777 679 u64 iova, u64 size, u64 uaddr, u32 perm) 778 680 { 779 681 struct vhost_dev *dev = &v->vdev; ··· 812 696 offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; 813 697 map_file->offset = offset; 814 698 map_file->file = get_file(vma->vm_file); 815 - ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, 699 + ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr, 816 700 perm, map_file); 817 701 if (ret) { 818 702 fput(map_file->file); ··· 825 709 map_iova += map_size; 826 710 } 827 711 if (ret) 828 - vhost_vdpa_unmap(v, iova, map_iova - iova); 712 + vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova); 829 713 830 714 mmap_read_unlock(dev->mm); 831 715 ··· 833 717 } 834 718 835 719 static int vhost_vdpa_pa_map(struct vhost_vdpa *v, 720 + struct vhost_iotlb *iotlb, 836 721 u64 iova, u64 size, u64 uaddr, u32 perm) 837 722 { 838 723 struct vhost_dev *dev = &v->vdev; ··· 897 780 if (last_pfn && (this_pfn != last_pfn + 1)) { 898 781 /* Pin a contiguous chunk of memory */ 899 782 csize = PFN_PHYS(last_pfn - map_pfn + 1); 900 - ret = vhost_vdpa_map(v, iova, csize, 783 + ret = vhost_vdpa_map(v, iotlb, iova, csize, 901 784 PFN_PHYS(map_pfn), 902 785 perm, NULL); 903 786 if (ret) { ··· 927 810 } 928 811 929 812 /* Pin the rest chunk */ 930 - ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1), 813 + ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1), 931 814 PFN_PHYS(map_pfn), perm, NULL); 932 815 out: 933 816 if (ret) { ··· 947 830 for (pfn = map_pfn; pfn <= last_pfn; pfn++) 948 831 unpin_user_page(pfn_to_page(pfn)); 949 832 } 950 - vhost_vdpa_unmap(v, start, size); 833 + vhost_vdpa_unmap(v, iotlb, start, size); 951 834 } 952 835 unlock: 953 836 
mmap_read_unlock(dev->mm); ··· 958 841 } 959 842 960 843 static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, 844 + struct vhost_iotlb *iotlb, 961 845 struct vhost_iotlb_msg *msg) 962 846 { 963 - struct vhost_dev *dev = &v->vdev; 964 847 struct vdpa_device *vdpa = v->vdpa; 965 - struct vhost_iotlb *iotlb = dev->iotlb; 966 848 967 849 if (msg->iova < v->range.first || !msg->size || 968 850 msg->iova > U64_MAX - msg->size + 1 || ··· 973 857 return -EEXIST; 974 858 975 859 if (vdpa->use_va) 976 - return vhost_vdpa_va_map(v, msg->iova, msg->size, 860 + return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size, 977 861 msg->uaddr, msg->perm); 978 862 979 - return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr, 863 + return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr, 980 864 msg->perm); 981 865 } 982 866 983 - static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, 867 + static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, 984 868 struct vhost_iotlb_msg *msg) 985 869 { 986 870 struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); 987 871 struct vdpa_device *vdpa = v->vdpa; 988 872 const struct vdpa_config_ops *ops = vdpa->config; 873 + struct vhost_iotlb *iotlb = NULL; 874 + struct vhost_vdpa_as *as = NULL; 989 875 int r = 0; 990 876 991 877 mutex_lock(&dev->mutex); ··· 996 878 if (r) 997 879 goto unlock; 998 880 881 + if (msg->type == VHOST_IOTLB_UPDATE || 882 + msg->type == VHOST_IOTLB_BATCH_BEGIN) { 883 + as = vhost_vdpa_find_alloc_as(v, asid); 884 + if (!as) { 885 + dev_err(&v->dev, "can't find and alloc asid %d\n", 886 + asid); 887 + r = -EINVAL; 888 + goto unlock; 889 + } 890 + iotlb = &as->iotlb; 891 + } else 892 + iotlb = asid_to_iotlb(v, asid); 893 + 894 + if ((v->in_batch && v->batch_asid != asid) || !iotlb) { 895 + if (v->in_batch && v->batch_asid != asid) { 896 + dev_info(&v->dev, "batch id %d asid %d\n", 897 + v->batch_asid, asid); 898 + } 899 + if (!iotlb) 900 + dev_err(&v->dev, "no 
iotlb for asid %d\n", asid); 901 + r = -EINVAL; 902 + goto unlock; 903 + } 904 + 999 905 switch (msg->type) { 1000 906 case VHOST_IOTLB_UPDATE: 1001 - r = vhost_vdpa_process_iotlb_update(v, msg); 907 + r = vhost_vdpa_process_iotlb_update(v, iotlb, msg); 1002 908 break; 1003 909 case VHOST_IOTLB_INVALIDATE: 1004 - vhost_vdpa_unmap(v, msg->iova, msg->size); 910 + vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size); 1005 911 break; 1006 912 case VHOST_IOTLB_BATCH_BEGIN: 913 + v->batch_asid = asid; 1007 914 v->in_batch = true; 1008 915 break; 1009 916 case VHOST_IOTLB_BATCH_END: 1010 917 if (v->in_batch && ops->set_map) 1011 - ops->set_map(vdpa, dev->iotlb); 918 + ops->set_map(vdpa, asid, iotlb); 1012 919 v->in_batch = false; 920 + if (!iotlb->nmaps) 921 + vhost_vdpa_remove_as(v, asid); 1013 922 break; 1014 923 default: 1015 924 r = -EINVAL; ··· 1122 977 } 1123 978 } 1124 979 980 + static void vhost_vdpa_cleanup(struct vhost_vdpa *v) 981 + { 982 + struct vhost_vdpa_as *as; 983 + u32 asid; 984 + 985 + vhost_dev_cleanup(&v->vdev); 986 + kfree(v->vdev.vqs); 987 + 988 + for (asid = 0; asid < v->vdpa->nas; asid++) { 989 + as = asid_to_as(v, asid); 990 + if (as) 991 + vhost_vdpa_remove_as(v, asid); 992 + } 993 + } 994 + 1125 995 static int vhost_vdpa_open(struct inode *inode, struct file *filep) 1126 996 { 1127 997 struct vhost_vdpa *v; ··· 1170 1010 vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, 1171 1011 vhost_vdpa_process_iotlb_msg); 1172 1012 1173 - dev->iotlb = vhost_iotlb_alloc(0, 0); 1174 - if (!dev->iotlb) { 1175 - r = -ENOMEM; 1176 - goto err_init_iotlb; 1177 - } 1178 - 1179 1013 r = vhost_vdpa_alloc_domain(v); 1180 1014 if (r) 1181 - goto err_init_iotlb; 1015 + goto err_alloc_domain; 1182 1016 1183 1017 vhost_vdpa_set_iova_range(v); 1184 1018 ··· 1180 1026 1181 1027 return 0; 1182 1028 1183 - err_init_iotlb: 1184 - vhost_dev_cleanup(&v->vdev); 1185 - kfree(vqs); 1029 + err_alloc_domain: 1030 + vhost_vdpa_cleanup(v); 1186 1031 err: 1187 1032 atomic_dec(&v->opened); 
1188 1033 return r; ··· 1205 1052 vhost_vdpa_clean_irq(v); 1206 1053 vhost_vdpa_reset(v); 1207 1054 vhost_dev_stop(&v->vdev); 1208 - vhost_vdpa_iotlb_free(v); 1209 1055 vhost_vdpa_free_domain(v); 1210 1056 vhost_vdpa_config_put(v); 1211 1057 vhost_dev_cleanup(&v->vdev); 1212 - kfree(v->vdev.vqs); 1213 1058 mutex_unlock(&d->mutex); 1214 1059 1215 1060 atomic_dec(&v->opened); ··· 1303 1152 const struct vdpa_config_ops *ops = vdpa->config; 1304 1153 struct vhost_vdpa *v; 1305 1154 int minor; 1306 - int r; 1155 + int i, r; 1156 + 1157 + /* We can't support platform IOMMU device with more than 1 1158 + * group or as 1159 + */ 1160 + if (!ops->set_map && !ops->dma_map && 1161 + (vdpa->ngroups > 1 || vdpa->nas > 1)) 1162 + return -EOPNOTSUPP; 1307 1163 1308 1164 v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); 1309 1165 if (!v) ··· 1353 1195 1354 1196 init_completion(&v->completion); 1355 1197 vdpa_set_drvdata(vdpa, v); 1198 + 1199 + for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++) 1200 + INIT_HLIST_HEAD(&v->as[i]); 1356 1201 1357 1202 return 0; 1358 1203
+25 -20
drivers/vhost/vhost.c
··· 231 231 } 232 232 EXPORT_SYMBOL_GPL(vhost_poll_stop); 233 233 234 - void vhost_work_dev_flush(struct vhost_dev *dev) 234 + void vhost_dev_flush(struct vhost_dev *dev) 235 235 { 236 236 struct vhost_flush_struct flush; 237 237 ··· 243 243 wait_for_completion(&flush.wait_event); 244 244 } 245 245 } 246 - EXPORT_SYMBOL_GPL(vhost_work_dev_flush); 247 - 248 - /* Flush any work that has been scheduled. When calling this, don't hold any 249 - * locks that are also used by the callback. */ 250 - void vhost_poll_flush(struct vhost_poll *poll) 251 - { 252 - vhost_work_dev_flush(poll->dev); 253 - } 254 - EXPORT_SYMBOL_GPL(vhost_poll_flush); 246 + EXPORT_SYMBOL_GPL(vhost_dev_flush); 255 247 256 248 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) 257 249 { ··· 460 468 struct vhost_virtqueue **vqs, int nvqs, 461 469 int iov_limit, int weight, int byte_weight, 462 470 bool use_worker, 463 - int (*msg_handler)(struct vhost_dev *dev, 471 + int (*msg_handler)(struct vhost_dev *dev, u32 asid, 464 472 struct vhost_iotlb_msg *msg)) 465 473 { 466 474 struct vhost_virtqueue *vq; ··· 530 538 attach.owner = current; 531 539 vhost_work_init(&attach.work, vhost_attach_cgroups_work); 532 540 vhost_work_queue(dev, &attach.work); 533 - vhost_work_dev_flush(dev); 541 + vhost_dev_flush(dev); 534 542 return attach.ret; 535 543 } 536 544 ··· 653 661 int i; 654 662 655 663 for (i = 0; i < dev->nvqs; ++i) { 656 - if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) { 664 + if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) 657 665 vhost_poll_stop(&dev->vqs[i]->poll); 658 - vhost_poll_flush(&dev->vqs[i]->poll); 659 - } 660 666 } 667 + 668 + vhost_dev_flush(dev); 661 669 } 662 670 EXPORT_SYMBOL_GPL(vhost_dev_stop); 663 671 ··· 1082 1090 return true; 1083 1091 } 1084 1092 1085 - static int vhost_process_iotlb_msg(struct vhost_dev *dev, 1093 + static int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid, 1086 1094 struct vhost_iotlb_msg *msg) 1087 1095 { 1088 1096 int ret = 
0; 1097 + 1098 + if (asid != 0) 1099 + return -EINVAL; 1089 1100 1090 1101 mutex_lock(&dev->mutex); 1091 1102 vhost_dev_lock_vqs(dev); ··· 1136 1141 struct vhost_iotlb_msg msg; 1137 1142 size_t offset; 1138 1143 int type, ret; 1144 + u32 asid = 0; 1139 1145 1140 1146 ret = copy_from_iter(&type, sizeof(type), from); 1141 1147 if (ret != sizeof(type)) { ··· 1152 1156 offset = offsetof(struct vhost_msg, iotlb) - sizeof(int); 1153 1157 break; 1154 1158 case VHOST_IOTLB_MSG_V2: 1155 - offset = sizeof(__u32); 1159 + if (vhost_backend_has_feature(dev->vqs[0], 1160 + VHOST_BACKEND_F_IOTLB_ASID)) { 1161 + ret = copy_from_iter(&asid, sizeof(asid), from); 1162 + if (ret != sizeof(asid)) { 1163 + ret = -EINVAL; 1164 + goto done; 1165 + } 1166 + offset = 0; 1167 + } else 1168 + offset = sizeof(__u32); 1156 1169 break; 1157 1170 default: 1158 1171 ret = -EINVAL; ··· 1183 1178 } 1184 1179 1185 1180 if (dev->msg_handler) 1186 - ret = dev->msg_handler(dev, &msg); 1181 + ret = dev->msg_handler(dev, asid, &msg); 1187 1182 else 1188 - ret = vhost_process_iotlb_msg(dev, &msg); 1183 + ret = vhost_process_iotlb_msg(dev, asid, &msg); 1189 1184 if (ret) { 1190 1185 ret = -EFAULT; 1191 1186 goto done; ··· 1724 1719 mutex_unlock(&vq->mutex); 1725 1720 1726 1721 if (pollstop && vq->handle_kick) 1727 - vhost_poll_flush(&vq->poll); 1722 + vhost_dev_flush(vq->poll.dev); 1728 1723 return r; 1729 1724 } 1730 1725 EXPORT_SYMBOL_GPL(vhost_vring_ioctl);
+3 -4
drivers/vhost/vhost.h
··· 44 44 __poll_t mask, struct vhost_dev *dev); 45 45 int vhost_poll_start(struct vhost_poll *poll, struct file *file); 46 46 void vhost_poll_stop(struct vhost_poll *poll); 47 - void vhost_poll_flush(struct vhost_poll *poll); 48 47 void vhost_poll_queue(struct vhost_poll *poll); 49 - void vhost_work_dev_flush(struct vhost_dev *dev); 48 + void vhost_dev_flush(struct vhost_dev *dev); 50 49 51 50 struct vhost_log { 52 51 u64 addr; ··· 160 161 int byte_weight; 161 162 u64 kcov_handle; 162 163 bool use_worker; 163 - int (*msg_handler)(struct vhost_dev *dev, 164 + int (*msg_handler)(struct vhost_dev *dev, u32 asid, 164 165 struct vhost_iotlb_msg *msg); 165 166 }; 166 167 ··· 168 169 void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, 169 170 int nvqs, int iov_limit, int weight, int byte_weight, 170 171 bool use_worker, 171 - int (*msg_handler)(struct vhost_dev *dev, 172 + int (*msg_handler)(struct vhost_dev *dev, u32 asid, 172 173 struct vhost_iotlb_msg *msg)); 173 174 long vhost_dev_set_owner(struct vhost_dev *dev); 174 175 bool vhost_dev_has_owner(struct vhost_dev *dev);
+1 -6
drivers/vhost/vsock.c
··· 705 705 706 706 static void vhost_vsock_flush(struct vhost_vsock *vsock) 707 707 { 708 - int i; 709 - 710 - for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) 711 - if (vsock->vqs[i].handle_kick) 712 - vhost_poll_flush(&vsock->vqs[i].poll); 713 - vhost_work_dev_flush(&vsock->dev); 708 + vhost_dev_flush(&vsock->dev); 714 709 } 715 710 716 711 static void vhost_vsock_reset_orphans(struct sock *sk)
+21 -11
drivers/virtio/virtio.c
··· 169 169 /* Do some validation, then set FEATURES_OK */ 170 170 static int virtio_features_ok(struct virtio_device *dev) 171 171 { 172 - unsigned status; 172 + unsigned int status; 173 173 int ret; 174 174 175 175 might_sleep(); ··· 220 220 * */ 221 221 void virtio_reset_device(struct virtio_device *dev) 222 222 { 223 + /* 224 + * The below virtio_synchronize_cbs() guarantees that any 225 + * interrupt for this line arriving after 226 + * virtio_synchronize_cbs() has completed will see 227 + * vq->broken as true. 228 + */ 229 + virtio_break_device(dev); 230 + virtio_synchronize_cbs(dev); 231 + 223 232 dev->config->reset(dev); 224 233 } 225 234 EXPORT_SYMBOL_GPL(virtio_reset_device); ··· 422 413 device_initialize(&dev->dev); 423 414 424 415 /* Assign a unique device index and hence name. */ 425 - err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL); 416 + err = ida_alloc(&virtio_index_ida, GFP_KERNEL); 426 417 if (err < 0) 427 418 goto out; 428 419 ··· 437 428 dev->config_enabled = false; 438 429 dev->config_change_pending = false; 439 430 431 + INIT_LIST_HEAD(&dev->vqs); 432 + spin_lock_init(&dev->vqs_list_lock); 433 + 440 434 /* We always start by resetting the device, in case a previous 441 435 * driver messed it up. This also tests that code path a little. */ 442 - dev->config->reset(dev); 436 + virtio_reset_device(dev); 443 437 444 438 /* Acknowledge that we've seen the device. 
*/ 445 439 virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); 446 - 447 - INIT_LIST_HEAD(&dev->vqs); 448 - spin_lock_init(&dev->vqs_list_lock); 449 440 450 441 /* 451 442 * device_add() causes the bus infrastructure to look for a matching ··· 460 451 out_of_node_put: 461 452 of_node_put(dev->dev.of_node); 462 453 out_ida_remove: 463 - ida_simple_remove(&virtio_index_ida, dev->index); 454 + ida_free(&virtio_index_ida, dev->index); 464 455 out: 465 456 virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); 466 457 return err; ··· 478 469 int index = dev->index; /* save for after device release */ 479 470 480 471 device_unregister(&dev->dev); 481 - ida_simple_remove(&virtio_index_ida, index); 472 + ida_free(&virtio_index_ida, index); 482 473 } 483 474 EXPORT_SYMBOL_GPL(unregister_virtio_device); 484 475 ··· 505 496 506 497 /* We always start by resetting the device, in case a previous 507 498 * driver messed it up. */ 508 - dev->config->reset(dev); 499 + virtio_reset_device(dev); 509 500 510 501 /* Acknowledge that we've seen the device. */ 511 502 virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); ··· 535 526 goto err; 536 527 } 537 528 538 - /* Finally, tell the device we're all set */ 539 - virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); 529 + /* If restore didn't do it, mark device DRIVER_OK ourselves. */ 530 + if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) 531 + virtio_device_ready(dev); 540 532 541 533 virtio_config_enable(dev); 542 534
+6 -6
drivers/virtio/virtio_balloon.c
··· 27 27 * multiple balloon pages. All memory counters in this driver are in balloon 28 28 * page units. 29 29 */ 30 - #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) 30 + #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned int)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) 31 31 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 32 32 /* Maximum number of (4k) pages to deflate on OOM notifications. */ 33 33 #define VIRTIO_BALLOON_OOM_NR_PAGES 256 ··· 208 208 page_to_balloon_pfn(page) + i); 209 209 } 210 210 211 - static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) 211 + static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num) 212 212 { 213 - unsigned num_allocated_pages; 214 - unsigned num_pfns; 213 + unsigned int num_allocated_pages; 214 + unsigned int num_pfns; 215 215 struct page *page; 216 216 LIST_HEAD(pages); 217 217 ··· 272 272 } 273 273 } 274 274 275 - static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) 275 + static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num) 276 276 { 277 - unsigned num_freed_pages; 277 + unsigned int num_freed_pages; 278 278 struct page *page; 279 279 struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; 280 280 LIST_HEAD(pages);
+20 -7
drivers/virtio/virtio_mmio.c
··· 144 144 return 0; 145 145 } 146 146 147 - static void vm_get(struct virtio_device *vdev, unsigned offset, 148 - void *buf, unsigned len) 147 + static void vm_get(struct virtio_device *vdev, unsigned int offset, 148 + void *buf, unsigned int len) 149 149 { 150 150 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); 151 151 void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG; ··· 186 186 } 187 187 } 188 188 189 - static void vm_set(struct virtio_device *vdev, unsigned offset, 190 - const void *buf, unsigned len) 189 + static void vm_set(struct virtio_device *vdev, unsigned int offset, 190 + const void *buf, unsigned int len) 191 191 { 192 192 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); 193 193 void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG; ··· 253 253 /* We should never be setting status to 0. */ 254 254 BUG_ON(status == 0); 255 255 256 + /* 257 + * Per memory-barriers.txt, wmb() is not needed to guarantee 258 + * that the cache coherent memory writes have completed 259 + * before writing to the MMIO region. 
260 + */ 256 261 writel(status, vm_dev->base + VIRTIO_MMIO_STATUS); 257 262 } 258 263 ··· 350 345 free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev); 351 346 } 352 347 353 - static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, 348 + static void vm_synchronize_cbs(struct virtio_device *vdev) 349 + { 350 + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); 351 + 352 + synchronize_irq(platform_get_irq(vm_dev->pdev, 0)); 353 + } 354 + 355 + static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index, 354 356 void (*callback)(struct virtqueue *vq), 355 357 const char *name, bool ctx) 356 358 { ··· 467 455 return ERR_PTR(err); 468 456 } 469 457 470 - static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, 458 + static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 471 459 struct virtqueue *vqs[], 472 460 vq_callback_t *callbacks[], 473 461 const char * const names[], ··· 553 541 .finalize_features = vm_finalize_features, 554 542 .bus_name = vm_bus_name, 555 543 .get_shm_region = vm_get_shm_region, 544 + .synchronize_cbs = vm_synchronize_cbs, 556 545 }; 557 546 558 547 ··· 670 657 int err; 671 658 struct resource resources[2] = {}; 672 659 char *str; 673 - long long int base, size; 660 + long long base, size; 674 661 unsigned int irq; 675 662 int processed, consumed = 0; 676 663 struct platform_device *pdev;
+7 -8
drivers/virtio/virtio_pci_common.c
··· 104 104 { 105 105 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 106 106 const char *name = dev_name(&vp_dev->vdev.dev); 107 - unsigned flags = PCI_IRQ_MSIX; 108 - unsigned i, v; 107 + unsigned int flags = PCI_IRQ_MSIX; 108 + unsigned int i, v; 109 109 int err = -ENOMEM; 110 110 111 111 vp_dev->msix_vectors = nvectors; ··· 171 171 return err; 172 172 } 173 173 174 - static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, 174 + static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, 175 175 void (*callback)(struct virtqueue *vq), 176 176 const char *name, 177 177 bool ctx, ··· 254 254 255 255 if (vp_dev->msix_affinity_masks) { 256 256 for (i = 0; i < vp_dev->msix_vectors; i++) 257 - if (vp_dev->msix_affinity_masks[i]) 258 - free_cpumask_var(vp_dev->msix_affinity_masks[i]); 257 + free_cpumask_var(vp_dev->msix_affinity_masks[i]); 259 258 } 260 259 261 260 if (vp_dev->msix_enabled) { ··· 275 276 vp_dev->vqs = NULL; 276 277 } 277 278 278 - static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, 279 + static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, 279 280 struct virtqueue *vqs[], vq_callback_t *callbacks[], 280 281 const char * const names[], bool per_vq_vectors, 281 282 const bool *ctx, ··· 349 350 return err; 350 351 } 351 352 352 - static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, 353 + static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, 353 354 struct virtqueue *vqs[], vq_callback_t *callbacks[], 354 355 const char * const names[], const bool *ctx) 355 356 { ··· 388 389 } 389 390 390 391 /* the config->find_vqs() implementation */ 391 - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, 392 + int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 392 393 struct virtqueue *vqs[], vq_callback_t *callbacks[], 393 394 const char * const names[], const bool *ctx, 394 395 struct irq_affinity *desc)
+5 -5
drivers/virtio/virtio_pci_common.h
··· 38 38 struct list_head node; 39 39 40 40 /* MSI-X vector (or none) */ 41 - unsigned msix_vector; 41 + unsigned int msix_vector; 42 42 }; 43 43 44 44 /* Our device structure */ ··· 68 68 * and I'm too lazy to allocate each name separately. */ 69 69 char (*msix_names)[256]; 70 70 /* Number of available vectors */ 71 - unsigned msix_vectors; 71 + unsigned int msix_vectors; 72 72 /* Vectors allocated, excluding per-vq vectors if any */ 73 - unsigned msix_used_vectors; 73 + unsigned int msix_used_vectors; 74 74 75 75 /* Whether we have vector per vq */ 76 76 bool per_vq_vectors; 77 77 78 78 struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, 79 79 struct virtio_pci_vq_info *info, 80 - unsigned idx, 80 + unsigned int idx, 81 81 void (*callback)(struct virtqueue *vq), 82 82 const char *name, 83 83 bool ctx, ··· 108 108 /* the config->del_vqs() implementation */ 109 109 void vp_del_vqs(struct virtio_device *vdev); 110 110 /* the config->find_vqs() implementation */ 111 - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, 111 + int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 112 112 struct virtqueue *vqs[], vq_callback_t *callbacks[], 113 113 const char * const names[], const bool *ctx, 114 114 struct irq_affinity *desc);
+6 -5
drivers/virtio/virtio_pci_legacy.c
··· 45 45 } 46 46 47 47 /* virtio config->get() implementation */ 48 - static void vp_get(struct virtio_device *vdev, unsigned offset, 49 - void *buf, unsigned len) 48 + static void vp_get(struct virtio_device *vdev, unsigned int offset, 49 + void *buf, unsigned int len) 50 50 { 51 51 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 52 52 void __iomem *ioaddr = vp_dev->ldev.ioaddr + ··· 61 61 62 62 /* the config->set() implementation. it's symmetric to the config->get() 63 63 * implementation */ 64 - static void vp_set(struct virtio_device *vdev, unsigned offset, 65 - const void *buf, unsigned len) 64 + static void vp_set(struct virtio_device *vdev, unsigned int offset, 65 + const void *buf, unsigned int len) 66 66 { 67 67 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 68 68 void __iomem *ioaddr = vp_dev->ldev.ioaddr + ··· 109 109 110 110 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, 111 111 struct virtio_pci_vq_info *info, 112 - unsigned index, 112 + unsigned int index, 113 113 void (*callback)(struct virtqueue *vq), 114 114 const char *name, 115 115 bool ctx, ··· 192 192 .reset = vp_reset, 193 193 .find_vqs = vp_find_vqs, 194 194 .del_vqs = vp_del_vqs, 195 + .synchronize_cbs = vp_synchronize_vectors, 195 196 .get_features = vp_get_features, 196 197 .finalize_features = vp_finalize_features, 197 198 .bus_name = vp_bus_name,
+8 -6
drivers/virtio/virtio_pci_modern.c
··· 60 60 } 61 61 62 62 /* virtio config->get() implementation */ 63 - static void vp_get(struct virtio_device *vdev, unsigned offset, 64 - void *buf, unsigned len) 63 + static void vp_get(struct virtio_device *vdev, unsigned int offset, 64 + void *buf, unsigned int len) 65 65 { 66 66 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 67 67 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; ··· 98 98 99 99 /* the config->set() implementation. it's symmetric to the config->get() 100 100 * implementation */ 101 - static void vp_set(struct virtio_device *vdev, unsigned offset, 102 - const void *buf, unsigned len) 101 + static void vp_set(struct virtio_device *vdev, unsigned int offset, 102 + const void *buf, unsigned int len) 103 103 { 104 104 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 105 105 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; ··· 183 183 184 184 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, 185 185 struct virtio_pci_vq_info *info, 186 - unsigned index, 186 + unsigned int index, 187 187 void (*callback)(struct virtqueue *vq), 188 188 const char *name, 189 189 bool ctx, ··· 248 248 return ERR_PTR(err); 249 249 } 250 250 251 - static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, 251 + static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 252 252 struct virtqueue *vqs[], 253 253 vq_callback_t *callbacks[], 254 254 const char * const names[], const bool *ctx, ··· 394 394 .reset = vp_reset, 395 395 .find_vqs = vp_modern_find_vqs, 396 396 .del_vqs = vp_del_vqs, 397 + .synchronize_cbs = vp_synchronize_vectors, 397 398 .get_features = vp_get_features, 398 399 .finalize_features = vp_finalize_features, 399 400 .bus_name = vp_bus_name, ··· 412 411 .reset = vp_reset, 413 412 .find_vqs = vp_modern_find_vqs, 414 413 .del_vqs = vp_del_vqs, 414 + .synchronize_cbs = vp_synchronize_vectors, 415 415 .get_features = vp_get_features, 416 416 .finalize_features = vp_finalize_features, 
417 417 .bus_name = vp_bus_name,
+6
drivers/virtio/virtio_pci_modern_dev.c
··· 347 347 err_map_isr: 348 348 pci_iounmap(pci_dev, mdev->common); 349 349 err_map_common: 350 + pci_release_selected_regions(pci_dev, mdev->modern_bars); 350 351 return err; 351 352 } 352 353 EXPORT_SYMBOL_GPL(vp_modern_probe); ··· 467 466 { 468 467 struct virtio_pci_common_cfg __iomem *cfg = mdev->common; 469 468 469 + /* 470 + * Per memory-barriers.txt, wmb() is not needed to guarantee 471 + * that the cache coherent memory writes have completed 472 + * before writing to the MMIO region. 473 + */ 470 474 vp_iowrite8(status, &cfg->device_status); 471 475 } 472 476 EXPORT_SYMBOL_GPL(vp_modern_set_status);
+39 -16
drivers/virtio/virtio_ring.c
··· 205 205 206 206 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 207 207 208 - static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 208 + static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq, 209 209 unsigned int total_sg) 210 210 { 211 - struct vring_virtqueue *vq = to_vvq(_vq); 212 - 213 211 /* 214 212 * If the host supports indirect descriptor tables, and we have multiple 215 213 * buffers, then go indirect. FIXME: tune this threshold ··· 497 499 498 500 head = vq->free_head; 499 501 500 - if (virtqueue_use_indirect(_vq, total_sg)) 502 + if (virtqueue_use_indirect(vq, total_sg)) 501 503 desc = alloc_indirect_split(_vq, total_sg, gfp); 502 504 else { 503 505 desc = NULL; ··· 517 519 descs_used = total_sg; 518 520 } 519 521 520 - if (vq->vq.num_free < descs_used) { 522 + if (unlikely(vq->vq.num_free < descs_used)) { 521 523 pr_debug("Can't add buf len %i - avail = %i\n", 522 524 descs_used, vq->vq.num_free); 523 525 /* FIXME: for historical reasons, we force a notify here if ··· 809 811 } 810 812 } 811 813 812 - static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 814 + static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 813 815 { 814 816 struct vring_virtqueue *vq = to_vvq(_vq); 815 817 u16 last_used_idx; ··· 834 836 return last_used_idx; 835 837 } 836 838 837 - static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 839 + static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) 838 840 { 839 841 struct vring_virtqueue *vq = to_vvq(_vq); 840 842 ··· 1176 1178 1177 1179 BUG_ON(total_sg == 0); 1178 1180 1179 - if (virtqueue_use_indirect(_vq, total_sg)) { 1181 + if (virtqueue_use_indirect(vq, total_sg)) { 1180 1182 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1181 1183 in_sgs, data, gfp); 1182 1184 if (err != -ENOMEM) { ··· 1486 1488 } 1487 1489 } 1488 1490 1489 - static unsigned 
virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1491 + static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1490 1492 { 1491 1493 struct vring_virtqueue *vq = to_vvq(_vq); 1492 1494 ··· 1688 1690 vq->we_own_ring = true; 1689 1691 vq->notify = notify; 1690 1692 vq->weak_barriers = weak_barriers; 1691 - vq->broken = false; 1693 + vq->broken = true; 1692 1694 vq->last_used_idx = 0; 1693 1695 vq->event_triggered = false; 1694 1696 vq->num_added = 0; ··· 2025 2027 * Caller must ensure we don't call this with other virtqueue 2026 2028 * operations at the same time (except where noted). 2027 2029 */ 2028 - unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2030 + unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2029 2031 { 2030 2032 struct vring_virtqueue *vq = to_vvq(_vq); 2031 2033 ··· 2046 2048 * 2047 2049 * This does not need to be serialized. 2048 2050 */ 2049 - bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 2051 + bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 2050 2052 { 2051 2053 struct vring_virtqueue *vq = to_vvq(_vq); 2052 2054 ··· 2072 2074 */ 2073 2075 bool virtqueue_enable_cb(struct virtqueue *_vq) 2074 2076 { 2075 - unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 2077 + unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 2076 2078 2077 2079 return !virtqueue_poll(_vq, last_used_idx); 2078 2080 } ··· 2134 2136 return IRQ_NONE; 2135 2137 } 2136 2138 2137 - if (unlikely(vq->broken)) 2138 - return IRQ_HANDLED; 2139 + if (unlikely(vq->broken)) { 2140 + dev_warn_once(&vq->vq.vdev->dev, 2141 + "virtio vring IRQ raised before DRIVER_OK"); 2142 + return IRQ_NONE; 2143 + } 2139 2144 2140 2145 /* Just a hint for performance: so it's ok that this can be racy! 
*/ 2141 2146 if (vq->event) ··· 2180 2179 vq->we_own_ring = false; 2181 2180 vq->notify = notify; 2182 2181 vq->weak_barriers = weak_barriers; 2183 - vq->broken = false; 2182 + vq->broken = true; 2184 2183 vq->last_used_idx = 0; 2185 2184 vq->event_triggered = false; 2186 2185 vq->num_added = 0; ··· 2397 2396 spin_unlock(&dev->vqs_list_lock); 2398 2397 } 2399 2398 EXPORT_SYMBOL_GPL(virtio_break_device); 2399 + 2400 + /* 2401 + * This should allow the device to be used by the driver. You may 2402 + * need to grab appropriate locks to flush the write to 2403 + * vq->broken. This should only be used in some specific case e.g 2404 + * (probing and restoring). This function should only be called by the 2405 + * core, not directly by the driver. 2406 + */ 2407 + void __virtio_unbreak_device(struct virtio_device *dev) 2408 + { 2409 + struct virtqueue *_vq; 2410 + 2411 + spin_lock(&dev->vqs_list_lock); 2412 + list_for_each_entry(_vq, &dev->vqs, list) { 2413 + struct vring_virtqueue *vq = to_vvq(_vq); 2414 + 2415 + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2416 + WRITE_ONCE(vq->broken, false); 2417 + } 2418 + spin_unlock(&dev->vqs_list_lock); 2419 + } 2420 + EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 2400 2421 2401 2422 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2402 2423 {
+6 -6
drivers/virtio/virtio_vdpa.c
··· 53 53 return to_virtio_vdpa_device(vdev)->vdpa; 54 54 } 55 55 56 - static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset, 57 - void *buf, unsigned len) 56 + static void virtio_vdpa_get(struct virtio_device *vdev, unsigned int offset, 57 + void *buf, unsigned int len) 58 58 { 59 59 struct vdpa_device *vdpa = vd_get_vdpa(vdev); 60 60 61 61 vdpa_get_config(vdpa, offset, buf, len); 62 62 } 63 63 64 - static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, 65 - const void *buf, unsigned len) 64 + static void virtio_vdpa_set(struct virtio_device *vdev, unsigned int offset, 65 + const void *buf, unsigned int len) 66 66 { 67 67 struct vdpa_device *vdpa = vd_get_vdpa(vdev); 68 68 ··· 184 184 } 185 185 186 186 /* Setup virtqueue callback */ 187 - cb.callback = virtio_vdpa_virtqueue_cb; 187 + cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL; 188 188 cb.private = info; 189 189 ops->set_vq_cb(vdpa, index, &cb); 190 190 ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq)); ··· 263 263 virtio_vdpa_del_vq(vq); 264 264 } 265 265 266 - static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned nvqs, 266 + static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 267 267 struct virtqueue *vqs[], 268 268 vq_callback_t *callbacks[], 269 269 const char * const names[],
+1
include/linux/mlx5/mlx5_ifc.h
··· 87 87 enum { 88 88 MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, 89 89 MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, 90 + MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c, 90 91 MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, 91 92 MLX5_OBJ_TYPE_MKEY = 0xff01, 92 93 MLX5_OBJ_TYPE_QP = 0xff02,
+39
include/linux/mlx5/mlx5_ifc_vdpa.h
··· 165 165 struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; 166 166 }; 167 167 168 + struct mlx5_ifc_virtio_q_counters_bits { 169 + u8 modify_field_select[0x40]; 170 + u8 reserved_at_40[0x40]; 171 + u8 received_desc[0x40]; 172 + u8 completed_desc[0x40]; 173 + u8 error_cqes[0x20]; 174 + u8 bad_desc_errors[0x20]; 175 + u8 exceed_max_chain[0x20]; 176 + u8 invalid_buffer[0x20]; 177 + u8 reserved_at_180[0x280]; 178 + }; 179 + 180 + struct mlx5_ifc_create_virtio_q_counters_in_bits { 181 + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; 182 + struct mlx5_ifc_virtio_q_counters_bits virtio_q_counters; 183 + }; 184 + 185 + struct mlx5_ifc_create_virtio_q_counters_out_bits { 186 + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; 187 + struct mlx5_ifc_virtio_q_counters_bits virtio_q_counters; 188 + }; 189 + 190 + struct mlx5_ifc_destroy_virtio_q_counters_in_bits { 191 + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; 192 + }; 193 + 194 + struct mlx5_ifc_destroy_virtio_q_counters_out_bits { 195 + struct mlx5_ifc_general_obj_out_cmd_hdr_bits hdr; 196 + }; 197 + 198 + struct mlx5_ifc_query_virtio_q_counters_in_bits { 199 + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; 200 + }; 201 + 202 + struct mlx5_ifc_query_virtio_q_counters_out_bits { 203 + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; 204 + struct mlx5_ifc_virtio_q_counters_bits counters; 205 + }; 206 + 168 207 #endif /* __MLX5_IFC_VDPA_H_ */
+45 -16
include/linux/vdpa.h
··· 66 66 * @dma_dev: the actual device that is performing DMA 67 67 * @driver_override: driver name to force a match 68 68 * @config: the configuration ops for this device. 69 - * @cf_mutex: Protects get and set access to configuration layout. 69 + * @cf_lock: Protects get and set access to configuration layout. 70 70 * @index: device index 71 71 * @features_valid: were features initialized? for legacy guests 72 + * @ngroups: the number of virtqueue groups 73 + * @nas: the number of address spaces 72 74 * @use_va: indicate whether virtual address must be used by this device 73 75 * @nvqs: maximum number of supported virtqueues 74 76 * @mdev: management device pointer; caller must setup when registering device as part ··· 81 79 struct device *dma_dev; 82 80 const char *driver_override; 83 81 const struct vdpa_config_ops *config; 84 - struct mutex cf_mutex; /* Protects get/set config */ 82 + struct rw_semaphore cf_lock; /* Protects get/set config */ 85 83 unsigned int index; 86 84 bool features_valid; 87 85 bool use_va; 88 86 u32 nvqs; 89 87 struct vdpa_mgmt_dev *mdev; 88 + unsigned int ngroups; 89 + unsigned int nas; 90 90 }; 91 91 92 92 /** ··· 176 172 * for the device 177 173 * @vdev: vdpa device 178 174 * Returns virtqueue align requirement 175 + * @get_vq_group: Get the group id for a specific virtqueue 176 + * @vdev: vdpa device 177 + * @idx: virtqueue index 178 + * Returns u32: group id for this virtqueue 179 179 * @get_device_features: Get virtio features supported by the device 180 180 * @vdev: vdpa device 181 181 * Returns the virtio features support by the ··· 240 232 * @vdev: vdpa device 241 233 * Returns the iova range supported by 242 234 * the device. 
235 + * @set_group_asid: Set address space identifier for a 236 + * virtqueue group 237 + * @vdev: vdpa device 238 + * @group: virtqueue group 239 + * @asid: address space id for this group 240 + * Returns integer: success (0) or error (< 0) 243 241 * @set_map: Set device memory mapping (optional) 244 242 * Needed for device that using device 245 243 * specific DMA translation (on-chip IOMMU) 246 244 * @vdev: vdpa device 245 + * @asid: address space identifier 247 246 * @iotlb: vhost memory mapping to be 248 247 * used by the vDPA 249 248 * Returns integer: success (0) or error (< 0) ··· 259 244 * specific DMA translation (on-chip IOMMU) 260 245 * and preferring incremental map. 261 246 * @vdev: vdpa device 247 + * @asid: address space identifier 262 248 * @iova: iova to be mapped 263 249 * @size: size of the area 264 250 * @pa: physical address for the map ··· 271 255 * specific DMA translation (on-chip IOMMU) 272 256 * and preferring incremental unmap. 273 257 * @vdev: vdpa device 258 + * @asid: address space identifier 274 259 * @iova: iova to be unmapped 275 260 * @size: size of the area 276 261 * Returns integer: success (0) or error (< 0) ··· 293 276 const struct vdpa_vq_state *state); 294 277 int (*get_vq_state)(struct vdpa_device *vdev, u16 idx, 295 278 struct vdpa_vq_state *state); 279 + int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx, 280 + struct sk_buff *msg, 281 + struct netlink_ext_ack *extack); 296 282 struct vdpa_notification_area 297 283 (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); 298 284 /* vq irq is not expected to be changed once DRIVER_OK is set */ ··· 303 283 304 284 /* Device ops */ 305 285 u32 (*get_vq_align)(struct vdpa_device *vdev); 286 + u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); 306 287 u64 (*get_device_features)(struct vdpa_device *vdev); 307 288 int (*set_driver_features)(struct vdpa_device *vdev, u64 features); 308 289 u64 (*get_driver_features)(struct vdpa_device *vdev); ··· 325 304 struct 
vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev); 326 305 327 306 /* DMA ops */ 328 - int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); 329 - int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, 330 - u64 pa, u32 perm, void *opaque); 331 - int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); 307 + int (*set_map)(struct vdpa_device *vdev, unsigned int asid, 308 + struct vhost_iotlb *iotlb); 309 + int (*dma_map)(struct vdpa_device *vdev, unsigned int asid, 310 + u64 iova, u64 size, u64 pa, u32 perm, void *opaque); 311 + int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, 312 + u64 iova, u64 size); 313 + int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, 314 + unsigned int asid); 332 315 333 316 /* Free device resources */ 334 317 void (*free)(struct vdpa_device *vdev); ··· 340 315 341 316 struct vdpa_device *__vdpa_alloc_device(struct device *parent, 342 317 const struct vdpa_config_ops *config, 318 + unsigned int ngroups, unsigned int nas, 343 319 size_t size, const char *name, 344 320 bool use_va); 345 321 ··· 351 325 * @member: the name of struct vdpa_device within the @dev_struct 352 326 * @parent: the parent device 353 327 * @config: the bus operations that is supported by this device 328 + * @ngroups: the number of virtqueue groups supported by this device 329 + * @nas: the number of address spaces 354 330 * @name: name of the vdpa device 355 331 * @use_va: indicate whether virtual address must be used by this device 356 332 * 357 333 * Return allocated data structure or ERR_PTR upon error 358 334 */ 359 - #define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \ 360 - container_of(__vdpa_alloc_device( \ 361 - parent, config, \ 362 - sizeof(dev_struct) + \ 335 + #define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, nas, \ 336 + name, use_va) \ 337 + container_of((__vdpa_alloc_device( \ 338 + parent, config, ngroups, nas, \ 339 + (sizeof(dev_struct) + 
\ 363 340 BUILD_BUG_ON_ZERO(offsetof( \ 364 - dev_struct, member)), name, use_va), \ 341 + dev_struct, member))), name, use_va)), \ 365 342 dev_struct, member) 366 343 367 344 int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs); ··· 424 395 const struct vdpa_config_ops *ops = vdev->config; 425 396 int ret; 426 397 427 - mutex_lock(&vdev->cf_mutex); 398 + down_write(&vdev->cf_lock); 428 399 vdev->features_valid = false; 429 400 ret = ops->reset(vdev); 430 - mutex_unlock(&vdev->cf_mutex); 401 + up_write(&vdev->cf_lock); 431 402 return ret; 432 403 } 433 404 ··· 446 417 { 447 418 int ret; 448 419 449 - mutex_lock(&vdev->cf_mutex); 420 + down_write(&vdev->cf_lock); 450 421 ret = vdpa_set_features_unlocked(vdev, features); 451 - mutex_unlock(&vdev->cf_mutex); 422 + up_write(&vdev->cf_lock); 452 423 453 424 return ret; 454 425 } ··· 492 463 struct vdpa_mgmt_dev { 493 464 struct device *device; 494 465 const struct vdpa_mgmtdev_ops *ops; 495 - const struct virtio_device_id *id_table; 466 + struct virtio_device_id *id_table; 496 467 u64 config_attr_mask; 497 468 struct list_head list; 498 469 u64 supported_features;
+2
include/linux/vhost_iotlb.h
··· 36 36 u64 addr, unsigned int perm); 37 37 void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last); 38 38 39 + void vhost_iotlb_init(struct vhost_iotlb *iotlb, unsigned int limit, 40 + unsigned int flags); 39 41 struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags); 40 42 void vhost_iotlb_free(struct vhost_iotlb *iotlb); 41 43 void vhost_iotlb_reset(struct vhost_iotlb *iotlb);
+1
include/linux/virtio.h
··· 131 131 bool is_virtio_device(struct device *dev); 132 132 133 133 void virtio_break_device(struct virtio_device *dev); 134 + void __virtio_unbreak_device(struct virtio_device *dev); 134 135 135 136 void virtio_config_changed(struct virtio_device *dev); 136 137 #ifdef CONFIG_PM_SLEEP
+46 -1
include/linux/virtio_config.h
··· 57 57 * include a NULL entry for vqs unused by driver 58 58 * Returns 0 on success or error status 59 59 * @del_vqs: free virtqueues found by find_vqs(). 60 + * @synchronize_cbs: synchronize with the virtqueue callbacks (optional) 61 + * The function guarantees that all memory operations on the 62 + * queue before it are visible to the vring_interrupt() that is 63 + * called after it. 64 + * vdev: the virtio_device 60 65 * @get_features: get the array of feature bits for this device. 61 66 * vdev: the virtio_device 62 67 * Returns the first 64 feature bits (all we currently need). ··· 94 89 const char * const names[], const bool *ctx, 95 90 struct irq_affinity *desc); 96 91 void (*del_vqs)(struct virtio_device *); 92 + void (*synchronize_cbs)(struct virtio_device *); 97 93 u64 (*get_features)(struct virtio_device *vdev); 98 94 int (*finalize_features)(struct virtio_device *vdev); 99 95 const char *(*bus_name)(struct virtio_device *vdev); ··· 224 218 } 225 219 226 220 /** 221 + * virtio_synchronize_cbs - synchronize with virtqueue callbacks 222 + * @vdev: the device 223 + */ 224 + static inline 225 + void virtio_synchronize_cbs(struct virtio_device *dev) 226 + { 227 + if (dev->config->synchronize_cbs) { 228 + dev->config->synchronize_cbs(dev); 229 + } else { 230 + /* 231 + * A best effort fallback to synchronize with 232 + * interrupts, preemption and softirq disabled 233 + * regions. See comment above synchronize_rcu(). 
234 + */ 235 + synchronize_rcu(); 236 + } 237 + } 238 + 239 + /** 227 240 * virtio_device_ready - enable vq use in probe function 228 241 * @vdev: the device 229 242 * ··· 255 230 { 256 231 unsigned status = dev->config->get_status(dev); 257 232 258 - BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); 233 + WARN_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); 234 + 235 + /* 236 + * The virtio_synchronize_cbs() makes sure vring_interrupt() 237 + * will see the driver specific setup if it sees vq->broken 238 + * as false (even if the notifications come before DRIVER_OK). 239 + */ 240 + virtio_synchronize_cbs(dev); 241 + __virtio_unbreak_device(dev); 242 + /* 243 + * The transport should ensure the visibility of vq->broken 244 + * before setting DRIVER_OK. See the comments for the transport 245 + * specific set_status() method. 246 + * 247 + * A well behaved device will only notify a virtqueue after 248 + * DRIVER_OK, this means the device should "see" the coherent 249 + * memory write that set vq->broken as false which is done by 250 + * the driver when it sees DRIVER_OK, then the following 251 + * driver's vring_interrupt() will see vq->broken as false so 252 + * we won't lose any notification. 253 + */ 259 254 dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); 260 255 } 261 256
+6
include/uapi/linux/vdpa.h
··· 18 18 VDPA_CMD_DEV_DEL, 19 19 VDPA_CMD_DEV_GET, /* can dump */ 20 20 VDPA_CMD_DEV_CONFIG_GET, /* can dump */ 21 + VDPA_CMD_DEV_VSTATS_GET, 21 22 }; 22 23 23 24 enum vdpa_attr { ··· 47 46 VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ 48 47 VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ 49 48 VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ 49 + 50 + VDPA_ATTR_DEV_QUEUE_INDEX, /* u32 */ 51 + VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ 52 + VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ 53 + 50 54 /* new attributes must be added above here */ 51 55 VDPA_ATTR_MAX, 52 56 };
+20 -6
include/uapi/linux/vhost.h
··· 89 89 90 90 /* Set or get vhost backend capability */ 91 91 92 - /* Use message type V2 */ 93 - #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 94 - /* IOTLB can accept batching hints */ 95 - #define VHOST_BACKEND_F_IOTLB_BATCH 0x2 96 - 97 92 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) 98 93 #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) 99 94 ··· 145 150 /* Get the valid iova range */ 146 151 #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ 147 152 struct vhost_vdpa_iova_range) 148 - 149 153 /* Get the config size */ 150 154 #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) 151 155 152 156 /* Get the count of all virtqueues */ 153 157 #define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) 158 + 159 + /* Get the number of virtqueue groups. */ 160 + #define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) 161 + 162 + /* Get the number of address spaces. */ 163 + #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) 164 + 165 + /* Get the group for a virtqueue: read index, write group in num, 166 + * The virtqueue index is stored in the index field of 167 + * vhost_vring_state. The group for this specific virtqueue is 168 + * returned via num field of vhost_vring_state. 169 + */ 170 + #define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ 171 + struct vhost_vring_state) 172 + /* Set the ASID for a virtqueue group. The group index is stored in 173 + * the index field of vhost_vring_state, the ASID associated with this 174 + * group is stored at num field of vhost_vring_state. 175 + */ 176 + #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ 177 + struct vhost_vring_state) 154 178 155 179 #endif
+10 -1
include/uapi/linux/vhost_types.h
··· 87 87 88 88 struct vhost_msg_v2 { 89 89 __u32 type; 90 - __u32 reserved; 90 + __u32 asid; 91 91 union { 92 92 struct vhost_iotlb_msg iotlb; 93 93 __u8 padding[64]; ··· 152 152 #define VHOST_F_LOG_ALL 26 153 153 /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ 154 154 #define VHOST_NET_F_VIRTIO_NET_HDR 27 155 + 156 + /* Use message type V2 */ 157 + #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 158 + /* IOTLB can accept batching hints */ 159 + #define VHOST_BACKEND_F_IOTLB_BATCH 0x2 160 + /* IOTLB can accept address space identifier through V2 type of IOTLB 161 + * message 162 + */ 163 + #define VHOST_BACKEND_F_IOTLB_ASID 0x3 155 164 156 165 #endif