Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull vhost fixes from Michael Tsirkin:
"Fixes all over the place.

A new UAPI is borderline: it can also be considered a new feature, but
it also seems to be the only way we could come up with to fix addressing
for userspace - and it seems important to switch to it now, before
userspace assumptions about the addressing ability of devices are
set in stone"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
vdpasim: allow to assign a MAC address
vdpasim: fix MAC address configuration
vdpa: handle irq bypass register failure case
vdpa_sim: Fix DMA mask
Revert "vhost-vdpa: fix page pinning leakage in error path"
vdpa/mlx5: Fix error return in map_direct_mr()
vhost_vdpa: Return -EFAULT if copy_from_user() fails
vdpa_sim: implement get_iova_range()
vhost: vdpa: report iova range
vdpa: introduce config op to get valid iova range

+154 -79
+2 -3
drivers/vdpa/mlx5/core/mr.c
··· 239 239 u64 paend; 240 240 struct scatterlist *sg; 241 241 struct device *dma = mvdev->mdev->device; 242 - int ret; 243 242 244 243 for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); 245 244 map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { ··· 276 277 done: 277 278 mr->log_size = log_entity_size; 278 279 mr->nsg = nsg; 279 - ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); 280 - if (!ret) 280 + err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); 281 + if (!err) 281 282 goto err_map; 282 283 283 284 err = create_direct_mr(mvdev, mr);
+30 -3
drivers/vdpa/vdpa_sim/vdpa_sim.c
··· 38 38 module_param(batch_mapping, int, 0444); 39 39 MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); 40 40 41 + static char *macaddr; 42 + module_param(macaddr, charp, 0); 43 + MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); 44 + 41 45 struct vdpasim_virtqueue { 42 46 struct vringh vring; 43 47 struct vringh_kiov iov; ··· 64 60 65 61 static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | 66 62 (1ULL << VIRTIO_F_VERSION_1) | 67 - (1ULL << VIRTIO_F_ACCESS_PLATFORM); 63 + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | 64 + (1ULL << VIRTIO_NET_F_MAC); 68 65 69 66 /* State of each vdpasim device */ 70 67 struct vdpasim { ··· 366 361 spin_lock_init(&vdpasim->iommu_lock); 367 362 368 363 dev = &vdpasim->vdpa.dev; 369 - dev->coherent_dma_mask = DMA_BIT_MASK(64); 364 + dev->dma_mask = &dev->coherent_dma_mask; 365 + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) 366 + goto err_iommu; 370 367 set_dma_ops(dev, &vdpasim_dma_ops); 371 368 372 369 vdpasim->iommu = vhost_iotlb_alloc(2048, 0); ··· 379 372 if (!vdpasim->buffer) 380 373 goto err_iommu; 381 374 382 - eth_random_addr(vdpasim->config.mac); 375 + if (macaddr) { 376 + mac_pton(macaddr, vdpasim->config.mac); 377 + if (!is_valid_ether_addr(vdpasim->config.mac)) { 378 + ret = -EADDRNOTAVAIL; 379 + goto err_iommu; 380 + } 381 + } else { 382 + eth_random_addr(vdpasim->config.mac); 383 + } 383 384 384 385 vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); 385 386 vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); ··· 589 574 return vdpasim->generation; 590 575 } 591 576 577 + static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa) 578 + { 579 + struct vdpa_iova_range range = { 580 + .first = 0ULL, 581 + .last = ULLONG_MAX, 582 + }; 583 + 584 + return range; 585 + } 586 + 592 587 static int vdpasim_set_map(struct vdpa_device *vdpa, 593 588 struct vhost_iotlb *iotlb) 594 589 { ··· 682 657 .get_config = vdpasim_get_config, 683 658 .set_config = 
vdpasim_set_config, 684 659 .get_generation = vdpasim_get_generation, 660 + .get_iova_range = vdpasim_get_iova_range, 685 661 .dma_map = vdpasim_dma_map, 686 662 .dma_unmap = vdpasim_dma_unmap, 687 663 .free = vdpasim_free, ··· 709 683 .get_config = vdpasim_get_config, 710 684 .set_config = vdpasim_set_config, 711 685 .get_generation = vdpasim_get_generation, 686 + .get_iova_range = vdpasim_get_iova_range, 712 687 .set_map = vdpasim_set_map, 713 688 .free = vdpasim_free, 714 689 };
+94 -73
drivers/vhost/vdpa.c
··· 47 47 int minor; 48 48 struct eventfd_ctx *config_ctx; 49 49 int in_batch; 50 + struct vdpa_iova_range range; 50 51 }; 51 52 52 53 static DEFINE_IDA(vhost_vdpa_ida); ··· 104 103 vq->call_ctx.producer.token = vq->call_ctx.ctx; 105 104 vq->call_ctx.producer.irq = irq; 106 105 ret = irq_bypass_register_producer(&vq->call_ctx.producer); 106 + if (unlikely(ret)) 107 + dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n", 108 + qid, vq->call_ctx.producer.token, ret); 107 109 } 108 110 109 111 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) ··· 341 337 return 0; 342 338 } 343 339 340 + static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) 341 + { 342 + struct vhost_vdpa_iova_range range = { 343 + .first = v->range.first, 344 + .last = v->range.last, 345 + }; 346 + 347 + return copy_to_user(argp, &range, sizeof(range)); 348 + } 349 + 344 350 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, 345 351 void __user *argp) 346 352 { ··· 435 421 void __user *argp = (void __user *)arg; 436 422 u64 __user *featurep = argp; 437 423 u64 features; 438 - long r; 424 + long r = 0; 439 425 440 426 if (cmd == VHOST_SET_BACKEND_FEATURES) { 441 - r = copy_from_user(&features, featurep, sizeof(features)); 442 - if (r) 443 - return r; 427 + if (copy_from_user(&features, featurep, sizeof(features))) 428 + return -EFAULT; 444 429 if (features & ~VHOST_VDPA_BACKEND_FEATURES) 445 430 return -EOPNOTSUPP; 446 431 vhost_set_backend_features(&v->vdev, features); ··· 482 469 break; 483 470 case VHOST_GET_BACKEND_FEATURES: 484 471 features = VHOST_VDPA_BACKEND_FEATURES; 485 - r = copy_to_user(featurep, &features, sizeof(features)); 472 + if (copy_to_user(featurep, &features, sizeof(features))) 473 + r = -EFAULT; 474 + break; 475 + case VHOST_VDPA_GET_IOVA_RANGE: 476 + r = vhost_vdpa_get_iova_range(v, argp); 486 477 break; 487 478 default: 488 479 r = vhost_dev_ioctl(&v->vdev, cmd, argp); ··· 
605 588 struct vhost_dev *dev = &v->vdev; 606 589 struct vhost_iotlb *iotlb = dev->iotlb; 607 590 struct page **page_list; 608 - struct vm_area_struct **vmas; 591 + unsigned long list_size = PAGE_SIZE / sizeof(struct page *); 609 592 unsigned int gup_flags = FOLL_LONGTERM; 610 - unsigned long map_pfn, last_pfn = 0; 611 - unsigned long npages, lock_limit; 612 - unsigned long i, nmap = 0; 593 + unsigned long npages, cur_base, map_pfn, last_pfn = 0; 594 + unsigned long locked, lock_limit, pinned, i; 613 595 u64 iova = msg->iova; 614 - long pinned; 615 596 int ret = 0; 597 + 598 + if (msg->iova < v->range.first || 599 + msg->iova + msg->size - 1 > v->range.last) 600 + return -EINVAL; 616 601 617 602 if (vhost_iotlb_itree_first(iotlb, msg->iova, 618 603 msg->iova + msg->size - 1)) 619 604 return -EEXIST; 605 + 606 + page_list = (struct page **) __get_free_page(GFP_KERNEL); 607 + if (!page_list) 608 + return -ENOMEM; 620 609 621 610 if (msg->perm & VHOST_ACCESS_WO) 622 611 gup_flags |= FOLL_WRITE; ··· 631 608 if (!npages) 632 609 return -EINVAL; 633 610 634 - page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); 635 - vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), 636 - GFP_KERNEL); 637 - if (!page_list || !vmas) { 638 - ret = -ENOMEM; 639 - goto free; 640 - } 641 - 642 611 mmap_read_lock(dev->mm); 643 612 613 + locked = atomic64_add_return(npages, &dev->mm->pinned_vm); 644 614 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 645 - if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { 615 + 616 + if (locked > lock_limit) { 646 617 ret = -ENOMEM; 647 - goto unlock; 618 + goto out; 648 619 } 649 620 650 - pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, 651 - page_list, vmas); 652 - if (npages != pinned) { 653 - if (pinned < 0) { 654 - ret = pinned; 655 - } else { 656 - unpin_user_pages(page_list, pinned); 657 - ret = -ENOMEM; 658 - } 659 - goto unlock; 660 - } 661 - 621 + cur_base = msg->uaddr & PAGE_MASK; 662 
622 iova &= PAGE_MASK; 663 - map_pfn = page_to_pfn(page_list[0]); 664 623 665 - /* One more iteration to avoid extra vdpa_map() call out of loop. */ 666 - for (i = 0; i <= npages; i++) { 667 - unsigned long this_pfn; 668 - u64 csize; 624 + while (npages) { 625 + pinned = min_t(unsigned long, npages, list_size); 626 + ret = pin_user_pages(cur_base, pinned, 627 + gup_flags, page_list, NULL); 628 + if (ret != pinned) 629 + goto out; 669 630 670 - /* The last chunk may have no valid PFN next to it */ 671 - this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; 631 + if (!last_pfn) 632 + map_pfn = page_to_pfn(page_list[0]); 672 633 673 - if (last_pfn && (this_pfn == -1UL || 674 - this_pfn != last_pfn + 1)) { 675 - /* Pin a contiguous chunk of memory */ 676 - csize = last_pfn - map_pfn + 1; 677 - ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT, 678 - map_pfn << PAGE_SHIFT, 679 - msg->perm); 680 - if (ret) { 681 - /* 682 - * Unpin the rest chunks of memory on the 683 - * flight with no corresponding vdpa_map() 684 - * calls having been made yet. On the other 685 - * hand, vdpa_unmap() in the failure path 686 - * is in charge of accounting the number of 687 - * pinned pages for its own. 688 - * This asymmetrical pattern of accounting 689 - * is for efficiency to pin all pages at 690 - * once, while there is no other callsite 691 - * of vdpa_map() than here above. 
692 - */ 693 - unpin_user_pages(&page_list[nmap], 694 - npages - nmap); 695 - goto out; 634 + for (i = 0; i < ret; i++) { 635 + unsigned long this_pfn = page_to_pfn(page_list[i]); 636 + u64 csize; 637 + 638 + if (last_pfn && (this_pfn != last_pfn + 1)) { 639 + /* Pin a contiguous chunk of memory */ 640 + csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; 641 + if (vhost_vdpa_map(v, iova, csize, 642 + map_pfn << PAGE_SHIFT, 643 + msg->perm)) 644 + goto out; 645 + map_pfn = this_pfn; 646 + iova += csize; 696 647 } 697 - atomic64_add(csize, &dev->mm->pinned_vm); 698 - nmap += csize; 699 - iova += csize << PAGE_SHIFT; 700 - map_pfn = this_pfn; 648 + 649 + last_pfn = this_pfn; 701 650 } 702 - last_pfn = this_pfn; 651 + 652 + cur_base += ret << PAGE_SHIFT; 653 + npages -= ret; 703 654 } 704 655 705 - WARN_ON(nmap != npages); 656 + /* Pin the rest chunk */ 657 + ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, 658 + map_pfn << PAGE_SHIFT, msg->perm); 706 659 out: 707 - if (ret) 660 + if (ret) { 708 661 vhost_vdpa_unmap(v, msg->iova, msg->size); 709 - unlock: 662 + atomic64_sub(npages, &dev->mm->pinned_vm); 663 + } 710 664 mmap_read_unlock(dev->mm); 711 - free: 712 - kvfree(vmas); 713 - kvfree(page_list); 665 + free_page((unsigned long)page_list); 714 666 return ret; 715 667 } 716 668 ··· 781 783 v->domain = NULL; 782 784 } 783 785 786 + static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) 787 + { 788 + struct vdpa_iova_range *range = &v->range; 789 + struct iommu_domain_geometry geo; 790 + struct vdpa_device *vdpa = v->vdpa; 791 + const struct vdpa_config_ops *ops = vdpa->config; 792 + 793 + if (ops->get_iova_range) { 794 + *range = ops->get_iova_range(vdpa); 795 + } else if (v->domain && 796 + !iommu_domain_get_attr(v->domain, 797 + DOMAIN_ATTR_GEOMETRY, &geo) && 798 + geo.force_aperture) { 799 + range->first = geo.aperture_start; 800 + range->last = geo.aperture_end; 801 + } else { 802 + range->first = 0; 803 + range->last = ULLONG_MAX; 804 + } 
805 + } 806 + 784 807 static int vhost_vdpa_open(struct inode *inode, struct file *filep) 785 808 { 786 809 struct vhost_vdpa *v; ··· 841 822 r = vhost_vdpa_alloc_domain(v); 842 823 if (r) 843 824 goto err_init_iotlb; 825 + 826 + vhost_vdpa_set_iova_range(v); 844 827 845 828 filep->private_data = v; 846 829
+15
include/linux/vdpa.h
··· 53 53 }; 54 54 55 55 /** 56 + * vDPA IOVA range - the IOVA range support by the device 57 + * @first: start of the IOVA range 58 + * @last: end of the IOVA range 59 + */ 60 + struct vdpa_iova_range { 61 + u64 first; 62 + u64 last; 63 + }; 64 + 65 + /** 56 66 * vDPA_config_ops - operations for configuring a vDPA device. 57 67 * Note: vDPA device drivers are required to implement all of the 58 68 * operations unless it is mentioned to be optional in the following ··· 161 151 * @get_generation: Get device config generation (optional) 162 152 * @vdev: vdpa device 163 153 * Returns u32: device generation 154 + * @get_iova_range: Get supported iova range (optional) 155 + * @vdev: vdpa device 156 + * Returns the iova range supported by 157 + * the device. 164 158 * @set_map: Set device memory mapping (optional) 165 159 * Needed for device that using device 166 160 * specific DMA translation (on-chip IOMMU) ··· 230 216 void (*set_config)(struct vdpa_device *vdev, unsigned int offset, 231 217 const void *buf, unsigned int len); 232 218 u32 (*get_generation)(struct vdpa_device *vdev); 219 + struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev); 233 220 234 221 /* DMA ops */ 235 222 int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
+4
include/uapi/linux/vhost.h
··· 146 146 147 147 /* Set event fd for config interrupt*/ 148 148 #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) 149 + 150 + /* Get the valid iova range */ 151 + #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ 152 + struct vhost_vdpa_iova_range) 149 153 #endif
+9
include/uapi/linux/vhost_types.h
··· 138 138 __u8 buf[0]; 139 139 }; 140 140 141 + /* vhost vdpa IOVA range 142 + * @first: First address that can be mapped by vhost-vDPA 143 + * @last: Last address that can be mapped by vhost-vDPA 144 + */ 145 + struct vhost_vdpa_iova_range { 146 + __u64 first; 147 + __u64 last; 148 + }; 149 + 141 150 /* Feature bits */ 142 151 /* Log all write descriptors. Can be changed while device is active. */ 143 152 #define VHOST_F_LOG_ALL 26