Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

+3 -4

drivers/vdpa/Kconfig

··· 30 30 be called ifcvf. 31 31 32 32 config MLX5_VDPA 33 - bool "MLX5 VDPA support library for ConnectX devices" 34 - depends on MLX5_CORE 35 - default n 33 + bool 36 34 help 37 35 Support library for Mellanox VDPA drivers. Provides code that is 38 36 common for all types of VDPA drivers. The following drivers are planned: ··· 38 40 39 41 config MLX5_VDPA_NET 40 42 tristate "vDPA driver for ConnectX devices" 41 - depends on MLX5_VDPA 43 + select MLX5_VDPA 44 + depends on MLX5_CORE 42 45 default n 43 46 help 44 47 VDPA network driver for ConnectX6 and newer. Provides offloading

+14 -6

drivers/vdpa/mlx5/net/mlx5_vnet.c

··· 1133 1133 if (!mvq->initialized) 1134 1134 return; 1135 1135 1136 - if (query_virtqueue(ndev, mvq, &attr)) { 1137 - mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); 1138 - return; 1139 - } 1140 1136 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1141 1137 return; 1142 1138 1143 1139 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1144 1140 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); 1141 + 1142 + if (query_virtqueue(ndev, mvq, &attr)) { 1143 + mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); 1144 + return; 1145 + } 1146 + mvq->avail_idx = attr.available_index; 1145 1147 } 1146 1148 1147 1149 static void suspend_vqs(struct mlx5_vdpa_net *ndev) ··· 1413 1411 struct mlx5_virtq_attr attr; 1414 1412 int err; 1415 1413 1416 - if (!mvq->initialized) 1417 - return -EAGAIN; 1414 + /* If the virtq object was destroyed, use the value saved at 1415 + * the last minute of suspend_vq. This caters for userspace 1416 + * that cares about emulating the index after vq is stopped. 1417 + */ 1418 + if (!mvq->initialized) { 1419 + state->avail_index = mvq->avail_idx; 1420 + return 0; 1421 + } 1418 1422 1419 1423 err = query_virtqueue(ndev, mvq, &attr); 1420 1424 if (err) {

+77 -50

drivers/vhost/vdpa.c

··· 565 565 perm_to_iommu_flags(perm)); 566 566 } 567 567 568 + if (r) 569 + vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); 570 + 568 571 return r; 569 572 } 570 573 ··· 595 592 struct vhost_dev *dev = &v->vdev; 596 593 struct vhost_iotlb *iotlb = dev->iotlb; 597 594 struct page **page_list; 598 - unsigned long list_size = PAGE_SIZE / sizeof(struct page *); 595 + struct vm_area_struct **vmas; 599 596 unsigned int gup_flags = FOLL_LONGTERM; 600 - unsigned long npages, cur_base, map_pfn, last_pfn = 0; 601 - unsigned long locked, lock_limit, pinned, i; 597 + unsigned long map_pfn, last_pfn = 0; 598 + unsigned long npages, lock_limit; 599 + unsigned long i, nmap = 0; 602 600 u64 iova = msg->iova; 601 + long pinned; 603 602 int ret = 0; 604 603 605 604 if (vhost_iotlb_itree_first(iotlb, msg->iova, 606 605 msg->iova + msg->size - 1)) 607 606 return -EEXIST; 608 - 609 - page_list = (struct page **) __get_free_page(GFP_KERNEL); 610 - if (!page_list) 611 - return -ENOMEM; 612 607 613 608 if (msg->perm & VHOST_ACCESS_WO) 614 609 gup_flags |= FOLL_WRITE; ··· 615 614 if (!npages) 616 615 return -EINVAL; 617 616 617 + page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); 618 + vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), 619 + GFP_KERNEL); 620 + if (!page_list || !vmas) { 621 + ret = -ENOMEM; 622 + goto free; 623 + } 624 + 618 625 mmap_read_lock(dev->mm); 619 626 620 - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); 621 627 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 622 - 623 - if (locked > lock_limit) { 628 + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { 624 629 ret = -ENOMEM; 625 - goto out; 630 + goto unlock; 626 631 } 627 632 628 - cur_base = msg->uaddr & PAGE_MASK; 629 - iova &= PAGE_MASK; 630 - 631 - while (npages) { 632 - pinned = min_t(unsigned long, npages, list_size); 633 - ret = pin_user_pages(cur_base, pinned, 634 - gup_flags, page_list, NULL); 635 - if (ret != pinned) 636 - goto out; 637 - 638 - if (!last_pfn) 639 - map_pfn = page_to_pfn(page_list[0]); 640 - 641 - for (i = 0; i < ret; i++) { 642 - unsigned long this_pfn = page_to_pfn(page_list[i]); 643 - u64 csize; 644 - 645 - if (last_pfn && (this_pfn != last_pfn + 1)) { 646 - /* Pin a contiguous chunk of memory */ 647 - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; 648 - if (vhost_vdpa_map(v, iova, csize, 649 - map_pfn << PAGE_SHIFT, 650 - msg->perm)) 651 - goto out; 652 - map_pfn = this_pfn; 653 - iova += csize; 654 - } 655 - 656 - last_pfn = this_pfn; 633 + pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, 634 + page_list, vmas); 635 + if (npages != pinned) { 636 + if (pinned < 0) { 637 + ret = pinned; 638 + } else { 639 + unpin_user_pages(page_list, pinned); 640 + ret = -ENOMEM; 657 641 } 658 - 659 - cur_base += ret << PAGE_SHIFT; 660 - npages -= ret; 642 + goto unlock; 661 643 } 662 644 663 - /* Pin the rest chunk */ 664 - ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, 665 - map_pfn << PAGE_SHIFT, msg->perm); 645 + iova &= PAGE_MASK; 646 + map_pfn = page_to_pfn(page_list[0]); 647 + 648 + /* One more iteration to avoid extra vdpa_map() call out of loop. */ 649 + for (i = 0; i <= npages; i++) { 650 + unsigned long this_pfn; 651 + u64 csize; 652 + 653 + /* The last chunk may have no valid PFN next to it */ 654 + this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; 655 + 656 + if (last_pfn && (this_pfn == -1UL || 657 + this_pfn != last_pfn + 1)) { 658 + /* Pin a contiguous chunk of memory */ 659 + csize = last_pfn - map_pfn + 1; 660 + ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT, 661 + map_pfn << PAGE_SHIFT, 662 + msg->perm); 663 + if (ret) { 664 + /* 665 + * Unpin the rest chunks of memory on the 666 + * flight with no corresponding vdpa_map() 667 + * calls having been made yet. On the other 668 + * hand, vdpa_unmap() in the failure path 669 + * is in charge of accounting the number of 670 + * pinned pages for its own. 671 + * This asymmetrical pattern of accounting 672 + * is for efficiency to pin all pages at 673 + * once, while there is no other callsite 674 + * of vdpa_map() than here above. 675 + */ 676 + unpin_user_pages(&page_list[nmap], 677 + npages - nmap); 678 + goto out; 679 + } 680 + atomic64_add(csize, &dev->mm->pinned_vm); 681 + nmap += csize; 682 + iova += csize << PAGE_SHIFT; 683 + map_pfn = this_pfn; 684 + } 685 + last_pfn = this_pfn; 686 + } 687 + 688 + WARN_ON(nmap != npages); 666 689 out: 667 - if (ret) { 690 + if (ret) 668 691 vhost_vdpa_unmap(v, msg->iova, msg->size); 669 - atomic64_sub(npages, &dev->mm->pinned_vm); 670 - } 692 + unlock: 671 693 mmap_read_unlock(dev->mm); 672 - free_page((unsigned long)page_list); 694 + free: 695 + kvfree(vmas); 696 + kvfree(page_list); 673 697 return ret; 674 698 } 675 699 ··· 836 810 837 811 err_init_iotlb: 838 812 vhost_dev_cleanup(&v->vdev); 813 + kfree(vqs); 839 814 err: 840 815 atomic_dec(&v->opened); 841 816 return r;

+23 -10

drivers/vhost/vhost.c

··· 1290 1290 vring_used_t __user *used) 1291 1291 1292 1292 { 1293 + /* If an IOTLB device is present, the vring addresses are 1294 + * GIOVAs. Access validation occurs at prefetch time. */ 1295 + if (vq->iotlb) 1296 + return true; 1297 + 1293 1298 return access_ok(desc, vhost_get_desc_size(vq, num)) && 1294 1299 access_ok(avail, vhost_get_avail_size(vq, num)) && 1295 1300 access_ok(used, vhost_get_used_size(vq, num)); ··· 1370 1365 } 1371 1366 EXPORT_SYMBOL_GPL(vhost_log_access_ok); 1372 1367 1368 + static bool vq_log_used_access_ok(struct vhost_virtqueue *vq, 1369 + void __user *log_base, 1370 + bool log_used, 1371 + u64 log_addr) 1372 + { 1373 + /* If an IOTLB device is present, log_addr is a GIOVA that 1374 + * will never be logged by log_used(). */ 1375 + if (vq->iotlb) 1376 + return true; 1377 + 1378 + return !log_used || log_access_ok(log_base, log_addr, 1379 + vhost_get_used_size(vq, vq->num)); 1380 + } 1381 + 1373 1382 /* Verify access for write logging. */ 1374 1383 /* Caller should have vq mutex and device mutex */ 1375 1384 static bool vq_log_access_ok(struct vhost_virtqueue *vq, ··· 1391 1372 { 1392 1373 return vq_memory_access_ok(log_base, vq->umem, 1393 1374 vhost_has_feature(vq, VHOST_F_LOG_ALL)) && 1394 - (!vq->log_used || log_access_ok(log_base, vq->log_addr, 1395 - vhost_get_used_size(vq, vq->num))); 1375 + vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); 1396 1376 } 1397 1377 1398 1378 /* Can we start vq? */ ··· 1400 1382 { 1401 1383 if (!vq_log_access_ok(vq, vq->log_base)) 1402 1384 return false; 1403 - 1404 - /* Access validation occurs at prefetch time with IOTLB */ 1405 - if (vq->iotlb) 1406 - return true; 1407 1385 1408 1386 return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); 1409 1387 } ··· 1530 1516 return -EINVAL; 1531 1517 1532 1518 /* Also validate log access for used ring if enabled. */ 1533 - if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && 1534 - !log_access_ok(vq->log_base, a.log_guest_addr, 1535 - sizeof *vq->used + 1536 - vq->num * sizeof *vq->used->ring)) 1519 + if (!vq_log_used_access_ok(vq, vq->log_base, 1520 + a.flags & (0x1 << VHOST_VRING_F_LOG), 1521 + a.log_guest_addr)) 1537 1522 return -EINVAL; 1538 1523 } 1539 1524

Configure Feed

Configure Feed