Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
"One core bug fix and a few driver ones

- FRWR memory registration for hfi1/qib didn't work with some iovas,
  causing an NFSoRDMA failure regression due to a fix on the NFS side

- A command flow error in mlx5 allowed user space to send a corrupt
  command (and, we have since learned, also smash the kernel stack)

- Fix a regression and some bugs with device hot unplug that were
  discovered while reviewing Andrea's patches

- hns has a failure if the user asks for certain QP configurations"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/hns: Bugfix for mapping user db
RDMA/ucontext: Fix regression with disassociate
RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages
RDMA/mlx5: Do not allow the user to write to the clock page
IB/mlx5: Fix scatter to CQE in DCT QP creation
IB/rdmavt: Fix frwr memory registration

7 files changed, 76 insertions(+), 20 deletions(-)
+1
drivers/infiniband/core/uverbs.h
···

	struct mutex umap_lock;
	struct list_head umaps;
+	struct page *disassociate_page;

	struct idr idr;
	/* spinlock protects write access to idr */
+49 -3
drivers/infiniband/core/uverbs_main.c
···
	kref_put(&file->async_file->ref,
		 ib_uverbs_release_async_event_file);
	put_device(&file->device->dev);
+
+	if (file->disassociate_page)
+		__free_pages(file->disassociate_page, 0);
	kfree(file);
}

···
	kfree(priv);
}

+/*
+ * Once the zap_vma_ptes has been called touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+	struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+	struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+	vm_fault_t ret = 0;
+
+	if (!priv)
+		return VM_FAULT_SIGBUS;
+
+	/* Read only pages can just use the system zero page. */
+	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+		vmf->page = ZERO_PAGE(vmf->vm_start);
+		get_page(vmf->page);
+		return 0;
+	}
+
+	mutex_lock(&ufile->umap_lock);
+	if (!ufile->disassociate_page)
+		ufile->disassociate_page =
+			alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+	if (ufile->disassociate_page) {
+		/*
+		 * This VMA is forced to always be shared so this doesn't have
+		 * to worry about COW.
+		 */
+		vmf->page = ufile->disassociate_page;
+		get_page(vmf->page);
+	} else {
+		ret = VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&ufile->umap_lock);
+
+	return ret;
+}
+
static const struct vm_operations_struct rdma_umap_ops = {
	.open = rdma_umap_open,
	.close = rdma_umap_close,
+	.fault = rdma_umap_fault,
};

static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,

···
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return ERR_PTR(-EINVAL);

	if (vma->vm_end - vma->vm_start != size)
		return ERR_PTR(-EINVAL);

···
		 * at a time to get the lock ordering right. Typically there
		 * will only be one mm, so no big deal.
		 */
-		down_write(&mm->mmap_sem);
+		down_read(&mm->mmap_sem);
		if (!mmget_still_valid(mm))
			goto skip_mm;
		mutex_lock(&ufile->umap_lock);

···

			zap_vma_ptes(vma, vma->vm_start,
				     vma->vm_end - vma->vm_start);
-			vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
		}
		mutex_unlock(&ufile->umap_lock);
	skip_mm:
-		up_write(&mm->mmap_sem);
+		up_read(&mm->mmap_sem);
		mmput(mm);
	}
}
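One consequence of this change is the new `if (!(vma->vm_flags & VM_SHARED))` check: mappings that go through rdma_user_mmap_pre()/rdma_user_mmap_io() must be MAP_SHARED, so the fault handler above never has to deal with copy-on-write. A rough userspace sketch of what that means for a provider library follows; the fd and offset are placeholders, not the real uverbs mmap encoding.

#include <sys/types.h>
#include <sys/mman.h>

/* Hedged sketch: BAR/doorbell mmaps served by rdma_user_mmap_io() must now be
 * requested MAP_SHARED; a MAP_PRIVATE request is rejected with EINVAL. */
void *map_doorbell(int cmd_fd, off_t db_offset, size_t len)
{
	return mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_SHARED /* MAP_PRIVATE would now fail */,
		    cmd_fd, db_offset);
}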
+1 -1
drivers/infiniband/hw/hns/hns_roce_qp.c
···

static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
{
-	if (attr->qp_type == IB_QPT_XRC_TGT)
+	if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
		return 0;

	return 1;
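The hns change makes a QP created with no send work requests report that it has no SQ. A hypothetical userspace reproducer sketch is below; the libibverbs calls are standard, but the pd/cq variables and the choice of an RC QP are illustrative assumptions, not taken from the patch.

#include <infiniband/verbs.h>

/* Hypothetical reproducer: a receive-only QP, i.e. cap.max_send_wr == 0.
 * Before the fix, hns_roce_qp_has_sq() still reported an SQ for this attr
 * and QP creation could fail on hns hardware. */
struct ibv_qp *create_recv_only_qp(struct ibv_pd *pd, struct ibv_cq *cq)
{
	struct ibv_qp_init_attr attr = {
		.send_cq = cq,
		.recv_cq = cq,
		.qp_type = IBV_QPT_RC,
		.cap = {
			.max_send_wr  = 0,	/* no send queue requested */
			.max_recv_wr  = 16,
			.max_recv_sge = 1,
		},
	};

	return ibv_create_qp(pd, &attr);
}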
+7 -5
drivers/infiniband/hw/mlx5/main.c
···
		if (MLX5_CAP_GEN(mdev, qp_packet_based))
			resp.flags |=
				MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
+
+		resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
	}

	if (field_avail(typeof(resp), sw_parsing_caps,

···

	if (vma->vm_flags & VM_WRITE)
		return -EPERM;
+	vma->vm_flags &= ~VM_MAYWRITE;

	if (!dev->mdev->clock_info_page)
		return -EOPNOTSUPP;

···

		if (vma->vm_flags & VM_WRITE)
			return -EPERM;
+		vma->vm_flags &= ~VM_MAYWRITE;

		/* Don't expose to user-space information it shouldn't have */
		if (PAGE_SIZE > 4096)
			return -EOPNOTSUPP;

-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		pfn = (dev->mdev->iseg_base +
		       offsetof(struct mlx5_init_seg, internal_timer_h)) >>
		      PAGE_SHIFT;
-		if (io_remap_pfn_range(vma, vma->vm_start, pfn,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-		break;
+		return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+					 PAGE_SIZE,
+					 pgprot_noncached(vma->vm_page_prot));
	case MLX5_IB_MMAP_CLOCK_INFO:
		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
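The new `vma->vm_flags &= ~VM_MAYWRITE;` lines matter because rejecting VM_WRITE at mmap time is not enough on its own: without also clearing VM_MAYWRITE, user space could map the clock page read-only and later upgrade it with mprotect(). A rough userspace sketch of the behaviour being closed off is below; the fd and the CLOCK_INFO_OFFSET constant are placeholders, not the real mlx5 mmap offset encoding.

#include <stdio.h>
#include <sys/mman.h>

#define CLOCK_INFO_OFFSET 0	/* placeholder offset (assumption) */

/* With VM_MAYWRITE cleared by the kernel, the mprotect() upgrade now fails
 * with EACCES instead of handing user space a writable view of the page. */
int try_upgrade_clock_page(int cmd_fd)
{
	void *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, cmd_fd,
		       CLOCK_INFO_OFFSET);
	if (p == MAP_FAILED)
		return -1;

	if (mprotect(p, 4096, PROT_READ | PROT_WRITE) != 0)
		perror("mprotect");	/* expected after the fix */

	munmap(p, 4096);
	return 0;
}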
+7 -4
drivers/infiniband/hw/mlx5/qp.c
···

	rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);

-	if (rcqe_sz == 128) {
-		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+	if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
+		if (rcqe_sz == 128)
+			MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+
		return;
	}

-	if (init_attr->qp_type != MLX5_IB_QPT_DCT)
-		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
+	MLX5_SET(qpc, qpc, cs_res,
+		 rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+				  MLX5_RES_SCAT_DATA32_CQE);
}

static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
+10 -7
drivers/infiniband/sw/rdmavt/mr.c
···
	if (unlikely(mapped_segs == mr->mr.max_segs))
		return -ENOMEM;

-	if (mr->mr.length == 0) {
-		mr->mr.user_base = addr;
-		mr->mr.iova = addr;
-	}
-
	m = mapped_segs / RVT_SEGSZ;
	n = mapped_segs % RVT_SEGSZ;
	mr->mr.map[m]->segs[n].vaddr = (void *)addr;

···
 * @sg_nents: number of entries in sg
 * @sg_offset: offset in bytes into sg
 *
+ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
+ *
 * Return: number of sg elements mapped to the memory region
 */
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
		  int sg_nents, unsigned int *sg_offset)
{
	struct rvt_mr *mr = to_imr(ibmr);
+	int ret;

	mr->mr.length = 0;
	mr->mr.page_shift = PAGE_SHIFT;
-	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
-			      rvt_set_page);
+	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
+	mr->mr.user_base = ibmr->iova;
+	mr->mr.iova = ibmr->iova;
+	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
+	mr->mr.length = (size_t)ibmr->length;
+	return ret;
}

/**

···
	ibmr->rkey = key;
	mr->mr.lkey = key;
	mr->mr.access_flags = access;
+	mr->mr.iova = ibmr->iova;
	atomic_set(&mr->mr.lkey_invalid, 0);

	return 0;
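The rdmavt fix works because ib_sg_to_pages() hands rvt_set_page() page-aligned addresses, while the iova the ULP registers (NFSoRDMA in the reported regression) may start part-way into the first page; the new code records that intra-page offset explicitly instead of treating the page boundary as the start of the region. A small standalone sketch of the arithmetic, with made-up values:

#include <inttypes.h>
#include <stdio.h>

/* Byte offset of the requested iova within the first mapped page, mirroring
 * the new line: mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; */
static uint64_t frwr_intra_page_offset(uint64_t iova, uint64_t first_page_vaddr)
{
	return iova - first_page_vaddr;
}

int main(void)
{
	/* Illustrative values: the ULP's iova starts 0xa80 bytes into the page. */
	uint64_t iova = 0x100a80;
	uint64_t first_page = 0x100000;

	printf("offset = 0x%" PRIx64 "\n",
	       frwr_intra_page_offset(iova, first_page));
	return 0;
}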
+1
include/uapi/rdma/mlx5-abi.h
···
	MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP	= 1 << 0,
	MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD	= 1 << 1,
	MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE	= 1 << 2,
+	MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT	= 1 << 3,
};

enum mlx5_ib_tunnel_offloads {
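A user-space provider can discover the corrected scatter-to-CQE behaviour for DCT QPs from the query-device response. A minimal sketch of such a check follows, assuming the response struct exported by this header carries these bits in its flags field; the helper name is made up.

#include <stdbool.h>
#include <rdma/mlx5-abi.h>

/* Hypothetical helper: true when the kernel advertises that scatter-to-CQE
 * is honoured for DCT QP creation. */
static bool mlx5_dct_scat2cqe_supported(const struct mlx5_ib_query_device_resp *resp)
{
	return resp->flags & MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
}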