Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
"Final set of -rc fixes for 4.6.

I've collected up a number of patches that are all pretty small with
the exception of only a couple. The hfi1 driver has a number of
important patches, and it is what really drives the line count of this
pull request up. These are all small and I've got this kernel built
and running in the test lab (I have most of the hardware; I think nes
is the only thing in this patch set that I can't say I've personally
tested and have up and running).

Summary:

- A number of collected fixes for oopses, memory corruptions,
deadlocks, etc. All of these fixes are small (many only 5-10
lines), obvious, and tested.

- Fix for the security issue related to the use of write for
bi-directional communications"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
RDMA/nes: don't leak skb if carrier down
IB/security: Restrict use of the write() interface
IB/hfi1: Use kernel default llseek for ui device
IB/hfi1: Don't attempt to free resources if initialization failed
IB/hfi1: Fix missing lock/unlock in verbs drain callback
IB/rdmavt: Fix send scheduling
IB/hfi1: Prevent unpinning of wrong pages
IB/hfi1: Fix deadlock caused by locking with wrong scope
IB/hfi1: Prevent NULL pointer dereferences in caching code
MAINTAINERS: Update iser/isert maintainer contact info
IB/mlx5: Expose correct max_sge_rd limit
RDMA/iw_cxgb4: Fix bar2 virt addr calculation for T4 chips
iw_cxgb4: handle draining an idle qp
iw_cxgb3: initialize ibdev.iwcm->ifname for port mapping
iw_cxgb4: initialize ibdev.iwcm->ifname for port mapping
IB/core: Don't drain non-existent rq queue-pair
IB/core: Fix oops in ib_cache_gid_set_default_gid

+173 -102
+2 -2
MAINTAINERS
··· 6027 6027 6028 6028 ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR 6029 6029 M: Or Gerlitz <ogerlitz@mellanox.com> 6030 - M: Sagi Grimberg <sagig@mellanox.com> 6030 + M: Sagi Grimberg <sagi@grimberg.me> 6031 6031 M: Roi Dayan <roid@mellanox.com> 6032 6032 L: linux-rdma@vger.kernel.org 6033 6033 S: Supported ··· 6037 6037 F: drivers/infiniband/ulp/iser/ 6038 6038 6039 6039 ISCSI EXTENSIONS FOR RDMA (ISER) TARGET 6040 - M: Sagi Grimberg <sagig@mellanox.com> 6040 + M: Sagi Grimberg <sagi@grimberg.me> 6041 6041 T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master 6042 6042 L: linux-rdma@vger.kernel.org 6043 6043 L: target-devel@vger.kernel.org
+2 -1
drivers/infiniband/core/cache.c
··· 691 691 NULL); 692 692 693 693 /* Coudn't find default GID location */ 694 - WARN_ON(ix < 0); 694 + if (WARN_ON(ix < 0)) 695 + goto release; 695 696 696 697 zattr_type.gid_type = gid_type; 697 698
+4
drivers/infiniband/core/ucm.c
··· 48 48 49 49 #include <asm/uaccess.h> 50 50 51 + #include <rdma/ib.h> 51 52 #include <rdma/ib_cm.h> 52 53 #include <rdma/ib_user_cm.h> 53 54 #include <rdma/ib_marshall.h> ··· 1103 1102 struct ib_ucm_file *file = filp->private_data; 1104 1103 struct ib_ucm_cmd_hdr hdr; 1105 1104 ssize_t result; 1105 + 1106 + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) 1107 + return -EACCES; 1106 1108 1107 1109 if (len < sizeof(hdr)) 1108 1110 return -EINVAL;
+3
drivers/infiniband/core/ucma.c
··· 1574 1574 struct rdma_ucm_cmd_hdr hdr; 1575 1575 ssize_t ret; 1576 1576 1577 + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) 1578 + return -EACCES; 1579 + 1577 1580 if (len < sizeof(hdr)) 1578 1581 return -EINVAL; 1579 1582
+5
drivers/infiniband/core/uverbs_main.c
··· 48 48 49 49 #include <asm/uaccess.h> 50 50 51 + #include <rdma/ib.h> 52 + 51 53 #include "uverbs.h" 52 54 53 55 MODULE_AUTHOR("Roland Dreier"); ··· 710 708 __u32 flags; 711 709 int srcu_key; 712 710 ssize_t ret; 711 + 712 + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) 713 + return -EACCES; 713 714 714 715 if (count < sizeof hdr) 715 716 return -EINVAL;
+2 -1
drivers/infiniband/core/verbs.c
··· 1860 1860 void ib_drain_qp(struct ib_qp *qp) 1861 1861 { 1862 1862 ib_drain_sq(qp); 1863 - ib_drain_rq(qp); 1863 + if (!qp->srq) 1864 + ib_drain_rq(qp); 1864 1865 } 1865 1866 EXPORT_SYMBOL(ib_drain_qp);
+2
drivers/infiniband/hw/cxgb3/iwch_provider.c
··· 1390 1390 dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; 1391 1391 dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; 1392 1392 dev->ibdev.iwcm->get_qp = iwch_get_qp; 1393 + memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name, 1394 + sizeof(dev->ibdev.iwcm->ifname)); 1393 1395 1394 1396 ret = ib_register_device(&dev->ibdev, NULL); 1395 1397 if (ret)
+1 -1
drivers/infiniband/hw/cxgb4/cq.c
··· 162 162 cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, 163 163 &cq->bar2_qid, 164 164 user ? &cq->bar2_pa : NULL); 165 - if (user && !cq->bar2_va) { 165 + if (user && !cq->bar2_pa) { 166 166 pr_warn(MOD "%s: cqid %u not in BAR2 range.\n", 167 167 pci_name(rdev->lldi.pdev), cq->cqid); 168 168 ret = -EINVAL;
+2
drivers/infiniband/hw/cxgb4/provider.c
··· 580 580 dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; 581 581 dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; 582 582 dev->ibdev.iwcm->get_qp = c4iw_get_qp; 583 + memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, 584 + sizeof(dev->ibdev.iwcm->ifname)); 583 585 584 586 ret = ib_register_device(&dev->ibdev, NULL); 585 587 if (ret)
+21 -3
drivers/infiniband/hw/cxgb4/qp.c
··· 185 185 186 186 if (pbar2_pa) 187 187 *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; 188 + 189 + if (is_t4(rdev->lldi.adapter_type)) 190 + return NULL; 191 + 188 192 return rdev->bar2_kva + bar2_qoffset; 189 193 } 190 194 ··· 274 270 /* 275 271 * User mode must have bar2 access. 276 272 */ 277 - if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) { 273 + if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { 278 274 pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", 279 275 pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); 280 276 goto free_dma; ··· 1899 1895 void c4iw_drain_sq(struct ib_qp *ibqp) 1900 1896 { 1901 1897 struct c4iw_qp *qp = to_c4iw_qp(ibqp); 1898 + unsigned long flag; 1899 + bool need_to_wait; 1902 1900 1903 - wait_for_completion(&qp->sq_drained); 1901 + spin_lock_irqsave(&qp->lock, flag); 1902 + need_to_wait = !t4_sq_empty(&qp->wq); 1903 + spin_unlock_irqrestore(&qp->lock, flag); 1904 + 1905 + if (need_to_wait) 1906 + wait_for_completion(&qp->sq_drained); 1904 1907 } 1905 1908 1906 1909 void c4iw_drain_rq(struct ib_qp *ibqp) 1907 1910 { 1908 1911 struct c4iw_qp *qp = to_c4iw_qp(ibqp); 1912 + unsigned long flag; 1913 + bool need_to_wait; 1909 1914 1910 - wait_for_completion(&qp->rq_drained); 1915 + spin_lock_irqsave(&qp->lock, flag); 1916 + need_to_wait = !t4_rq_empty(&qp->wq); 1917 + spin_unlock_irqrestore(&qp->lock, flag); 1918 + 1919 + if (need_to_wait) 1920 + wait_for_completion(&qp->rq_drained); 1911 1921 }
+1 -1
drivers/infiniband/hw/mlx5/main.c
··· 530 530 sizeof(struct mlx5_wqe_ctrl_seg)) / 531 531 sizeof(struct mlx5_wqe_data_seg); 532 532 props->max_sge = min(max_rq_sg, max_sq_sg); 533 - props->max_sge_rd = props->max_sge; 533 + props->max_sge_rd = MLX5_MAX_SGE_RD; 534 534 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); 535 535 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; 536 536 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
-3
drivers/infiniband/hw/nes/nes_nic.c
··· 500 500 * skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); 501 501 */ 502 502 503 - if (!netif_carrier_ok(netdev)) 504 - return NETDEV_TX_OK; 505 - 506 503 if (netif_queue_stopped(netdev)) 507 504 return NETDEV_TX_BUSY; 508 505
+5
drivers/infiniband/hw/qib/qib_file_ops.c
··· 45 45 #include <linux/export.h> 46 46 #include <linux/uio.h> 47 47 48 + #include <rdma/ib.h> 49 + 48 50 #include "qib.h" 49 51 #include "qib_common.h" 50 52 #include "qib_user_sdma.h" ··· 2068 2066 struct qib_cmd cmd; 2069 2067 ssize_t ret = 0; 2070 2068 void *dest; 2069 + 2070 + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) 2071 + return -EACCES; 2071 2072 2072 2073 if (count < sizeof(cmd.type)) { 2073 2074 ret = -EINVAL;
+2 -2
drivers/infiniband/sw/rdmavt/qp.c
··· 1637 1637 spin_unlock_irqrestore(&qp->s_hlock, flags); 1638 1638 if (nreq) { 1639 1639 if (call_send) 1640 - rdi->driver_f.schedule_send_no_lock(qp); 1641 - else 1642 1640 rdi->driver_f.do_send(qp); 1641 + else 1642 + rdi->driver_f.schedule_send_no_lock(qp); 1643 1643 } 1644 1644 return err; 1645 1645 }
+1 -1
drivers/staging/rdma/hfi1/TODO
··· 3 3 - Remove unneeded file entries in sysfs 4 4 - Remove software processing of IB protocol and place in library for use 5 5 by qib, ipath (if still present), hfi1, and eventually soft-roce 6 - 6 + - Replace incorrect uAPI
+35 -56
drivers/staging/rdma/hfi1/file_ops.c
··· 49 49 #include <linux/vmalloc.h> 50 50 #include <linux/io.h> 51 51 52 + #include <rdma/ib.h> 53 + 52 54 #include "hfi.h" 53 55 #include "pio.h" 54 56 #include "device.h" ··· 191 189 __u64 user_val = 0; 192 190 int uctxt_required = 1; 193 191 int must_be_root = 0; 192 + 193 + /* FIXME: This interface cannot continue out of staging */ 194 + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) 195 + return -EACCES; 194 196 195 197 if (count < sizeof(cmd)) { 196 198 ret = -EINVAL; ··· 797 791 spin_unlock_irqrestore(&dd->uctxt_lock, flags); 798 792 799 793 dd->rcd[uctxt->ctxt] = NULL; 794 + 795 + hfi1_user_exp_rcv_free(fdata); 796 + hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); 797 + 800 798 uctxt->rcvwait_to = 0; 801 799 uctxt->piowait_to = 0; 802 800 uctxt->rcvnowait = 0; 803 801 uctxt->pionowait = 0; 804 802 uctxt->event_flags = 0; 805 - 806 - hfi1_user_exp_rcv_free(fdata); 807 - hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); 808 803 809 804 hfi1_stats.sps_ctxts--; 810 805 if (++dd->freectxts == dd->num_user_contexts) ··· 1134 1127 1135 1128 static int user_init(struct file *fp) 1136 1129 { 1137 - int ret; 1138 1130 unsigned int rcvctrl_ops = 0; 1139 1131 struct hfi1_filedata *fd = fp->private_data; 1140 1132 struct hfi1_ctxtdata *uctxt = fd->uctxt; 1141 1133 1142 1134 /* make sure that the context has already been setup */ 1143 - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) { 1144 - ret = -EFAULT; 1145 - goto done; 1146 - } 1147 - 1148 - /* 1149 - * Subctxts don't need to initialize anything since master 1150 - * has done it. 1151 - */ 1152 - if (fd->subctxt) { 1153 - ret = wait_event_interruptible(uctxt->wait, !test_bit( 1154 - HFI1_CTXT_MASTER_UNINIT, 1155 - &uctxt->event_flags)); 1156 - goto expected; 1157 - } 1135 + if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) 1136 + return -EFAULT; 1158 1137 1159 1138 /* initialize poll variables... 
*/ 1160 1139 uctxt->urgent = 0; ··· 1195 1202 wake_up(&uctxt->wait); 1196 1203 } 1197 1204 1198 - expected: 1199 - /* 1200 - * Expected receive has to be setup for all processes (including 1201 - * shared contexts). However, it has to be done after the master 1202 - * context has been fully configured as it depends on the 1203 - * eager/expected split of the RcvArray entries. 1204 - * Setting it up here ensures that the subcontexts will be waiting 1205 - * (due to the above wait_event_interruptible() until the master 1206 - * is setup. 1207 - */ 1208 - ret = hfi1_user_exp_rcv_init(fp); 1209 - done: 1210 - return ret; 1205 + return 0; 1211 1206 } 1212 1207 1213 1208 static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) ··· 1242 1261 int ret = 0; 1243 1262 1244 1263 /* 1245 - * Context should be set up only once (including allocation and 1264 + * Context should be set up only once, including allocation and 1246 1265 * programming of eager buffers. This is done if context sharing 1247 1266 * is not requested or by the master process. 1248 1267 */ ··· 1263 1282 if (ret) 1264 1283 goto done; 1265 1284 } 1285 + } else { 1286 + ret = wait_event_interruptible(uctxt->wait, !test_bit( 1287 + HFI1_CTXT_MASTER_UNINIT, 1288 + &uctxt->event_flags)); 1289 + if (ret) 1290 + goto done; 1266 1291 } 1292 + 1267 1293 ret = hfi1_user_sdma_alloc_queues(uctxt, fp); 1294 + if (ret) 1295 + goto done; 1296 + /* 1297 + * Expected receive has to be setup for all processes (including 1298 + * shared contexts). However, it has to be done after the master 1299 + * context has been fully configured as it depends on the 1300 + * eager/expected split of the RcvArray entries. 1301 + * Setting it up here ensures that the subcontexts will be waiting 1302 + * (due to the above wait_event_interruptible() until the master 1303 + * is setup. 
1304 + */ 1305 + ret = hfi1_user_exp_rcv_init(fp); 1268 1306 if (ret) 1269 1307 goto done; 1270 1308 ··· 1565 1565 { 1566 1566 struct hfi1_devdata *dd = filp->private_data; 1567 1567 1568 - switch (whence) { 1569 - case SEEK_SET: 1570 - break; 1571 - case SEEK_CUR: 1572 - offset += filp->f_pos; 1573 - break; 1574 - case SEEK_END: 1575 - offset = ((dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) - 1576 - offset; 1577 - break; 1578 - default: 1579 - return -EINVAL; 1580 - } 1581 - 1582 - if (offset < 0) 1583 - return -EINVAL; 1584 - 1585 - if (offset >= (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) 1586 - return -EINVAL; 1587 - 1588 - filp->f_pos = offset; 1589 - 1590 - return filp->f_pos; 1568 + return fixed_size_llseek(filp, offset, whence, 1569 + (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE); 1591 1570 } 1592 1571 1593 1572 /* NOTE: assumes unsigned long is 8 bytes */
+25 -15
drivers/staging/rdma/hfi1/mmu_rb.c
··· 71 71 struct mm_struct *, 72 72 unsigned long, unsigned long); 73 73 static void mmu_notifier_mem_invalidate(struct mmu_notifier *, 74 + struct mm_struct *, 74 75 unsigned long, unsigned long); 75 76 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, 76 77 unsigned long, unsigned long); ··· 138 137 rbnode = rb_entry(node, struct mmu_rb_node, node); 139 138 rb_erase(node, root); 140 139 if (handler->ops->remove) 141 - handler->ops->remove(root, rbnode, false); 140 + handler->ops->remove(root, rbnode, NULL); 142 141 } 143 142 } 144 143 ··· 177 176 return ret; 178 177 } 179 178 180 - /* Caller must host handler lock */ 179 + /* Caller must hold handler lock */ 181 180 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, 182 181 unsigned long addr, 183 182 unsigned long len) ··· 201 200 return node; 202 201 } 203 202 203 + /* Caller must *not* hold handler lock. */ 204 204 static void __mmu_rb_remove(struct mmu_rb_handler *handler, 205 - struct mmu_rb_node *node, bool arg) 205 + struct mmu_rb_node *node, struct mm_struct *mm) 206 206 { 207 + unsigned long flags; 208 + 207 209 /* Validity of handler and node pointers has been checked by caller. 
*/ 208 210 hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, 209 211 node->len); 212 + spin_lock_irqsave(&handler->lock, flags); 210 213 __mmu_int_rb_remove(node, handler->root); 214 + spin_unlock_irqrestore(&handler->lock, flags); 215 + 211 216 if (handler->ops->remove) 212 - handler->ops->remove(handler->root, node, arg); 217 + handler->ops->remove(handler->root, node, mm); 213 218 } 214 219 215 220 struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, ··· 238 231 void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) 239 232 { 240 233 struct mmu_rb_handler *handler = find_mmu_handler(root); 241 - unsigned long flags; 242 234 243 235 if (!handler || !node) 244 236 return; 245 237 246 - spin_lock_irqsave(&handler->lock, flags); 247 - __mmu_rb_remove(handler, node, false); 248 - spin_unlock_irqrestore(&handler->lock, flags); 238 + __mmu_rb_remove(handler, node, NULL); 249 239 } 250 240 251 241 static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) ··· 264 260 static inline void mmu_notifier_page(struct mmu_notifier *mn, 265 261 struct mm_struct *mm, unsigned long addr) 266 262 { 267 - mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE); 263 + mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE); 268 264 } 269 265 270 266 static inline void mmu_notifier_range_start(struct mmu_notifier *mn, ··· 272 268 unsigned long start, 273 269 unsigned long end) 274 270 { 275 - mmu_notifier_mem_invalidate(mn, start, end); 271 + mmu_notifier_mem_invalidate(mn, mm, start, end); 276 272 } 277 273 278 274 static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, 275 + struct mm_struct *mm, 279 276 unsigned long start, unsigned long end) 280 277 { 281 278 struct mmu_rb_handler *handler = 282 279 container_of(mn, struct mmu_rb_handler, mn); 283 280 struct rb_root *root = handler->root; 284 - struct mmu_rb_node *node; 281 + struct mmu_rb_node *node, *ptr = NULL; 285 282 unsigned long flags; 286 283 
287 284 spin_lock_irqsave(&handler->lock, flags); 288 - for (node = __mmu_int_rb_iter_first(root, start, end - 1); node; 289 - node = __mmu_int_rb_iter_next(node, start, end - 1)) { 285 + for (node = __mmu_int_rb_iter_first(root, start, end - 1); 286 + node; node = ptr) { 287 + /* Guard against node removal. */ 288 + ptr = __mmu_int_rb_iter_next(node, start, end - 1); 290 289 hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", 291 290 node->addr, node->len); 292 - if (handler->ops->invalidate(root, node)) 293 - __mmu_rb_remove(handler, node, true); 291 + if (handler->ops->invalidate(root, node)) { 292 + spin_unlock_irqrestore(&handler->lock, flags); 293 + __mmu_rb_remove(handler, node, mm); 294 + spin_lock_irqsave(&handler->lock, flags); 295 + } 294 296 } 295 297 spin_unlock_irqrestore(&handler->lock, flags); 296 298 }
+2 -1
drivers/staging/rdma/hfi1/mmu_rb.h
··· 59 59 struct mmu_rb_ops { 60 60 bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long); 61 61 int (*insert)(struct rb_root *, struct mmu_rb_node *); 62 - void (*remove)(struct rb_root *, struct mmu_rb_node *, bool); 62 + void (*remove)(struct rb_root *, struct mmu_rb_node *, 63 + struct mm_struct *); 63 64 int (*invalidate)(struct rb_root *, struct mmu_rb_node *); 64 65 }; 65 66
+2
drivers/staging/rdma/hfi1/qp.c
··· 519 519 * do the flush work until that QP's 520 520 * sdma work has finished. 521 521 */ 522 + spin_lock(&qp->s_lock); 522 523 if (qp->s_flags & RVT_S_WAIT_DMA) { 523 524 qp->s_flags &= ~RVT_S_WAIT_DMA; 524 525 hfi1_schedule_send(qp); 525 526 } 527 + spin_unlock(&qp->s_lock); 526 528 } 527 529 528 530 /**
+7 -4
drivers/staging/rdma/hfi1/user_exp_rcv.c
··· 87 87 static int set_rcvarray_entry(struct file *, unsigned long, u32, 88 88 struct tid_group *, struct page **, unsigned); 89 89 static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); 90 - static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); 90 + static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, 91 + struct mm_struct *); 91 92 static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); 92 93 static int program_rcvarray(struct file *, unsigned long, struct tid_group *, 93 94 struct tid_pageset *, unsigned, u16, struct page **, ··· 255 254 struct hfi1_ctxtdata *uctxt = fd->uctxt; 256 255 struct tid_group *grp, *gptr; 257 256 257 + if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) 258 + return 0; 258 259 /* 259 260 * The notifier would have been removed when the process'es mm 260 261 * was freed. ··· 902 899 if (!node || node->rcventry != (uctxt->expected_base + rcventry)) 903 900 return -EBADF; 904 901 if (HFI1_CAP_IS_USET(TID_UNMAP)) 905 - mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false); 902 + mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL); 906 903 else 907 904 hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); 908 905 ··· 968 965 continue; 969 966 if (HFI1_CAP_IS_USET(TID_UNMAP)) 970 967 mmu_rb_remove(&fd->tid_rb_root, 971 - &node->mmu, false); 968 + &node->mmu, NULL); 972 969 else 973 970 hfi1_mmu_rb_remove(&fd->tid_rb_root, 974 971 &node->mmu); ··· 1035 1032 } 1036 1033 1037 1034 static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, 1038 - bool notifier) 1035 + struct mm_struct *mm) 1039 1036 { 1040 1037 struct hfi1_filedata *fdata = 1041 1038 container_of(root, struct hfi1_filedata, tid_rb_root);
+22 -11
drivers/staging/rdma/hfi1/user_sdma.c
··· 278 278 static void user_sdma_free_request(struct user_sdma_request *, bool); 279 279 static int pin_vector_pages(struct user_sdma_request *, 280 280 struct user_sdma_iovec *); 281 - static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned); 281 + static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned, 282 + unsigned); 282 283 static int check_header_template(struct user_sdma_request *, 283 284 struct hfi1_pkt_header *, u32, u32); 284 285 static int set_txreq_header(struct user_sdma_request *, ··· 300 299 static void activate_packet_queue(struct iowait *, int); 301 300 static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); 302 301 static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); 303 - static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); 302 + static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, 303 + struct mm_struct *); 304 304 static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); 305 305 306 306 static struct mmu_rb_ops sdma_rb_ops = { ··· 1065 1063 rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, 1066 1064 (unsigned long)iovec->iov.iov_base, 1067 1065 iovec->iov.iov_len); 1068 - if (rb_node) 1066 + if (rb_node && !IS_ERR(rb_node)) 1069 1067 node = container_of(rb_node, struct sdma_mmu_node, rb); 1068 + else 1069 + rb_node = NULL; 1070 1070 1071 1071 if (!node) { 1072 1072 node = kzalloc(sizeof(*node), GFP_KERNEL); ··· 1111 1107 goto bail; 1112 1108 } 1113 1109 if (pinned != npages) { 1114 - unpin_vector_pages(current->mm, pages, pinned); 1110 + unpin_vector_pages(current->mm, pages, node->npages, 1111 + pinned); 1115 1112 ret = -EFAULT; 1116 1113 goto bail; 1117 1114 } ··· 1152 1147 } 1153 1148 1154 1149 static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, 1155 - unsigned npages) 1150 + unsigned start, unsigned npages) 1156 1151 { 1157 - hfi1_release_user_pages(mm, pages, npages, 0); 1152 + 
hfi1_release_user_pages(mm, pages + start, npages, 0); 1158 1153 kfree(pages); 1159 1154 } 1160 1155 ··· 1507 1502 &req->pq->sdma_rb_root, 1508 1503 (unsigned long)req->iovs[i].iov.iov_base, 1509 1504 req->iovs[i].iov.iov_len); 1510 - if (!mnode) 1505 + if (!mnode || IS_ERR(mnode)) 1511 1506 continue; 1512 1507 1513 1508 node = container_of(mnode, struct sdma_mmu_node, rb); ··· 1552 1547 } 1553 1548 1554 1549 static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, 1555 - bool notifier) 1550 + struct mm_struct *mm) 1556 1551 { 1557 1552 struct sdma_mmu_node *node = 1558 1553 container_of(mnode, struct sdma_mmu_node, rb); ··· 1562 1557 node->pq->n_locked -= node->npages; 1563 1558 spin_unlock(&node->pq->evict_lock); 1564 1559 1565 - unpin_vector_pages(notifier ? NULL : current->mm, node->pages, 1560 + /* 1561 + * If mm is set, we are being called by the MMU notifier and we 1562 + * should not pass a mm_struct to unpin_vector_page(). This is to 1563 + * prevent a deadlock when hfi1_release_user_pages() attempts to 1564 + * take the mmap_sem, which the MMU notifier has already taken. 1565 + */ 1566 + unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0, 1566 1567 node->npages); 1567 1568 /* 1568 1569 * If called by the MMU notifier, we have to adjust the pinned 1569 1570 * page count ourselves. 1570 1571 */ 1571 - if (notifier) 1572 - current->mm->pinned_vm -= node->npages; 1572 + if (mm) 1573 + mm->pinned_vm -= node->npages; 1573 1574 kfree(node); 1574 1575 } 1575 1576
+11
include/linux/mlx5/device.h
··· 392 392 MLX5_CAP_OFF_CMDIF_CSUM = 46, 393 393 }; 394 394 395 + enum { 396 + /* 397 + * Max wqe size for rdma read is 512 bytes, so this 398 + * limits our max_sge_rd as the wqe needs to fit: 399 + * - ctrl segment (16 bytes) 400 + * - rdma segment (16 bytes) 401 + * - scatter elements (16 bytes each) 402 + */ 403 + MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 404 + }; 405 + 395 406 struct mlx5_inbox_hdr { 396 407 __be16 opcode; 397 408 u8 rsvd[4];
+16
include/rdma/ib.h
··· 34 34 #define _RDMA_IB_H 35 35 36 36 #include <linux/types.h> 37 + #include <linux/sched.h> 37 38 38 39 struct ib_addr { 39 40 union { ··· 86 85 __be64 sib_sid_mask; 87 86 __u64 sib_scope_id; 88 87 }; 88 + 89 + /* 90 + * The IB interfaces that use write() as bi-directional ioctl() are 91 + * fundamentally unsafe, since there are lots of ways to trigger "write()" 92 + * calls from various contexts with elevated privileges. That includes the 93 + * traditional suid executable error message writes, but also various kernel 94 + * interfaces that can write to file descriptors. 95 + * 96 + * This function provides protection for the legacy API by restricting the 97 + * calling context. 98 + */ 99 + static inline bool ib_safe_file_access(struct file *filp) 100 + { 101 + return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); 102 + } 89 103 90 104 #endif /* _RDMA_IB_H */