Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
"Fixes for some medium-sized bugs in various drivers. A couple are more
recent regressions:

- Fix two panics in hfi1 and two allocation problems

- Send the IGMP to the correct address in cma

- Squash a syzkaller bug related to races reading the multicast list

- Fix memory leaks in siw and cm

- Fix a corner-case spec compliance issue for HFI/QIB

- Correct the implementation of fences in siw

- Fix an error unwind bug in mlx4"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/mlx4: Don't continue event handler after memory allocation failure
RDMA/siw: Fix broken RDMA Read Fence/Resume logic.
IB/rdmavt: Validate remote_addr during loopback atomic tests
IB/cm: Release previously acquired reference counter in the cm_id_priv
RDMA/siw: Fix refcounting leak in siw_create_qp()
RDMA/ucma: Protect mc during concurrent multicast leaves
RDMA/cma: Use correct address when leaving multicast group
IB/hfi1: Fix tstats alloc and dealloc
IB/hfi1: Fix AIP early init panic
IB/hfi1: Fix alloc failure with larger txqueuelen
IB/hfi1: Fix panic with larger ipoib send_queue_size

+95 -64
+1 -1
drivers/infiniband/core/cm.c
··· 3322 3322 ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av); 3323 3323 if (ret) { 3324 3324 rdma_destroy_ah_attr(&ah_attr); 3325 - return -EINVAL; 3325 + goto deref; 3326 3326 } 3327 3327 3328 3328 spin_lock_irq(&cm_id_priv->lock);
+12 -10
drivers/infiniband/core/cma.c
··· 67 67 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", 68 68 }; 69 69 70 - static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, 71 - union ib_gid *mgid); 70 + static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, 71 + enum ib_gid_type gid_type); 72 72 73 73 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) 74 74 { ··· 1846 1846 if (dev_addr->bound_dev_if) 1847 1847 ndev = dev_get_by_index(dev_addr->net, 1848 1848 dev_addr->bound_dev_if); 1849 - if (ndev) { 1849 + if (ndev && !send_only) { 1850 + enum ib_gid_type gid_type; 1850 1851 union ib_gid mgid; 1851 1852 1852 - cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, 1853 - &mgid); 1854 - 1855 - if (!send_only) 1856 - cma_igmp_send(ndev, &mgid, false); 1857 - 1858 - dev_put(ndev); 1853 + gid_type = id_priv->cma_dev->default_gid_type 1854 + [id_priv->id.port_num - 1855 + rdma_start_port( 1856 + id_priv->cma_dev->device)]; 1857 + cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid, 1858 + gid_type); 1859 + cma_igmp_send(ndev, &mgid, false); 1859 1860 } 1861 + dev_put(ndev); 1860 1862 1861 1863 cancel_work_sync(&mc->iboe_join.work); 1862 1864 }
+23 -11
drivers/infiniband/core/ucma.c
··· 95 95 u64 uid; 96 96 97 97 struct list_head list; 98 + struct list_head mc_list; 98 99 struct work_struct close_work; 99 100 }; 100 101 ··· 106 105 107 106 u64 uid; 108 107 u8 join_state; 108 + struct list_head list; 109 109 struct sockaddr_storage addr; 110 110 }; 111 111 ··· 200 198 201 199 INIT_WORK(&ctx->close_work, ucma_close_id); 202 200 init_completion(&ctx->comp); 201 + INIT_LIST_HEAD(&ctx->mc_list); 203 202 /* So list_del() will work if we don't do ucma_finish_ctx() */ 204 203 INIT_LIST_HEAD(&ctx->list); 205 204 ctx->file = file; ··· 487 484 488 485 static void ucma_cleanup_multicast(struct ucma_context *ctx) 489 486 { 490 - struct ucma_multicast *mc; 491 - unsigned long index; 487 + struct ucma_multicast *mc, *tmp; 492 488 493 - xa_for_each(&multicast_table, index, mc) { 494 - if (mc->ctx != ctx) 495 - continue; 489 + xa_lock(&multicast_table); 490 + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { 491 + list_del(&mc->list); 496 492 /* 497 493 * At this point mc->ctx->ref is 0 so the mc cannot leave the 498 494 * lock on the reader and this is enough serialization 499 495 */ 500 - xa_erase(&multicast_table, index); 496 + __xa_erase(&multicast_table, mc->id); 501 497 kfree(mc); 502 498 } 499 + xa_unlock(&multicast_table); 503 500 } 504 501 505 502 static void ucma_cleanup_mc_events(struct ucma_multicast *mc) ··· 1472 1469 mc->uid = cmd->uid; 1473 1470 memcpy(&mc->addr, addr, cmd->addr_size); 1474 1471 1475 - if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, 1472 + xa_lock(&multicast_table); 1473 + if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, 1476 1474 GFP_KERNEL)) { 1477 1475 ret = -ENOMEM; 1478 1476 goto err_free_mc; 1479 1477 } 1478 + 1479 + list_add_tail(&mc->list, &ctx->mc_list); 1480 + xa_unlock(&multicast_table); 1480 1481 1481 1482 mutex_lock(&ctx->mutex); 1482 1483 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, ··· 1507 1500 mutex_unlock(&ctx->mutex); 1508 1501 ucma_cleanup_mc_events(mc); 
1509 1502 err_xa_erase: 1510 - xa_erase(&multicast_table, mc->id); 1503 + xa_lock(&multicast_table); 1504 + list_del(&mc->list); 1505 + __xa_erase(&multicast_table, mc->id); 1511 1506 err_free_mc: 1507 + xa_unlock(&multicast_table); 1512 1508 kfree(mc); 1513 1509 err_put_ctx: 1514 1510 ucma_put_ctx(ctx); ··· 1579 1569 mc = ERR_PTR(-EINVAL); 1580 1570 else if (!refcount_inc_not_zero(&mc->ctx->ref)) 1581 1571 mc = ERR_PTR(-ENXIO); 1582 - else 1583 - __xa_erase(&multicast_table, mc->id); 1584 - xa_unlock(&multicast_table); 1585 1572 1586 1573 if (IS_ERR(mc)) { 1574 + xa_unlock(&multicast_table); 1587 1575 ret = PTR_ERR(mc); 1588 1576 goto out; 1589 1577 } 1578 + 1579 + list_del(&mc->list); 1580 + __xa_erase(&multicast_table, mc->id); 1581 + xa_unlock(&multicast_table); 1590 1582 1591 1583 mutex_lock(&mc->ctx->mutex); 1592 1584 rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
+1 -1
drivers/infiniband/hw/hfi1/ipoib.h
··· 55 55 */ 56 56 struct ipoib_txreq { 57 57 struct sdma_txreq txreq; 58 - struct hfi1_sdma_header sdma_hdr; 58 + struct hfi1_sdma_header *sdma_hdr; 59 59 int sdma_status; 60 60 int complete; 61 61 struct hfi1_ipoib_dev_priv *priv;
+15 -12
drivers/infiniband/hw/hfi1/ipoib_main.c
··· 22 22 int ret; 23 23 24 24 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 25 + if (!dev->tstats) 26 + return -ENOMEM; 25 27 26 28 ret = priv->netdev_ops->ndo_init(dev); 27 29 if (ret) 28 - return ret; 30 + goto out_ret; 29 31 30 32 ret = hfi1_netdev_add_data(priv->dd, 31 33 qpn_from_mac(priv->netdev->dev_addr), 32 34 dev); 33 35 if (ret < 0) { 34 36 priv->netdev_ops->ndo_uninit(dev); 35 - return ret; 37 + goto out_ret; 36 38 } 37 39 38 40 return 0; 41 + out_ret: 42 + free_percpu(dev->tstats); 43 + dev->tstats = NULL; 44 + return ret; 39 45 } 40 46 41 47 static void hfi1_ipoib_dev_uninit(struct net_device *dev) 42 48 { 43 49 struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); 50 + 51 + free_percpu(dev->tstats); 52 + dev->tstats = NULL; 44 53 45 54 hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr)); 46 55 ··· 175 166 hfi1_ipoib_rxq_deinit(priv->netdev); 176 167 177 168 free_percpu(dev->tstats); 178 - } 179 - 180 - static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) 181 - { 182 - hfi1_ipoib_netdev_dtor(dev); 183 - free_netdev(dev); 169 + dev->tstats = NULL; 184 170 } 185 171 186 172 static void hfi1_ipoib_set_id(struct net_device *dev, int id) ··· 215 211 priv->port_num = port_num; 216 212 priv->netdev_ops = netdev->netdev_ops; 217 213 218 - netdev->netdev_ops = &hfi1_ipoib_netdev_ops; 219 - 220 214 ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey); 221 215 222 216 rc = hfi1_ipoib_txreq_init(priv); 223 217 if (rc) { 224 218 dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc); 225 - hfi1_ipoib_free_rdma_netdev(netdev); 226 219 return rc; 227 220 } 228 221 229 222 rc = hfi1_ipoib_rxq_init(netdev); 230 223 if (rc) { 231 224 dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc); 232 - hfi1_ipoib_free_rdma_netdev(netdev); 225 + hfi1_ipoib_txreq_deinit(priv); 233 226 return rc; 234 227 } 228 + 229 + netdev->netdev_ops = &hfi1_ipoib_netdev_ops; 235 230 236 231 netdev->priv_destructor = 
hfi1_ipoib_netdev_dtor; 237 232 netdev->needs_free_netdev = true;
+26 -12
drivers/infiniband/hw/hfi1/ipoib_tx.c
··· 122 122 dd_dev_warn(priv->dd, 123 123 "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n", 124 124 __func__, tx->sdma_status, 125 - le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx, 125 + le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx, 126 126 tx->txq->sde->this_idx); 127 127 } 128 128 ··· 231 231 { 232 232 struct hfi1_devdata *dd = txp->dd; 233 233 struct sdma_txreq *txreq = &tx->txreq; 234 - struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; 234 + struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr; 235 235 u16 pkt_bytes = 236 236 sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len; 237 237 int ret; ··· 256 256 struct ipoib_txparms *txp) 257 257 { 258 258 struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; 259 - struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; 259 + struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr; 260 260 struct sk_buff *skb = tx->skb; 261 261 struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); 262 262 struct rdma_ah_attr *ah_attr = txp->ah_attr; ··· 483 483 if (likely(!ret)) { 484 484 tx_ok: 485 485 trace_sdma_output_ibhdr(txq->priv->dd, 486 - &tx->sdma_hdr.hdr, 486 + &tx->sdma_hdr->hdr, 487 487 ib_is_sc5(txp->flow.sc5)); 488 488 hfi1_ipoib_check_queue_depth(txq); 489 489 return NETDEV_TX_OK; ··· 547 547 hfi1_ipoib_check_queue_depth(txq); 548 548 549 549 trace_sdma_output_ibhdr(txq->priv->dd, 550 - &tx->sdma_hdr.hdr, 550 + &tx->sdma_hdr->hdr, 551 551 ib_is_sc5(txp->flow.sc5)); 552 552 553 553 if (!netdev_xmit_more()) ··· 683 683 { 684 684 struct net_device *dev = priv->netdev; 685 685 u32 tx_ring_size, tx_item_size; 686 - int i; 686 + struct hfi1_ipoib_circ_buf *tx_ring; 687 + int i, j; 687 688 688 689 /* 689 690 * Ring holds 1 less than tx_ring_size ··· 702 701 703 702 for (i = 0; i < dev->num_tx_queues; i++) { 704 703 struct hfi1_ipoib_txq *txq = &priv->txqs[i]; 704 + struct ipoib_txreq *tx; 705 705 706 + tx_ring = &txq->tx_ring; 706 707 iowait_init(&txq->wait, 707 708 0, 708 709 hfi1_ipoib_flush_txq, ··· 728 725 priv->dd->node); 729 
726 730 727 txq->tx_ring.items = 731 - kcalloc_node(tx_ring_size, tx_item_size, 732 - GFP_KERNEL, priv->dd->node); 728 + kvzalloc_node(array_size(tx_ring_size, tx_item_size), 729 + GFP_KERNEL, priv->dd->node); 733 730 if (!txq->tx_ring.items) 734 731 goto free_txqs; 735 732 736 733 txq->tx_ring.max_items = tx_ring_size; 737 - txq->tx_ring.shift = ilog2(tx_ring_size); 734 + txq->tx_ring.shift = ilog2(tx_item_size); 738 735 txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq); 736 + tx_ring = &txq->tx_ring; 737 + for (j = 0; j < tx_ring_size; j++) 738 + hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr = 739 + kzalloc_node(sizeof(*tx->sdma_hdr), 740 + GFP_KERNEL, priv->dd->node); 739 741 740 742 netif_tx_napi_add(dev, &txq->napi, 741 743 hfi1_ipoib_poll_tx_ring, ··· 754 746 struct hfi1_ipoib_txq *txq = &priv->txqs[i]; 755 747 756 748 netif_napi_del(&txq->napi); 757 - kfree(txq->tx_ring.items); 749 + tx_ring = &txq->tx_ring; 750 + for (j = 0; j < tx_ring_size; j++) 751 + kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr); 752 + kvfree(tx_ring->items); 758 753 } 759 754 760 755 kfree(priv->txqs); ··· 791 780 792 781 void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) 793 782 { 794 - int i; 783 + int i, j; 795 784 796 785 for (i = 0; i < priv->netdev->num_tx_queues; i++) { 797 786 struct hfi1_ipoib_txq *txq = &priv->txqs[i]; 787 + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; 798 788 799 789 iowait_cancel_work(&txq->wait); 800 790 iowait_sdma_drain(&txq->wait); 801 791 hfi1_ipoib_drain_tx_list(txq); 802 792 netif_napi_del(&txq->napi); 803 793 hfi1_ipoib_drain_tx_ring(txq); 804 - kfree(txq->tx_ring.items); 794 + for (j = 0; j < tx_ring->max_items; j++) 795 + kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr); 796 + kvfree(tx_ring->items); 805 797 } 806 798 807 799 kfree(priv->txqs);
+1 -1
drivers/infiniband/hw/mlx4/main.c
··· 3237 3237 case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: 3238 3238 ew = kmalloc(sizeof *ew, GFP_ATOMIC); 3239 3239 if (!ew) 3240 - break; 3240 + return; 3241 3241 3242 3242 INIT_WORK(&ew->work, handle_port_mgmt_change_event); 3243 3243 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
+2
drivers/infiniband/sw/rdmavt/qp.c
··· 3073 3073 case IB_WR_ATOMIC_FETCH_AND_ADD: 3074 3074 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 3075 3075 goto inv_err; 3076 + if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1))) 3077 + goto inv_err; 3076 3078 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 3077 3079 wqe->atomic_wr.remote_addr, 3078 3080 wqe->atomic_wr.rkey,
+1 -6
drivers/infiniband/sw/siw/siw.h
··· 644 644 return &qp->orq[qp->orq_get % qp->attrs.orq_size]; 645 645 } 646 646 647 - static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) 648 - { 649 - return &qp->orq[qp->orq_put % qp->attrs.orq_size]; 650 - } 651 - 652 647 static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) 653 648 { 654 - struct siw_sqe *orq_e = orq_get_tail(qp); 649 + struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size]; 655 650 656 651 if (READ_ONCE(orq_e->flags) == 0) 657 652 return orq_e;
+11 -9
drivers/infiniband/sw/siw/siw_qp_rx.c
··· 1153 1153 1154 1154 spin_lock_irqsave(&qp->orq_lock, flags); 1155 1155 1156 - rreq = orq_get_current(qp); 1157 - 1158 1156 /* free current orq entry */ 1157 + rreq = orq_get_current(qp); 1159 1158 WRITE_ONCE(rreq->flags, 0); 1159 + 1160 + qp->orq_get++; 1160 1161 1161 1162 if (qp->tx_ctx.orq_fence) { 1162 1163 if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { ··· 1166 1165 rv = -EPROTO; 1167 1166 goto out; 1168 1167 } 1169 - /* resume SQ processing */ 1168 + /* resume SQ processing, if possible */ 1170 1169 if (tx_waiting->sqe.opcode == SIW_OP_READ || 1171 1170 tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { 1172 - rreq = orq_get_tail(qp); 1171 + 1172 + /* SQ processing was stopped because of a full ORQ */ 1173 + rreq = orq_get_free(qp); 1173 1174 if (unlikely(!rreq)) { 1174 1175 pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); 1175 1176 rv = -EPROTO; ··· 1184 1181 resume_tx = 1; 1185 1182 1186 1183 } else if (siw_orq_empty(qp)) { 1184 + /* 1185 + * SQ processing was stopped by fenced work request. 1186 + * Resume since all previous Read's are now completed. 1187 + */ 1187 1188 qp->tx_ctx.orq_fence = 0; 1188 1189 resume_tx = 1; 1189 - } else { 1190 - pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n", 1191 - qp_id(qp), qp->orq_get, qp->orq_put); 1192 - rv = -EPROTO; 1193 1190 } 1194 1191 } 1195 - qp->orq_get++; 1196 1192 out: 1197 1193 spin_unlock_irqrestore(&qp->orq_lock, flags); 1198 1194
+2 -1
drivers/infiniband/sw/siw/siw_verbs.c
··· 313 313 314 314 if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { 315 315 siw_dbg(base_dev, "too many QP's\n"); 316 - return -ENOMEM; 316 + rv = -ENOMEM; 317 + goto err_atomic; 317 318 } 318 319 if (attrs->qp_type != IB_QPT_RC) { 319 320 siw_dbg(base_dev, "only RC QP's supported\n");