Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
"Primarily rtrs and irdma fixes:

- Fix uninitialized value in ib_get_eth_speed()

- Fix hns refusing to work if userspace doesn't select the correct
congestion control algorithm

- Several irdma fixes - unreliable Send Queue Drain, use after free,
64k page size bugs, device removal races

- Several rtrs bug fixes - crashes, memory leaks, use after free, bad
credit accounting, bogus WARN_ON

- Typos and a MAINTAINERS update"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/irdma: Avoid freeing the non-cqp_request scratch
RDMA/irdma: Fix support for 64k pages
RDMA/irdma: Ensure iWarp QP queue memory is OS page aligned
RDMA/core: Fix umem iterator when PAGE_SIZE is greater than HCA pgsz
RDMA/irdma: Fix UAF in irdma_sc_ccq_get_cqe_info()
RDMA/bnxt_re: Correct module description string
RDMA/rtrs-clt: Remove the warnings for req in_use check
RDMA/rtrs-clt: Fix the max_send_wr setting
RDMA/rtrs-srv: Destroy path files after making sure no IOs in-flight
RDMA/rtrs-srv: Free srv_mr iu only when always_invalidate is true
RDMA/rtrs-srv: Check return values while processing info request
RDMA/rtrs-clt: Start hb after path_up
RDMA/rtrs-srv: Do not unconditionally enable irq
MAINTAINERS: Add Chengchang Tang as Hisilicon RoCE maintainer
RDMA/irdma: Add wait for suspend on SQD
RDMA/irdma: Do not modify to SQD on error
RDMA/hns: Fix unnecessary err return when using invalid congestion control algorithm
RDMA/core: Fix uninit-value access in ib_get_eth_speed()

+90 -44
+1
MAINTAINERS
··· 9573 9573 F: include/linux/hisi_acc_qm.h 9574 9574 9575 9575 HISILICON ROCE DRIVER 9576 + M: Chengchang Tang <tangchengchang@huawei.com> 9576 9577 M: Junxian Huang <huangjunxian6@hisilicon.com> 9577 9578 L: linux-rdma@vger.kernel.org 9578 9579 S: Maintained
-6
drivers/infiniband/core/umem.c
··· 96 96 return page_size; 97 97 } 98 98 99 - /* rdma_for_each_block() has a bug if the page size is smaller than the 100 - * page size used to build the umem. For now prevent smaller page sizes 101 - * from being returned. 102 - */ 103 - pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); 104 - 105 99 /* The best result is the smallest page size that results in the minimum 106 100 * number of required pages. Compute the largest page size that could 107 101 * work based on VA address bits that don't change.
+1 -1
drivers/infiniband/core/verbs.c
··· 1971 1971 int rc; 1972 1972 u32 netdev_speed; 1973 1973 struct net_device *netdev; 1974 - struct ethtool_link_ksettings lksettings; 1974 + struct ethtool_link_ksettings lksettings = {}; 1975 1975 1976 1976 if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) 1977 1977 return -EINVAL;
+1 -1
drivers/infiniband/hw/bnxt_re/main.c
··· 71 71 BNXT_RE_DESC "\n"; 72 72 73 73 MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>"); 74 - MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); 74 + MODULE_DESCRIPTION(BNXT_RE_DESC); 75 75 MODULE_LICENSE("Dual BSD/GPL"); 76 76 77 77 /* globals */
+9 -4
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 4760 4760 cong_alg->wnd_mode_sel = WND_LIMIT; 4761 4761 break; 4762 4762 default: 4763 - ibdev_err(&hr_dev->ib_dev, 4764 - "error type(%u) for congestion selection.\n", 4765 - hr_dev->caps.cong_type); 4766 - return -EINVAL; 4763 + ibdev_warn(&hr_dev->ib_dev, 4764 + "invalid type(%u) for congestion selection.\n", 4765 + hr_dev->caps.cong_type); 4766 + hr_dev->caps.cong_type = CONG_TYPE_DCQCN; 4767 + cong_alg->alg_sel = CONG_DCQCN; 4768 + cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; 4769 + cong_alg->dip_vld = DIP_INVALID; 4770 + cong_alg->wnd_mode_sel = WND_LIMIT; 4771 + break; 4767 4772 } 4768 4773 4769 4774 return 0;
+9 -7
drivers/infiniband/hw/irdma/hw.c
··· 321 321 break; 322 322 case IRDMA_AE_QP_SUSPEND_COMPLETE: 323 323 if (iwqp->iwdev->vsi.tc_change_pending) { 324 - atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs); 324 + if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs)) 325 + wake_up(&iwqp->iwdev->suspend_wq); 326 + } 327 + if (iwqp->suspend_pending) { 328 + iwqp->suspend_pending = false; 325 329 wake_up(&iwqp->iwdev->suspend_wq); 326 330 } 327 331 break; ··· 585 581 struct irdma_cqp *cqp = &rf->cqp; 586 582 int status = 0; 587 583 588 - if (rf->cqp_cmpl_wq) 589 - destroy_workqueue(rf->cqp_cmpl_wq); 590 - 591 584 status = irdma_sc_cqp_destroy(dev->cqp); 592 585 if (status) 593 586 ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status); ··· 748 747 struct irdma_sc_dev *dev = &rf->sc_dev; 749 748 struct irdma_ccq *ccq = &rf->ccq; 750 749 int status = 0; 750 + 751 + if (rf->cqp_cmpl_wq) 752 + destroy_workqueue(rf->cqp_cmpl_wq); 751 753 752 754 if (!rf->reset) 753 755 status = irdma_sc_ccq_destroy(dev->ccq, 0, true); ··· 1184 1180 int status; 1185 1181 struct irdma_ceq_init_info info = {}; 1186 1182 struct irdma_sc_dev *dev = &rf->sc_dev; 1187 - u64 scratch; 1188 1183 u32 ceq_size; 1189 1184 1190 1185 info.ceq_id = ceq_id; ··· 1204 1201 iwceq->sc_ceq.ceq_id = ceq_id; 1205 1202 info.dev = dev; 1206 1203 info.vsi = vsi; 1207 - scratch = (uintptr_t)&rf->cqp.sc_cqp; 1208 1204 status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); 1209 1205 if (!status) { 1210 1206 if (dev->ceq_valid) 1211 1207 status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, 1212 1208 IRDMA_OP_CEQ_CREATE); 1213 1209 else 1214 - status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch); 1210 + status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0); 1215 1211 } 1216 1212 1217 1213 if (status) {
+1 -1
drivers/infiniband/hw/irdma/main.c
··· 48 48 /* Wait for all qp's to suspend */ 49 49 wait_event_timeout(iwdev->suspend_wq, 50 50 !atomic_read(&iwdev->vsi.qp_suspend_reqs), 51 - IRDMA_EVENT_TIMEOUT); 51 + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS)); 52 52 irdma_ws_reset(&iwdev->vsi); 53 53 } 54 54
+1 -1
drivers/infiniband/hw/irdma/main.h
··· 78 78 79 79 #define MAX_DPC_ITERATIONS 128 80 80 81 - #define IRDMA_EVENT_TIMEOUT 50000 81 + #define IRDMA_EVENT_TIMEOUT_MS 5000 82 82 #define IRDMA_VCHNL_EVENT_TIMEOUT 100000 83 83 #define IRDMA_RST_TIMEOUT_HZ 4 84 84
+27 -8
drivers/infiniband/hw/irdma/verbs.c
··· 1157 1157 return prio; 1158 1158 } 1159 1159 1160 + static int irdma_wait_for_suspend(struct irdma_qp *iwqp) 1161 + { 1162 + if (!wait_event_timeout(iwqp->iwdev->suspend_wq, 1163 + !iwqp->suspend_pending, 1164 + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) { 1165 + iwqp->suspend_pending = false; 1166 + ibdev_warn(&iwqp->iwdev->ibdev, 1167 + "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n", 1168 + iwqp->ibqp.qp_num, iwqp->last_aeq); 1169 + return -EBUSY; 1170 + } 1171 + 1172 + return 0; 1173 + } 1174 + 1160 1175 /** 1161 1176 * irdma_modify_qp_roce - modify qp request 1162 1177 * @ibqp: qp's pointer for modify ··· 1435 1420 1436 1421 info.next_iwarp_state = IRDMA_QP_STATE_SQD; 1437 1422 issue_modify_qp = 1; 1423 + iwqp->suspend_pending = true; 1438 1424 break; 1439 1425 case IB_QPS_SQE: 1440 1426 case IB_QPS_ERR: 1441 1427 case IB_QPS_RESET: 1442 - if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) { 1443 - spin_unlock_irqrestore(&iwqp->lock, flags); 1444 - info.next_iwarp_state = IRDMA_QP_STATE_SQD; 1445 - irdma_hw_modify_qp(iwdev, iwqp, &info, true); 1446 - spin_lock_irqsave(&iwqp->lock, flags); 1447 - } 1448 - 1449 1428 if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { 1450 1429 spin_unlock_irqrestore(&iwqp->lock, flags); 1451 1430 if (udata && udata->inlen) { ··· 1476 1467 ctx_info->rem_endpoint_idx = udp_info->arp_idx; 1477 1468 if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) 1478 1469 return -EINVAL; 1470 + if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) { 1471 + ret = irdma_wait_for_suspend(iwqp); 1472 + if (ret) 1473 + return ret; 1474 + } 1479 1475 spin_lock_irqsave(&iwqp->lock, flags); 1480 1476 if (iwqp->iwarp_state == info.curr_iwarp_state) { 1481 1477 iwqp->iwarp_state = info.next_iwarp_state; ··· 2914 2900 iwmr->type = reg_type; 2915 2901 2916 2902 pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ? 
2917 - iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE; 2903 + iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K; 2918 2904 2919 2905 iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt); 2920 2906 if (unlikely(!iwmr->page_size)) { ··· 2945 2931 u32 total; 2946 2932 int err; 2947 2933 u8 lvl; 2934 + 2935 + /* iWarp: Catch page not starting on OS page boundary */ 2936 + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && 2937 + ib_umem_offset(iwmr->region)) 2938 + return -EINVAL; 2948 2939 2949 2940 total = req.sq_pages + req.rq_pages + 1; 2950 2941 if (total > iwmr->page_cnt)
+1
drivers/infiniband/hw/irdma/verbs.h
··· 198 198 u8 flush_issued : 1; 199 199 u8 sig_all : 1; 200 200 u8 pau_mode : 1; 201 + u8 suspend_pending : 1; 201 202 u8 rsvd : 1; 202 203 u8 iwarp_state; 203 204 u16 term_sq_flush_code;
+3 -4
drivers/infiniband/ulp/rtrs/rtrs-clt.c
··· 384 384 struct rtrs_clt_path *clt_path; 385 385 int err; 386 386 387 - if (WARN_ON(!req->in_use)) 387 + if (!req->in_use) 388 388 return; 389 389 if (WARN_ON(!req->con)) 390 390 return; ··· 1699 1699 clt_path->s.dev_ref++; 1700 1700 max_send_wr = min_t(int, wr_limit, 1701 1701 /* QD * (REQ + RSP + FR REGS or INVS) + drain */ 1702 - clt_path->queue_depth * 3 + 1); 1702 + clt_path->queue_depth * 4 + 1); 1703 1703 max_recv_wr = min_t(int, wr_limit, 1704 1704 clt_path->queue_depth * 3 + 1); 1705 1705 max_send_sge = 2; ··· 2350 2350 if (err) 2351 2351 goto destroy; 2352 2352 2353 - rtrs_start_hb(&clt_path->s); 2354 - 2355 2353 return 0; 2356 2354 2357 2355 destroy: ··· 2623 2625 goto out; 2624 2626 } 2625 2627 rtrs_clt_path_up(clt_path); 2628 + rtrs_start_hb(&clt_path->s); 2626 2629 out: 2627 2630 mutex_unlock(&clt_path->init_mutex); 2628 2631
+27 -10
drivers/infiniband/ulp/rtrs/rtrs-srv.c
··· 65 65 { 66 66 enum rtrs_srv_state old_state; 67 67 bool changed = false; 68 + unsigned long flags; 68 69 69 - spin_lock_irq(&srv_path->state_lock); 70 + spin_lock_irqsave(&srv_path->state_lock, flags); 70 71 old_state = srv_path->state; 71 72 switch (new_state) { 72 73 case RTRS_SRV_CONNECTED: ··· 88 87 } 89 88 if (changed) 90 89 srv_path->state = new_state; 91 - spin_unlock_irq(&srv_path->state_lock); 90 + spin_unlock_irqrestore(&srv_path->state_lock, flags); 92 91 93 92 return changed; 94 93 } ··· 551 550 struct rtrs_srv_mr *srv_mr; 552 551 553 552 srv_mr = &srv_path->mrs[i]; 554 - rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); 553 + 554 + if (always_invalidate) 555 + rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); 556 + 555 557 ib_dereg_mr(srv_mr->mr); 556 558 ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl, 557 559 srv_mr->sgt.nents, DMA_BIDIRECTIONAL); ··· 713 709 WARN_ON(wc->opcode != IB_WC_SEND); 714 710 } 715 711 716 - static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path) 712 + static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path) 717 713 { 718 714 struct rtrs_srv_sess *srv = srv_path->srv; 719 715 struct rtrs_srv_ctx *ctx = srv->ctx; 720 - int up; 716 + int up, ret = 0; 721 717 722 718 mutex_lock(&srv->paths_ev_mutex); 723 719 up = ++srv->paths_up; 724 720 if (up == 1) 725 - ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); 721 + ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); 726 722 mutex_unlock(&srv->paths_ev_mutex); 727 723 728 724 /* Mark session as established */ 729 - srv_path->established = true; 725 + if (!ret) 726 + srv_path->established = true; 727 + 728 + return ret; 730 729 } 731 730 732 731 static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path) ··· 858 851 goto iu_free; 859 852 kobject_get(&srv_path->kobj); 860 853 get_device(&srv_path->srv->dev); 861 - rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); 854 + err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); 855 + 
if (!err) { 856 + rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err); 857 + goto iu_free; 858 + } 859 + 862 860 rtrs_srv_start_hb(srv_path); 863 861 864 862 /* ··· 872 860 * all connections are successfully established. Thus, simply notify 873 861 * listener with a proper event if we are the first path. 874 862 */ 875 - rtrs_srv_path_up(srv_path); 863 + err = rtrs_srv_path_up(srv_path); 864 + if (err) { 865 + rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err); 866 + goto iu_free; 867 + } 876 868 877 869 ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, 878 870 tx_iu->dma_addr, ··· 1532 1516 1533 1517 srv_path = container_of(work, typeof(*srv_path), close_work); 1534 1518 1535 - rtrs_srv_destroy_path_files(srv_path); 1536 1519 rtrs_srv_stop_hb(srv_path); 1537 1520 1538 1521 for (i = 0; i < srv_path->s.con_num; i++) { ··· 1550 1535 1551 1536 /* Wait for all completion */ 1552 1537 wait_for_completion(&srv_path->complete_done); 1538 + 1539 + rtrs_srv_destroy_path_files(srv_path); 1553 1540 1554 1541 /* Notify upper layer if we are the last path */ 1555 1542 rtrs_srv_path_down(srv_path);
+8 -1
include/rdma/ib_umem.h
··· 77 77 { 78 78 __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl, 79 79 umem->sgt_append.sgt.nents, pgsz); 80 + biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1); 81 + biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz); 82 + } 83 + 84 + static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter) 85 + { 86 + return __rdma_block_iter_next(biter) && biter->__sg_numblocks--; 80 87 } 81 88 82 89 /** ··· 99 92 */ 100 93 #define rdma_umem_for_each_dma_block(umem, biter, pgsz) \ 101 94 for (__rdma_umem_block_iter_start(biter, umem, pgsz); \ 102 - __rdma_block_iter_next(biter);) 95 + __rdma_umem_block_iter_next(biter);) 103 96 104 97 #ifdef CONFIG_INFINIBAND_USER_MEM 105 98
+1
include/rdma/ib_verbs.h
··· 2850 2850 /* internal states */ 2851 2851 struct scatterlist *__sg; /* sg holding the current aligned block */ 2852 2852 dma_addr_t __dma_addr; /* unaligned DMA address of this block */ 2853 + size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */ 2853 2854 unsigned int __sg_nents; /* number of SG entries */ 2854 2855 unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ 2855 2856 unsigned int __pg_bit; /* alignment of current block */