Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

RDMA/bnxt_re: Pull psn buffer dynamically based on prod

Change the PSN management memory buffers from a statically initialized
scheme to a dynamic pull scheme.

During QP creation only the start pointers are initialized; during
post-send the PSN buffer is pulled based on the current producer index.

Adjust the post_send code to accommodate the dynamic PSN pull, and change
the post_recv code to match the post_send code with respect to pseudo
flush WQE generation.

Link: https://lore.kernel.org/r/1594822619-4098-4-git-send-email-devesh.sharma@broadcom.com
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Devesh Sharma and committed by
Jason Gunthorpe
5ac5396a 159fb4ce

+74 -58
+71 -58
drivers/infiniband/hw/bnxt_re/qplib_fp.c
··· 929 929 { 930 930 struct bnxt_qplib_hwq *hwq; 931 931 struct bnxt_qplib_q *sq; 932 - u64 fpsne, psne, psn_pg; 933 - u16 indx_pad = 0, indx; 934 - u16 pg_num, pg_indx; 935 - u64 *page; 932 + u64 fpsne, psn_pg; 933 + u16 indx_pad = 0; 936 934 937 935 sq = &qp->sq; 938 936 hwq = &sq->hwq; 939 - 940 - fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg); 937 + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg); 941 938 if (!IS_ALIGNED(fpsne, PAGE_SIZE)) 942 939 indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; 943 940 944 - page = (u64 *)psn_pg; 945 - for (indx = 0; indx < hwq->max_elements; indx++) { 946 - pg_num = (indx + indx_pad) / (PAGE_SIZE / size); 947 - pg_indx = (indx + indx_pad) % (PAGE_SIZE / size); 948 - psne = page[pg_num] + pg_indx * size; 949 - sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne; 950 - sq->swq[indx].psn_search = (struct sq_psn_search *)psne; 951 - } 941 + hwq->pad_pgofft = indx_pad; 942 + hwq->pad_pg = (u64 *)psn_pg; 943 + hwq->pad_stride = size; 952 944 } 953 945 954 946 int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) ··· 1547 1555 u32 flg_npsn; 1548 1556 u32 op_spsn; 1549 1557 1558 + if (!swq->psn_search) 1559 + return; 1550 1560 psns = swq->psn_search; 1551 1561 psns_ext = swq->psn_ext; 1552 1562 ··· 1568 1574 } 1569 1575 } 1570 1576 1577 + static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, 1578 + struct bnxt_qplib_swq *swq, u32 tail) 1579 + { 1580 + struct bnxt_qplib_hwq *hwq; 1581 + u32 pg_num, pg_indx; 1582 + void *buff; 1583 + 1584 + hwq = &sq->hwq; 1585 + if (!hwq->pad_pg) 1586 + return; 1587 + pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); 1588 + pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); 1589 + buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); 1590 + swq->psn_ext = buff; 1591 + swq->psn_search = buff; 1592 + } 1593 + 1571 1594 void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) 1572 1595 { 1573 1596 
struct bnxt_qplib_q *sq = &qp->sq; ··· 1599 1588 int i, rc = 0, data_len = 0, pkt_num = 0; 1600 1589 struct bnxt_qplib_q *sq = &qp->sq; 1601 1590 struct sq_send *hw_sq_send_hdr; 1591 + struct bnxt_qplib_hwq *hwq; 1602 1592 struct bnxt_qplib_swq *swq; 1603 1593 bool sch_handler = false; 1604 1594 struct sq_sge *hw_sge; ··· 1607 1595 __le32 temp32; 1608 1596 u32 sw_prod; 1609 1597 1610 - if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { 1611 - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1612 - sch_handler = true; 1613 - dev_dbg(&sq->hwq.pdev->dev, 1614 - "%s Error QP. Scheduling for poll_cq\n", 1615 - __func__); 1616 - goto queue_err; 1617 - } 1598 + hwq = &sq->hwq; 1599 + if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && 1600 + qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1601 + dev_err(&hwq->pdev->dev, 1602 + "QPLIB: FP: QP (0x%x) is in the 0x%x state", 1603 + qp->id, qp->state); 1604 + rc = -EINVAL; 1605 + goto done; 1618 1606 } 1619 1607 1620 1608 if (bnxt_qplib_queue_full(sq)) { 1621 - dev_err(&sq->hwq.pdev->dev, 1609 + dev_err(&hwq->pdev->dev, 1622 1610 "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", 1623 - sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, 1611 + hwq->prod, hwq->cons, hwq->max_elements, 1624 1612 sq->q_full_delta); 1625 1613 rc = -ENOMEM; 1626 1614 goto done; 1627 1615 } 1616 + 1628 1617 sw_prod = sq->hwq.prod; 1629 1618 swq = bnxt_qplib_get_swqe(sq, NULL); 1619 + bnxt_qplib_pull_psn_buff(sq, swq, sw_prod); 1630 1620 swq->wr_id = wqe->wr_id; 1631 1621 swq->type = wqe->type; 1632 1622 swq->flags = wqe->flags; 1623 + swq->start_psn = sq->psn & BTH_PSN_MASK; 1633 1624 if (qp->sig_type) 1634 1625 swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; 1635 - swq->start_psn = sq->psn & BTH_PSN_MASK; 1636 1626 1637 - hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL); 1627 + hw_sq_send_hdr = bnxt_qplib_get_qe(hwq, sw_prod, NULL); 1638 1628 memset(hw_sq_send_hdr, 0, sq->wqe_size); 1629 + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1630 + 
sch_handler = true; 1631 + dev_dbg(&hwq->pdev->dev, 1632 + "%s Error QP. Scheduling for poll_cq\n", __func__); 1633 + goto queue_err; 1634 + } 1639 1635 1640 1636 if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { 1641 1637 /* Copy the inline data */ 1642 1638 if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { 1643 - dev_warn(&sq->hwq.pdev->dev, 1639 + dev_warn(&hwq->pdev->dev, 1644 1640 "Inline data length > 96 detected\n"); 1645 1641 data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; 1646 1642 } else { ··· 1830 1810 goto done; 1831 1811 } 1832 1812 swq->next_psn = sq->psn & BTH_PSN_MASK; 1833 - if (qp->type == CMDQ_CREATE_QP_TYPE_RC) 1834 - bnxt_qplib_fill_psn_search(qp, wqe, swq); 1813 + bnxt_qplib_fill_psn_search(qp, wqe, swq); 1835 1814 queue_err: 1836 - if (sch_handler) { 1837 - /* Store the ULP info in the software structures */ 1838 - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); 1839 - swq = &sq->swq[sw_prod]; 1840 - swq->wr_id = wqe->wr_id; 1841 - swq->type = wqe->type; 1842 - swq->flags = wqe->flags; 1843 - if (qp->sig_type) 1844 - swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; 1845 - swq->start_psn = sq->psn & BTH_PSN_MASK; 1846 - } 1847 1815 bnxt_qplib_swq_mod_start(sq, sw_prod); 1848 1816 bnxt_qplib_hwq_incr_prod(&sq->hwq, 1); 1849 1817 qp->wqe_cnt++; 1850 - 1851 1818 done: 1852 1819 if (sch_handler) { 1853 1820 nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); ··· 1844 1837 INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); 1845 1838 queue_work(qp->scq->nq->cqn_wq, &nq_work->work); 1846 1839 } else { 1847 - dev_err(&sq->hwq.pdev->dev, 1840 + dev_err(&hwq->pdev->dev, 1848 1841 "FP: Failed to allocate SQ nq_work!\n"); 1849 1842 rc = -ENOMEM; 1850 1843 } ··· 1865 1858 struct bnxt_qplib_nq_work *nq_work = NULL; 1866 1859 struct bnxt_qplib_q *rq = &qp->rq; 1867 1860 struct bnxt_qplib_swq *swq; 1861 + struct bnxt_qplib_hwq *hwq; 1868 1862 bool sch_handler = false; 1869 1863 struct sq_sge *hw_sge; 1870 1864 struct rq_wqe *rqe; 1871 1865 int i, rc = 0; 1872 1866 u32 
sw_prod; 1873 1867 1874 - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1875 - sch_handler = true; 1876 - dev_dbg(&rq->hwq.pdev->dev, 1877 - "%s: Error QP. Scheduling for poll_cq\n", __func__); 1878 - goto queue_err; 1868 + hwq = &rq->hwq; 1869 + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { 1870 + dev_err(&hwq->pdev->dev, 1871 + "QPLIB: FP: QP (0x%x) is in the 0x%x state", 1872 + qp->id, qp->state); 1873 + rc = -EINVAL; 1874 + goto done; 1879 1875 } 1876 + 1880 1877 if (bnxt_qplib_queue_full(rq)) { 1881 - dev_err(&rq->hwq.pdev->dev, 1878 + dev_err(&hwq->pdev->dev, 1882 1879 "FP: QP (0x%x) RQ is full!\n", qp->id); 1883 1880 rc = -EINVAL; 1884 1881 goto done; 1885 1882 } 1883 + 1886 1884 sw_prod = rq->hwq.prod; 1887 1885 swq = bnxt_qplib_get_swqe(rq, NULL); 1888 1886 swq->wr_id = wqe->wr_id; 1889 1887 1890 - rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); 1888 + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1889 + sch_handler = true; 1890 + dev_dbg(&hwq->pdev->dev, 1891 + "%s: Error QP. 
Scheduling for poll_cq\n", __func__); 1892 + goto queue_err; 1893 + } 1894 + 1895 + rqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); 1891 1896 memset(rqe, 0, rq->wqe_size); 1892 1897 1893 1898 /* Calculate wqe_size16 and data_len */ ··· 1923 1904 rqe->wr_id[0] = cpu_to_le32(sw_prod); 1924 1905 1925 1906 queue_err: 1926 - if (sch_handler) { 1927 - /* Store the ULP info in the software structures */ 1928 - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); 1929 - swq = bnxt_qplib_get_swqe(rq, NULL); 1930 - swq->wr_id = wqe->wr_id; 1931 - } 1932 - 1933 1907 bnxt_qplib_swq_mod_start(rq, sw_prod); 1934 1908 bnxt_qplib_hwq_incr_prod(&rq->hwq, 1); 1909 + done: 1935 1910 if (sch_handler) { 1936 1911 nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); 1937 1912 if (nq_work) { ··· 1934 1921 INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); 1935 1922 queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); 1936 1923 } else { 1937 - dev_err(&rq->hwq.pdev->dev, 1924 + dev_err(&hwq->pdev->dev, 1938 1925 "FP: Failed to allocate RQ nq_work!\n"); 1939 1926 rc = -ENOMEM; 1940 1927 } 1941 1928 } 1942 - done: 1929 + 1943 1930 return rc; 1944 1931 } 1945 1932
+3
drivers/infiniband/hw/bnxt_re/qplib_res.h
··· 163 163 u32 cons; /* raw */ 164 164 u8 cp_bit; 165 165 u8 is_user; 166 + u64 *pad_pg; 167 + u32 pad_stride; 168 + u32 pad_pgofft; 166 169 }; 167 170 168 171 struct bnxt_qplib_db_info {