Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net/mlx5: Reimplement write combining test

The write combining test was previously added to the mlx5_ib driver. It
opens a UD QP, posts NOP WQEs, and uses the BlueFlame doorbell. When
BlueFlame is used, WQEs are written directly to a PCI BAR of the
device (in addition to memory) so that the device can handle them
without having to access memory.

In this test, the WQEs written to memory differ from the copies written
to the BlueFlame register: the BlueFlame copies always request a CQE
update. By checking the completions posted on the CQ, we can tell
whether BlueFlame succeeded. If it did, write combining must be
supported, because the BlueFlame register is written using write
combining.

This patch reimplements the test in the same way, but using only an
SQ/CQ pair. It is moved to mlx5_core as a general feature used by
both mlx5_core and mlx5_ib.

In addition, cache the write combining test result of the PCI function,
so that its child functions, such as SFs (of which there may be
thousands), can query it without each paying the time and resource
cost of running the test. The test function is called only after
failing to get the cached result. With this enhancement, the thousands
of SFs of a PF attached to the same driver no longer need to perform
the WC check themselves, since it has already been done for the parent.
This saves several commands per SF, thereby speeding up SF creation,
and also saves a completion EQ creation.

Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/4ff5a8cc4c5b5b0d98397baa45a5019bcdbf096e.1717409369.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Jianbo Liu and committed by
Leon Romanovsky
d98995b4 83a7eefe

+451 -234
+3 -16
drivers/infiniband/hw/mlx5/main.c
··· 1810 1810 } 1811 1811 1812 1812 resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); 1813 - if (dev->wc_support) 1813 + if (mlx5_wc_support_get(dev->mdev)) 1814 1814 resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, 1815 1815 log_bf_reg_size); 1816 1816 resp->cache_line_size = cache_line_size(); ··· 2337 2337 switch (command) { 2338 2338 case MLX5_IB_MMAP_WC_PAGE: 2339 2339 case MLX5_IB_MMAP_ALLOC_WC: 2340 - if (!dev->wc_support) 2340 + if (!mlx5_wc_support_get(dev->mdev)) 2341 2341 return -EPERM; 2342 2342 fallthrough; 2343 2343 case MLX5_IB_MMAP_NC_PAGE: ··· 3612 3612 alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC) 3613 3613 return -EOPNOTSUPP; 3614 3614 3615 - if (!to_mdev(c->ibucontext.device)->wc_support && 3615 + if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) && 3616 3616 alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) 3617 3617 return -EOPNOTSUPP; 3618 3618 ··· 3766 3766 return err; 3767 3767 } 3768 3768 3769 - static int mlx5_ib_enable_driver(struct ib_device *dev) 3770 - { 3771 - struct mlx5_ib_dev *mdev = to_mdev(dev); 3772 - int ret; 3773 - 3774 - ret = mlx5_ib_test_wc(mdev); 3775 - mlx5_ib_dbg(mdev, "Write-Combining %s", 3776 - mdev->wc_support ? "supported" : "not supported"); 3777 - 3778 - return ret; 3779 - } 3780 - 3781 3769 static const struct ib_device_ops mlx5_ib_dev_ops = { 3782 3770 .owner = THIS_MODULE, 3783 3771 .driver_id = RDMA_DRIVER_MLX5, ··· 3796 3808 .drain_rq = mlx5_ib_drain_rq, 3797 3809 .drain_sq = mlx5_ib_drain_sq, 3798 3810 .device_group = &mlx5_attr_group, 3799 - .enable_driver = mlx5_ib_enable_driver, 3800 3811 .get_dev_fw_str = get_dev_fw_str, 3801 3812 .get_dma_mr = mlx5_ib_get_dma_mr, 3802 3813 .get_link_layer = mlx5_ib_port_link_layer,
-198
drivers/infiniband/hw/mlx5/mem.c
··· 30 30 * SOFTWARE. 31 31 */ 32 32 33 - #include <linux/io.h> 34 33 #include <rdma/ib_umem_odp.h> 35 34 #include "mlx5_ib.h" 36 - #include <linux/jiffies.h> 37 35 38 36 /* 39 37 * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be ··· 92 94 if (WARN_ON(*page_offset_quantized > page_offset_mask)) 93 95 return 0; 94 96 return page_size; 95 - } 96 - 97 - #define WR_ID_BF 0xBF 98 - #define WR_ID_END 0xBAD 99 - #define TEST_WC_NUM_WQES 255 100 - #define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) 101 - static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, 102 - bool signaled) 103 - { 104 - struct mlx5_ib_qp *qp = to_mqp(ibqp); 105 - struct mlx5_wqe_ctrl_seg *ctrl; 106 - struct mlx5_bf *bf = &qp->bf; 107 - __be32 mmio_wqe[16] = {}; 108 - unsigned long flags; 109 - unsigned int idx; 110 - 111 - if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) 112 - return -EIO; 113 - 114 - spin_lock_irqsave(&qp->sq.lock, flags); 115 - 116 - idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); 117 - ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); 118 - 119 - memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); 120 - ctrl->fm_ce_se = signaled ? 
MLX5_WQE_CTRL_CQ_UPDATE : 0; 121 - ctrl->opmod_idx_opcode = 122 - cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); 123 - ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | 124 - (qp->trans_qp.base.mqp.qpn << 8)); 125 - 126 - qp->sq.wrid[idx] = wr_id; 127 - qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; 128 - qp->sq.wqe_head[idx] = qp->sq.head + 1; 129 - qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), 130 - MLX5_SEND_WQE_BB); 131 - qp->sq.w_list[idx].next = qp->sq.cur_post; 132 - qp->sq.head++; 133 - 134 - memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); 135 - ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= 136 - MLX5_WQE_CTRL_CQ_UPDATE; 137 - 138 - /* Make sure that descriptors are written before 139 - * updating doorbell record and ringing the doorbell 140 - */ 141 - wmb(); 142 - 143 - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); 144 - 145 - /* Make sure doorbell record is visible to the HCA before 146 - * we hit doorbell 147 - */ 148 - wmb(); 149 - __iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe, 150 - sizeof(mmio_wqe) / 8); 151 - 152 - bf->offset ^= bf->buf_size; 153 - 154 - spin_unlock_irqrestore(&qp->sq.lock, flags); 155 - 156 - return 0; 157 - } 158 - 159 - static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) 160 - { 161 - int ret; 162 - struct ib_wc wc = {}; 163 - unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; 164 - 165 - do { 166 - ret = ib_poll_cq(cq, 1, &wc); 167 - if (ret < 0 || wc.status) 168 - return ret < 0 ? 
ret : -EINVAL; 169 - if (ret) 170 - break; 171 - } while (!time_after(jiffies, end)); 172 - 173 - if (!ret) 174 - return -ETIMEDOUT; 175 - 176 - if (wc.wr_id != WR_ID_BF) 177 - ret = 0; 178 - 179 - return ret; 180 - } 181 - 182 - static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) 183 - { 184 - int err, i; 185 - 186 - for (i = 0; i < TEST_WC_NUM_WQES; i++) { 187 - err = post_send_nop(dev, qp, WR_ID_BF, false); 188 - if (err) 189 - return err; 190 - } 191 - 192 - return post_send_nop(dev, qp, WR_ID_END, true); 193 - } 194 - 195 - int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) 196 - { 197 - struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; 198 - int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); 199 - struct ib_qp_init_attr qp_init_attr = { 200 - .cap = { .max_send_wr = TEST_WC_NUM_WQES }, 201 - .qp_type = IB_QPT_UD, 202 - .sq_sig_type = IB_SIGNAL_REQ_WR, 203 - .create_flags = MLX5_IB_QP_CREATE_WC_TEST, 204 - }; 205 - struct ib_qp_attr qp_attr = { .port_num = 1 }; 206 - struct ib_device *ibdev = &dev->ib_dev; 207 - struct ib_qp *qp; 208 - struct ib_cq *cq; 209 - struct ib_pd *pd; 210 - int ret; 211 - 212 - if (!MLX5_CAP_GEN(dev->mdev, bf)) 213 - return 0; 214 - 215 - if (!dev->mdev->roce.roce_en && 216 - port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { 217 - if (mlx5_core_is_pf(dev->mdev)) 218 - dev->wc_support = arch_can_pci_mmap_wc(); 219 - return 0; 220 - } 221 - 222 - ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); 223 - if (ret) 224 - goto print_err; 225 - 226 - if (!dev->wc_bfreg.wc) 227 - goto out1; 228 - 229 - pd = ib_alloc_pd(ibdev, 0); 230 - if (IS_ERR(pd)) { 231 - ret = PTR_ERR(pd); 232 - goto out1; 233 - } 234 - 235 - cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); 236 - if (IS_ERR(cq)) { 237 - ret = PTR_ERR(cq); 238 - goto out2; 239 - } 240 - 241 - qp_init_attr.recv_cq = cq; 242 - qp_init_attr.send_cq = cq; 243 - qp = ib_create_qp(pd, &qp_init_attr); 244 - if (IS_ERR(qp)) { 245 - ret = PTR_ERR(qp); 246 
- goto out3; 247 - } 248 - 249 - qp_attr.qp_state = IB_QPS_INIT; 250 - ret = ib_modify_qp(qp, &qp_attr, 251 - IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | 252 - IB_QP_QKEY); 253 - if (ret) 254 - goto out4; 255 - 256 - qp_attr.qp_state = IB_QPS_RTR; 257 - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 258 - if (ret) 259 - goto out4; 260 - 261 - qp_attr.qp_state = IB_QPS_RTS; 262 - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 263 - if (ret) 264 - goto out4; 265 - 266 - ret = test_wc_do_send(dev, qp); 267 - if (ret < 0) 268 - goto out4; 269 - 270 - ret = test_wc_poll_cq_result(dev, cq); 271 - if (ret > 0) { 272 - dev->wc_support = true; 273 - ret = 0; 274 - } 275 - 276 - out4: 277 - ib_destroy_qp(qp); 278 - out3: 279 - ib_destroy_cq(cq); 280 - out2: 281 - ib_dealloc_pd(pd); 282 - out1: 283 - mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); 284 - print_err: 285 - if (ret) 286 - mlx5_ib_err( 287 - dev, 288 - "Error %d while trying to test write-combining support\n", 289 - ret); 290 - return ret; 291 97 }
-3
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 341 341 * rely on the range reserved for that use in the ib_qp_create_flags enum. 342 342 */ 343 343 #define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START 344 - #define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) 345 344 346 345 struct wr_list { 347 346 u16 opcode; ··· 1122 1123 u8 ib_active:1; 1123 1124 u8 is_rep:1; 1124 1125 u8 lag_active:1; 1125 - u8 wc_support:1; 1126 1126 u8 fill_delay; 1127 1127 struct umr_common umrc; 1128 1128 /* sync used page count stats ··· 1147 1149 /* Array with num_ports elements */ 1148 1150 struct mlx5_ib_port *port; 1149 1151 struct mlx5_sq_bfreg bfreg; 1150 - struct mlx5_sq_bfreg wc_bfreg; 1151 1152 struct mlx5_sq_bfreg fp_bfreg; 1152 1153 struct mlx5_ib_delay_drop delay_drop; 1153 1154 const struct mlx5_ib_profile *profile;
-16
drivers/infiniband/hw/mlx5/qp.c
··· 1107 1107 1108 1108 if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) 1109 1109 qp->bf.bfreg = &dev->fp_bfreg; 1110 - else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) 1111 - qp->bf.bfreg = &dev->wc_bfreg; 1112 1110 else 1113 1111 qp->bf.bfreg = &dev->bfreg; 1114 1112 ··· 2957 2959 return; 2958 2960 } 2959 2961 2960 - if (flag == MLX5_IB_QP_CREATE_WC_TEST) { 2961 - /* 2962 - * Special case, if condition didn't meet, it won't be error, 2963 - * just different in-kernel flow. 2964 - */ 2965 - *flags &= ~MLX5_IB_QP_CREATE_WC_TEST; 2966 - return; 2967 - } 2968 2962 mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag); 2969 2963 } 2970 2964 ··· 3017 3027 IB_QP_CREATE_PCI_WRITE_END_PADDING, 3018 3028 MLX5_CAP_GEN(mdev, end_pad), qp); 3019 3029 3020 - process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST, 3021 - qp_type != MLX5_IB_QPT_REG_UMR, qp); 3022 3030 process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, 3023 3031 true, qp); 3024 3032 ··· 4595 4607 return true; 4596 4608 4597 4609 if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR) 4598 - return true; 4599 - 4600 - /* Internal QP used for wc testing, with NOPs in wq */ 4601 - if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) 4602 4610 return true; 4603 4611 4604 4612 return false;
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/Makefile
··· 17 17 fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ 18 18 lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ 19 19 diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ 20 - fw_reset.o qos.o lib/tout.o lib/aso.o 20 + fw_reset.o qos.o lib/tout.o lib/aso.o wc.o 21 21 22 22 # 23 23 # Netdev basic
+2
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 1819 1819 mutex_init(&dev->intf_state_mutex); 1820 1820 lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); 1821 1821 mutex_init(&dev->mlx5e_res.uplink_netdev_lock); 1822 + mutex_init(&dev->wc_state_lock); 1822 1823 1823 1824 mutex_init(&priv->bfregs.reg_head.lock); 1824 1825 mutex_init(&priv->bfregs.wc_head.lock); ··· 1917 1916 mutex_destroy(&priv->alloc_mutex); 1918 1917 mutex_destroy(&priv->bfregs.wc_head.lock); 1919 1918 mutex_destroy(&priv->bfregs.reg_head.lock); 1919 + mutex_destroy(&dev->wc_state_lock); 1920 1920 mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); 1921 1921 mutex_destroy(&dev->intf_state_mutex); 1922 1922 lockdep_unregister_key(&dev->lock_key);
+434
drivers/net/ethernet/mellanox/mlx5/core/wc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + // Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 + 4 + #include <linux/io.h> 5 + #include <linux/mlx5/transobj.h> 6 + #include "lib/clock.h" 7 + #include "mlx5_core.h" 8 + #include "wq.h" 9 + 10 + #define TEST_WC_NUM_WQES 255 11 + #define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES)) 12 + #define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ 13 + #define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) 14 + 15 + struct mlx5_wc_cq { 16 + /* data path - accessed per cqe */ 17 + struct mlx5_cqwq wq; 18 + 19 + /* data path - accessed per napi poll */ 20 + struct mlx5_core_cq mcq; 21 + 22 + /* control */ 23 + struct mlx5_core_dev *mdev; 24 + struct mlx5_wq_ctrl wq_ctrl; 25 + }; 26 + 27 + struct mlx5_wc_sq { 28 + /* data path */ 29 + u16 cc; 30 + u16 pc; 31 + 32 + /* read only */ 33 + struct mlx5_wq_cyc wq; 34 + u32 sqn; 35 + 36 + /* control path */ 37 + struct mlx5_wq_ctrl wq_ctrl; 38 + 39 + struct mlx5_wc_cq cq; 40 + struct mlx5_sq_bfreg bfreg; 41 + }; 42 + 43 + static int mlx5_wc_create_cqwq(struct mlx5_core_dev *mdev, void *cqc, 44 + struct mlx5_wc_cq *cq) 45 + { 46 + struct mlx5_core_cq *mcq = &cq->mcq; 47 + struct mlx5_wq_param param = {}; 48 + int err; 49 + u32 i; 50 + 51 + err = mlx5_cqwq_create(mdev, &param, cqc, &cq->wq, &cq->wq_ctrl); 52 + if (err) 53 + return err; 54 + 55 + mcq->cqe_sz = 64; 56 + mcq->set_ci_db = cq->wq_ctrl.db.db; 57 + mcq->arm_db = cq->wq_ctrl.db.db + 1; 58 + 59 + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { 60 + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); 61 + 62 + cqe->op_own = 0xf1; 63 + } 64 + 65 + cq->mdev = mdev; 66 + 67 + return 0; 68 + } 69 + 70 + static int create_wc_cq(struct mlx5_wc_cq *cq, void *cqc_data) 71 + { 72 + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 73 + struct mlx5_core_dev *mdev = cq->mdev; 74 + struct mlx5_core_cq *mcq = &cq->mcq; 75 + int err, inlen, eqn; 76 + void *in, *cqc; 77 + 78 + err = 
mlx5_comp_eqn_get(mdev, 0, &eqn); 79 + if (err) 80 + return err; 81 + 82 + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 83 + sizeof(u64) * cq->wq_ctrl.buf.npages; 84 + in = kvzalloc(inlen, GFP_KERNEL); 85 + if (!in) 86 + return -ENOMEM; 87 + 88 + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 89 + 90 + memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc)); 91 + 92 + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, 93 + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); 94 + 95 + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 96 + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 97 + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); 98 + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - 99 + MLX5_ADAPTER_PAGE_SHIFT); 100 + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); 101 + 102 + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); 103 + 104 + kvfree(in); 105 + 106 + return err; 107 + } 108 + 109 + static int mlx5_wc_create_cq(struct mlx5_core_dev *mdev, struct mlx5_wc_cq *cq) 110 + { 111 + void *cqc; 112 + int err; 113 + 114 + cqc = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL); 115 + if (!cqc) 116 + return -ENOMEM; 117 + 118 + MLX5_SET(cqc, cqc, log_cq_size, TEST_WC_LOG_CQ_SZ); 119 + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); 120 + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) 121 + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); 122 + 123 + err = mlx5_wc_create_cqwq(mdev, cqc, cq); 124 + if (err) { 125 + mlx5_core_err(mdev, "Failed to create wc cq wq, err=%d\n", err); 126 + goto err_create_cqwq; 127 + } 128 + 129 + err = create_wc_cq(cq, cqc); 130 + if (err) { 131 + mlx5_core_err(mdev, "Failed to create wc cq, err=%d\n", err); 132 + goto err_create_cq; 133 + } 134 + 135 + kvfree(cqc); 136 + return 0; 137 + 138 + err_create_cq: 139 + mlx5_wq_destroy(&cq->wq_ctrl); 140 + err_create_cqwq: 141 + kvfree(cqc); 142 + return err; 143 + } 144 + 145 + static void mlx5_wc_destroy_cq(struct mlx5_wc_cq 
*cq) 146 + { 147 + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); 148 + mlx5_wq_destroy(&cq->wq_ctrl); 149 + } 150 + 151 + static int create_wc_sq(struct mlx5_core_dev *mdev, void *sqc_data, 152 + struct mlx5_wc_sq *sq) 153 + { 154 + void *in, *sqc, *wq; 155 + int inlen, err; 156 + u8 ts_format; 157 + 158 + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + 159 + sizeof(u64) * sq->wq_ctrl.buf.npages; 160 + in = kvzalloc(inlen, GFP_KERNEL); 161 + if (!in) 162 + return -ENOMEM; 163 + 164 + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); 165 + wq = MLX5_ADDR_OF(sqc, sqc, wq); 166 + 167 + memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc)); 168 + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); 169 + 170 + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); 171 + MLX5_SET(sqc, sqc, flush_in_error_en, 1); 172 + 173 + ts_format = mlx5_is_real_time_sq(mdev) ? 174 + MLX5_TIMESTAMP_FORMAT_REAL_TIME : 175 + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; 176 + MLX5_SET(sqc, sqc, ts_format, ts_format); 177 + 178 + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); 179 + MLX5_SET(wq, wq, uar_page, sq->bfreg.index); 180 + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - 181 + MLX5_ADAPTER_PAGE_SHIFT); 182 + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); 183 + 184 + mlx5_fill_page_frag_array(&sq->wq_ctrl.buf, 185 + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); 186 + 187 + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); 188 + if (err) { 189 + mlx5_core_err(mdev, "Failed to create wc sq, err=%d\n", err); 190 + goto err_create_sq; 191 + } 192 + 193 + memset(in, 0, MLX5_ST_SZ_BYTES(modify_sq_in)); 194 + MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST); 195 + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 196 + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); 197 + 198 + err = mlx5_core_modify_sq(mdev, sq->sqn, in); 199 + if (err) { 200 + mlx5_core_err(mdev, "Failed to set wc sq(sqn=0x%x) ready, err=%d\n", 201 + sq->sqn, err); 202 + goto err_modify_sq; 203 + } 204 + 205 + kvfree(in); 206 + return 0; 207 + 208 + 
err_modify_sq: 209 + mlx5_core_destroy_sq(mdev, sq->sqn); 210 + err_create_sq: 211 + kvfree(in); 212 + return err; 213 + } 214 + 215 + static int mlx5_wc_create_sq(struct mlx5_core_dev *mdev, struct mlx5_wc_sq *sq) 216 + { 217 + struct mlx5_wq_param param = {}; 218 + void *sqc_data, *wq; 219 + int err; 220 + 221 + sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL); 222 + if (!sqc_data) 223 + return -ENOMEM; 224 + 225 + wq = MLX5_ADDR_OF(sqc, sqc_data, wq); 226 + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); 227 + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); 228 + MLX5_SET(wq, wq, log_wq_sz, TEST_WC_SQ_LOG_WQ_SZ); 229 + 230 + err = mlx5_wq_cyc_create(mdev, &param, wq, &sq->wq, &sq->wq_ctrl); 231 + if (err) { 232 + mlx5_core_err(mdev, "Failed to create wc sq wq, err=%d\n", err); 233 + goto err_create_wq_cyc; 234 + } 235 + 236 + err = create_wc_sq(mdev, sqc_data, sq); 237 + if (err) 238 + goto err_create_sq; 239 + 240 + mlx5_core_dbg(mdev, "wc sq->sqn = 0x%x created\n", sq->sqn); 241 + 242 + kvfree(sqc_data); 243 + return 0; 244 + 245 + err_create_sq: 246 + mlx5_wq_destroy(&sq->wq_ctrl); 247 + err_create_wq_cyc: 248 + kvfree(sqc_data); 249 + return err; 250 + } 251 + 252 + static void mlx5_wc_destroy_sq(struct mlx5_wc_sq *sq) 253 + { 254 + mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn); 255 + mlx5_wq_destroy(&sq->wq_ctrl); 256 + } 257 + 258 + static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, bool signaled) 259 + { 260 + int buf_size = (1 << MLX5_CAP_GEN(sq->cq.mdev, log_bf_reg_size)) / 2; 261 + struct mlx5_wqe_ctrl_seg *ctrl; 262 + __be32 mmio_wqe[16] = {}; 263 + u16 pi; 264 + 265 + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); 266 + ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, pi); 267 + memset(ctrl, 0, sizeof(*ctrl)); 268 + ctrl->opmod_idx_opcode = 269 + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_NOP); 270 + ctrl->qpn_ds = 271 + cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | 272 + DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), 
MLX5_SEND_WQE_DS)); 273 + if (signaled) 274 + ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; 275 + 276 + memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); 277 + ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= 278 + MLX5_WQE_CTRL_CQ_UPDATE; 279 + 280 + /* ensure wqe is visible to device before updating doorbell record */ 281 + dma_wmb(); 282 + 283 + sq->pc++; 284 + sq->wq.db[MLX5_SND_DBR] = cpu_to_be32(sq->pc); 285 + 286 + /* ensure doorbell record is visible to device before ringing the 287 + * doorbell 288 + */ 289 + wmb(); 290 + 291 + __iowrite64_copy(sq->bfreg.map + sq->bfreg.offset, mmio_wqe, 292 + sizeof(mmio_wqe) / 8); 293 + 294 + sq->bfreg.offset ^= buf_size; 295 + } 296 + 297 + static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq) 298 + { 299 + struct mlx5_wc_cq *cq = &sq->cq; 300 + struct mlx5_cqe64 *cqe; 301 + 302 + cqe = mlx5_cqwq_get_cqe(&cq->wq); 303 + if (!cqe) 304 + return -ETIMEDOUT; 305 + 306 + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), 307 + * otherwise a cq overrun may occur 308 + */ 309 + mlx5_cqwq_pop(&cq->wq); 310 + 311 + if (get_cqe_opcode(cqe) == MLX5_CQE_REQ) { 312 + int wqe_counter = be16_to_cpu(cqe->wqe_counter); 313 + struct mlx5_core_dev *mdev = cq->mdev; 314 + 315 + if (wqe_counter == TEST_WC_NUM_WQES - 1) 316 + mdev->wc_state = MLX5_WC_STATE_UNSUPPORTED; 317 + else 318 + mdev->wc_state = MLX5_WC_STATE_SUPPORTED; 319 + 320 + mlx5_core_dbg(mdev, "wc wqe_counter = 0x%x\n", wqe_counter); 321 + } 322 + 323 + mlx5_cqwq_update_db_record(&cq->wq); 324 + 325 + /* ensure cq space is freed before enabling more cqes */ 326 + wmb(); 327 + 328 + sq->cc++; 329 + 330 + return 0; 331 + } 332 + 333 + static void mlx5_core_test_wc(struct mlx5_core_dev *mdev) 334 + { 335 + unsigned long expires; 336 + struct mlx5_wc_sq *sq; 337 + int i, err; 338 + 339 + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) 340 + return; 341 + 342 + sq = kzalloc(sizeof(*sq), GFP_KERNEL); 343 + if (!sq) 344 + return; 345 + 346 + err = mlx5_alloc_bfreg(mdev, 
&sq->bfreg, true, false); 347 + if (err) { 348 + mlx5_core_err(mdev, "Failed to alloc bfreg for wc, err=%d\n", err); 349 + goto err_alloc_bfreg; 350 + } 351 + 352 + err = mlx5_wc_create_cq(mdev, &sq->cq); 353 + if (err) 354 + goto err_create_cq; 355 + 356 + err = mlx5_wc_create_sq(mdev, sq); 357 + if (err) 358 + goto err_create_sq; 359 + 360 + for (i = 0; i < TEST_WC_NUM_WQES - 1; i++) 361 + mlx5_wc_post_nop(sq, false); 362 + 363 + mlx5_wc_post_nop(sq, true); 364 + 365 + expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; 366 + do { 367 + err = mlx5_wc_poll_cq(sq); 368 + if (err) 369 + usleep_range(2, 10); 370 + } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED && 371 + time_is_after_jiffies(expires)); 372 + 373 + mlx5_wc_destroy_sq(sq); 374 + 375 + err_create_sq: 376 + mlx5_wc_destroy_cq(&sq->cq); 377 + err_create_cq: 378 + mlx5_free_bfreg(mdev, &sq->bfreg); 379 + err_alloc_bfreg: 380 + kfree(sq); 381 + } 382 + 383 + bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) 384 + { 385 + struct mlx5_core_dev *parent = NULL; 386 + 387 + if (!MLX5_CAP_GEN(mdev, bf)) { 388 + mlx5_core_dbg(mdev, "BlueFlame not supported\n"); 389 + goto out; 390 + } 391 + 392 + if (!MLX5_CAP_GEN(mdev, log_max_sq)) { 393 + mlx5_core_dbg(mdev, "SQ not supported\n"); 394 + goto out; 395 + } 396 + 397 + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) 398 + /* No need to lock anything as we perform WC test only 399 + * once for whole device and was already done. 
400 + */ 401 + goto out; 402 + 403 + mutex_lock(&mdev->wc_state_lock); 404 + 405 + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) 406 + goto unlock; 407 + 408 + #ifdef CONFIG_MLX5_SF 409 + if (mlx5_core_is_sf(mdev)) 410 + parent = mdev->priv.parent_mdev; 411 + #endif 412 + 413 + if (parent) { 414 + mutex_lock(&parent->wc_state_lock); 415 + 416 + mlx5_core_test_wc(parent); 417 + 418 + mlx5_core_dbg(mdev, "parent set wc_state=%d\n", 419 + parent->wc_state); 420 + mdev->wc_state = parent->wc_state; 421 + 422 + mutex_unlock(&parent->wc_state_lock); 423 + } 424 + 425 + mlx5_core_test_wc(mdev); 426 + 427 + unlock: 428 + mutex_unlock(&mdev->wc_state_lock); 429 + out: 430 + mlx5_core_dbg(mdev, "wc_state=%d\n", mdev->wc_state); 431 + 432 + return mdev->wc_state == MLX5_WC_STATE_SUPPORTED; 433 + } 434 + EXPORT_SYMBOL(mlx5_wc_support_get);
+11
include/linux/mlx5/driver.h
··· 766 766 u32 max[MLX5_UN_SZ_DW(hca_cap_union)]; 767 767 }; 768 768 769 + enum mlx5_wc_state { 770 + MLX5_WC_STATE_UNINITIALIZED, 771 + MLX5_WC_STATE_UNSUPPORTED, 772 + MLX5_WC_STATE_SUPPORTED, 773 + }; 774 + 769 775 struct mlx5_core_dev { 770 776 struct device *device; 771 777 enum mlx5_coredev_type coredev_type; ··· 830 824 #endif 831 825 u64 num_ipsec_offloads; 832 826 struct mlx5_sd *sd; 827 + enum mlx5_wc_state wc_state; 828 + /* sync write combining state */ 829 + struct mutex wc_state_lock; 833 830 }; 834 831 835 832 struct mlx5_db { ··· 1384 1375 enum { 1385 1376 MLX5_OCTWORD = 16, 1386 1377 }; 1378 + 1379 + bool mlx5_wc_support_get(struct mlx5_core_dev *mdev); 1387 1380 #endif /* MLX5_DRIVER_H */