Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
"This has another new RDMA driver 'bng_en' for latest generation
Broadcom NICs. There might be one more new driver still to come.

Otherwise it is a fairly quiet cycle. Summary:

- Minor driver bug fixes and updates to cxgb4, rxe, rdmavt, bnxt_re,
mlx5

- Many bug fix patches for irdma

- WQ_PERCPU annotations and system_dfl_wq changes

- Improved mlx5 support for "other eswitches" and multiple PFs

- 1600Gbps link speed reporting support. Four Digits Now!

- New driver bng_en for latest generation Broadcom NICs

- Bonding support for hns

- Adjust mlx5's hmm based ODP to work with the very large address
space created by the new 5 level paging default on x86

- Lockdep fixups in rxe and siw"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (65 commits)
RDMA/rxe: reclassify sockets in order to avoid false positives from lockdep
RDMA/siw: reclassify sockets in order to avoid false positives from lockdep
RDMA/bng_re: Remove prefetch instruction
RDMA/core: Reduce cond_resched() frequency in __ib_umem_release
RDMA/irdma: Fix SRQ shadow area address initialization
RDMA/irdma: Remove doorbell elision logic
RDMA/irdma: Do not set IBK_LOCAL_DMA_LKEY for GEN3+
RDMA/irdma: Do not directly rely on IB_PD_UNSAFE_GLOBAL_RKEY
RDMA/irdma: Add missing mutex destroy
RDMA/irdma: Fix SIGBUS in AEQ destroy
RDMA/irdma: Add a missing kfree of struct irdma_pci_f for GEN2
RDMA/irdma: Fix data race in irdma_free_pble
RDMA/irdma: Fix data race in irdma_sc_ccq_arm
RDMA/mlx5: Add support for 1600_8x lane speed
RDMA/core: Add new IB rate for XDR (8x) support
IB/mlx5: Reduce IMR KSM size when 5-level paging is enabled
RDMA/bnxt_re: Pass correct flag for dma mr creation
RDMA/bnxt_re: Fix the inline size for GenP7 devices
RDMA/hns: Support reset recovery for bond
RDMA/hns: Support link state reporting for bond
...

+5066 -473
+7
MAINTAINERS
··· 5243 5243 F: drivers/infiniband/hw/bnxt_re/ 5244 5244 F: include/uapi/rdma/bnxt_re-abi.h 5245 5245 5246 + BROADCOM 800 GIGABIT ROCE DRIVER 5247 + M: Siva Reddy Kallam <siva.kallam@broadcom.com> 5248 + L: linux-rdma@vger.kernel.org 5249 + S: Supported 5250 + W: http://www.broadcom.com 5251 + F: drivers/infiniband/hw/bng_re/ 5252 + 5246 5253 BROADCOM NVRAM DRIVER 5247 5254 M: Rafał Miłecki <zajec5@gmail.com> 5248 5255 L: linux-mips@vger.kernel.org
+1
drivers/infiniband/Kconfig
··· 80 80 if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS 81 81 if !UML 82 82 source "drivers/infiniband/hw/bnxt_re/Kconfig" 83 + source "drivers/infiniband/hw/bng_re/Kconfig" 83 84 source "drivers/infiniband/hw/cxgb4/Kconfig" 84 85 source "drivers/infiniband/hw/efa/Kconfig" 85 86 source "drivers/infiniband/hw/erdma/Kconfig"
+4 -5
drivers/infiniband/core/cm.c
··· 34 34 MODULE_DESCRIPTION("InfiniBand CM"); 35 35 MODULE_LICENSE("Dual BSD/GPL"); 36 36 37 - #define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */ 38 37 #define CM_DIRECT_RETRY_CTX ((void *) 1UL) 39 38 #define CM_MRA_SETTING 24 /* 4.096us * 2^24 = ~68.7 seconds */ 40 39 ··· 1056 1057 { 1057 1058 struct cm_id_private *cm_id_priv; 1058 1059 enum ib_cm_state old_state; 1060 + unsigned long timeout; 1059 1061 struct cm_work *work; 1060 1062 int ret; 1061 1063 ··· 1167 1167 1168 1168 xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id)); 1169 1169 cm_deref_id(cm_id_priv); 1170 + timeout = msecs_to_jiffies((cm_id_priv->max_cm_retries * cm_id_priv->timeout_ms * 5) / 4); 1170 1171 do { 1171 - ret = wait_for_completion_timeout(&cm_id_priv->comp, 1172 - msecs_to_jiffies( 1173 - CM_DESTROY_ID_WAIT_TIMEOUT)); 1172 + ret = wait_for_completion_timeout(&cm_id_priv->comp, timeout); 1174 1173 if (!ret) /* timeout happened */ 1175 1174 cm_destroy_id_wait_timeout(cm_id, old_state); 1176 1175 } while (!ret); ··· 4517 4518 get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); 4518 4519 INIT_LIST_HEAD(&cm.timewait_list); 4519 4520 4520 - cm.wq = alloc_workqueue("ib_cm", 0, 1); 4521 + cm.wq = alloc_workqueue("ib_cm", WQ_PERCPU, 1); 4521 4522 if (!cm.wq) { 4522 4523 ret = -ENOMEM; 4523 4524 goto error2;
+2
drivers/infiniband/core/cma.c
··· 4475 4475 container_of(id, struct rdma_id_private, id); 4476 4476 int ret; 4477 4477 4478 + lockdep_assert_held(&id_priv->handler_mutex); 4479 + 4478 4480 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 4479 4481 return -EINVAL; 4480 4482
+2 -2
drivers/infiniband/core/device.c
··· 3021 3021 { 3022 3022 int ret = -ENOMEM; 3023 3023 3024 - ib_wq = alloc_workqueue("infiniband", 0, 0); 3024 + ib_wq = alloc_workqueue("infiniband", WQ_PERCPU, 0); 3025 3025 if (!ib_wq) 3026 3026 return -ENOMEM; 3027 3027 ··· 3031 3031 goto err; 3032 3032 3033 3033 ib_comp_wq = alloc_workqueue("ib-comp-wq", 3034 - WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); 3034 + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_PERCPU, 0); 3035 3035 if (!ib_comp_wq) 3036 3036 goto err_unbound; 3037 3037
+2 -2
drivers/infiniband/core/restrack.c
··· 175 175 EXPORT_SYMBOL(rdma_restrack_new); 176 176 177 177 /** 178 - * rdma_restrack_add() - add object to the reource tracking database 178 + * rdma_restrack_add() - add object to the resource tracking database 179 179 * @res: resource entry 180 180 */ 181 181 void rdma_restrack_add(struct rdma_restrack_entry *res) ··· 277 277 EXPORT_SYMBOL(rdma_restrack_put); 278 278 279 279 /** 280 - * rdma_restrack_del() - delete object from the reource tracking database 280 + * rdma_restrack_del() - delete object from the resource tracking database 281 281 * @res: resource entry 282 282 */ 283 283 void rdma_restrack_del(struct rdma_restrack_entry *res)
+1 -1
drivers/infiniband/core/ucma.c
··· 366 366 if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { 367 367 xa_lock(&ctx_table); 368 368 if (xa_load(&ctx_table, ctx->id) == ctx) 369 - queue_work(system_unbound_wq, &ctx->close_work); 369 + queue_work(system_dfl_wq, &ctx->close_work); 370 370 xa_unlock(&ctx_table); 371 371 } 372 372 return 0;
+7 -1
drivers/infiniband/core/umem.c
··· 45 45 46 46 #include "uverbs.h" 47 47 48 + #define RESCHED_LOOP_CNT_THRESHOLD 0x1000 49 + 48 50 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) 49 51 { 50 52 bool make_dirty = umem->writable && dirty; ··· 57 55 ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt, 58 56 DMA_BIDIRECTIONAL, 0); 59 57 60 - for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) 58 + for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) { 61 59 unpin_user_page_range_dirty_lock(sg_page(sg), 62 60 DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty); 61 + 62 + if (i && !(i % RESCHED_LOOP_CNT_THRESHOLD)) 63 + cond_resched(); 64 + } 63 65 64 66 sg_free_append_table(&umem->sgt_append); 65 67 }
+3
drivers/infiniband/core/verbs.c
··· 148 148 case IB_RATE_400_GBPS: return 160; 149 149 case IB_RATE_600_GBPS: return 240; 150 150 case IB_RATE_800_GBPS: return 320; 151 + case IB_RATE_1600_GBPS: return 640; 151 152 default: return -1; 152 153 } 153 154 } ··· 179 178 case 160: return IB_RATE_400_GBPS; 180 179 case 240: return IB_RATE_600_GBPS; 181 180 case 320: return IB_RATE_800_GBPS; 181 + case 640: return IB_RATE_1600_GBPS; 182 182 default: return IB_RATE_PORT_CURRENT; 183 183 } 184 184 } ··· 210 208 case IB_RATE_400_GBPS: return 425000; 211 209 case IB_RATE_600_GBPS: return 637500; 212 210 case IB_RATE_800_GBPS: return 850000; 211 + case IB_RATE_1600_GBPS: return 1700000; 213 212 default: return -1; 214 213 } 215 214 }
+1
drivers/infiniband/hw/Makefile
··· 13 13 obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/ 14 14 obj-$(CONFIG_INFINIBAND_QEDR) += qedr/ 15 15 obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/ 16 + obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re/ 16 17 obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/ 17 18 obj-$(CONFIG_INFINIBAND_IONIC) += ionic/
+10
drivers/infiniband/hw/bng_re/Kconfig
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + config INFINIBAND_BNG_RE 3 + tristate "Broadcom Next generation RoCE HCA support" 4 + depends on 64BIT 5 + depends on INET && DCB && BNGE 6 + help 7 + This driver supports Broadcom Next generation 8 + 50/100/200/400/800 gigabit RoCE HCAs. The module 9 + will be called bng_re. To compile this driver 10 + as a module, choose M here.
+8
drivers/infiniband/hw/bng_re/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnge -I $(srctree)/drivers/infiniband/hw/bnxt_re 3 + 4 + obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re.o 5 + 6 + bng_re-y := bng_dev.o bng_fw.o \ 7 + bng_res.o bng_sp.o \ 8 + bng_debugfs.o
+39
drivers/infiniband/hw/bng_re/bng_debugfs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2025 Broadcom. 3 + #include <linux/debugfs.h> 4 + #include <linux/pci.h> 5 + 6 + #include <rdma/ib_verbs.h> 7 + 8 + #include "bng_res.h" 9 + #include "bng_fw.h" 10 + #include "bnge.h" 11 + #include "bnge_auxr.h" 12 + #include "bng_re.h" 13 + #include "bng_debugfs.h" 14 + 15 + static struct dentry *bng_re_debugfs_root; 16 + 17 + void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev) 18 + { 19 + struct pci_dev *pdev = rdev->aux_dev->pdev; 20 + 21 + rdev->dbg_root = 22 + debugfs_create_dir(dev_name(&pdev->dev), bng_re_debugfs_root); 23 + } 24 + 25 + void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev) 26 + { 27 + debugfs_remove_recursive(rdev->dbg_root); 28 + rdev->dbg_root = NULL; 29 + } 30 + 31 + void bng_re_register_debugfs(void) 32 + { 33 + bng_re_debugfs_root = debugfs_create_dir("bng_re", NULL); 34 + } 35 + 36 + void bng_re_unregister_debugfs(void) 37 + { 38 + debugfs_remove(bng_re_debugfs_root); 39 + }
+12
drivers/infiniband/hw/bng_re/bng_debugfs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #ifndef __BNG_RE_DEBUGFS__ 5 + #define __BNG_RE_DEBUGFS__ 6 + 7 + void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev); 8 + void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev); 9 + 10 + void bng_re_register_debugfs(void); 11 + void bng_re_unregister_debugfs(void); 12 + #endif
+534
drivers/infiniband/hw/bng_re/bng_dev.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #include <linux/module.h> 5 + #include <linux/pci.h> 6 + #include <linux/auxiliary_bus.h> 7 + 8 + #include <rdma/ib_verbs.h> 9 + 10 + #include "bng_res.h" 11 + #include "bng_sp.h" 12 + #include "bng_fw.h" 13 + #include "bnge.h" 14 + #include "bnge_auxr.h" 15 + #include "bng_re.h" 16 + #include "bnge_hwrm.h" 17 + #include "bng_debugfs.h" 18 + 19 + MODULE_AUTHOR("Siva Reddy Kallam <siva.kallam@broadcom.com>"); 20 + MODULE_DESCRIPTION(BNG_RE_DESC); 21 + MODULE_LICENSE("Dual BSD/GPL"); 22 + 23 + static struct bng_re_dev *bng_re_dev_add(struct auxiliary_device *adev, 24 + struct bnge_auxr_dev *aux_dev) 25 + { 26 + struct bng_re_dev *rdev; 27 + 28 + /* Allocate bng_re_dev instance */ 29 + rdev = ib_alloc_device(bng_re_dev, ibdev); 30 + if (!rdev) { 31 + pr_err("%s: bng_re_dev allocation failure!", KBUILD_MODNAME); 32 + return NULL; 33 + } 34 + 35 + /* Assign auxiliary device specific data */ 36 + rdev->netdev = aux_dev->net; 37 + rdev->aux_dev = aux_dev; 38 + rdev->adev = adev; 39 + rdev->fn_id = rdev->aux_dev->pdev->devfn; 40 + 41 + return rdev; 42 + } 43 + 44 + 45 + static int bng_re_register_netdev(struct bng_re_dev *rdev) 46 + { 47 + struct bnge_auxr_dev *aux_dev; 48 + 49 + aux_dev = rdev->aux_dev; 50 + return bnge_register_dev(aux_dev, rdev->adev); 51 + } 52 + 53 + static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev) 54 + { 55 + struct bng_re_chip_ctx *chip_ctx; 56 + 57 + if (!rdev->chip_ctx) 58 + return; 59 + 60 + kfree(rdev->dev_attr); 61 + rdev->dev_attr = NULL; 62 + 63 + chip_ctx = rdev->chip_ctx; 64 + rdev->chip_ctx = NULL; 65 + rdev->rcfw.res = NULL; 66 + rdev->bng_res.cctx = NULL; 67 + rdev->bng_res.pdev = NULL; 68 + kfree(chip_ctx); 69 + } 70 + 71 + static int bng_re_setup_chip_ctx(struct bng_re_dev *rdev) 72 + { 73 + struct bng_re_chip_ctx *chip_ctx; 74 + struct bnge_auxr_dev *aux_dev; 75 + int rc = -ENOMEM; 76 + 77 + aux_dev = rdev->aux_dev; 78 + 
rdev->bng_res.pdev = aux_dev->pdev; 79 + rdev->rcfw.res = &rdev->bng_res; 80 + chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL); 81 + if (!chip_ctx) 82 + return -ENOMEM; 83 + chip_ctx->chip_num = aux_dev->chip_num; 84 + chip_ctx->hw_stats_size = aux_dev->hw_ring_stats_size; 85 + 86 + rdev->chip_ctx = chip_ctx; 87 + rdev->bng_res.cctx = rdev->chip_ctx; 88 + rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL); 89 + if (!rdev->dev_attr) 90 + goto free_chip_ctx; 91 + rdev->bng_res.dattr = rdev->dev_attr; 92 + 93 + return 0; 94 + free_chip_ctx: 95 + kfree(rdev->chip_ctx); 96 + rdev->chip_ctx = NULL; 97 + return rc; 98 + } 99 + 100 + static void bng_re_init_hwrm_hdr(struct input *hdr, u16 opcd) 101 + { 102 + hdr->req_type = cpu_to_le16(opcd); 103 + hdr->cmpl_ring = cpu_to_le16(-1); 104 + hdr->target_id = cpu_to_le16(-1); 105 + } 106 + 107 + static void bng_re_fill_fw_msg(struct bnge_fw_msg *fw_msg, void *msg, 108 + int msg_len, void *resp, int resp_max_len, 109 + int timeout) 110 + { 111 + fw_msg->msg = msg; 112 + fw_msg->msg_len = msg_len; 113 + fw_msg->resp = resp; 114 + fw_msg->resp_max_len = resp_max_len; 115 + fw_msg->timeout = timeout; 116 + } 117 + 118 + static int bng_re_net_ring_free(struct bng_re_dev *rdev, 119 + u16 fw_ring_id, int type) 120 + { 121 + struct bnge_auxr_dev *aux_dev = rdev->aux_dev; 122 + struct hwrm_ring_free_input req = {}; 123 + struct hwrm_ring_free_output resp; 124 + struct bnge_fw_msg fw_msg = {}; 125 + int rc = -EINVAL; 126 + 127 + if (!rdev) 128 + return rc; 129 + 130 + if (!aux_dev) 131 + return rc; 132 + 133 + bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE); 134 + req.ring_type = type; 135 + req.ring_id = cpu_to_le16(fw_ring_id); 136 + bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 137 + sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT); 138 + rc = bnge_send_msg(aux_dev, &fw_msg); 139 + if (rc) 140 + ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x", 141 + req.ring_id, rc); 142 + return rc; 143 + } 
144 + 145 + static int bng_re_net_ring_alloc(struct bng_re_dev *rdev, 146 + struct bng_re_ring_attr *ring_attr, 147 + u16 *fw_ring_id) 148 + { 149 + struct bnge_auxr_dev *aux_dev = rdev->aux_dev; 150 + struct hwrm_ring_alloc_input req = {}; 151 + struct hwrm_ring_alloc_output resp; 152 + struct bnge_fw_msg fw_msg = {}; 153 + int rc = -EINVAL; 154 + 155 + if (!aux_dev) 156 + return rc; 157 + 158 + bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC); 159 + req.enables = 0; 160 + req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]); 161 + if (ring_attr->pages > 1) { 162 + /* Page size is in log2 units */ 163 + req.page_size = BNGE_PAGE_SHIFT; 164 + req.page_tbl_depth = 1; 165 + } 166 + req.fbo = 0; 167 + /* Association of ring index with doorbell index and MSIX number */ 168 + req.logical_id = cpu_to_le16(ring_attr->lrid); 169 + req.length = cpu_to_le32(ring_attr->depth + 1); 170 + req.ring_type = ring_attr->type; 171 + req.int_mode = ring_attr->mode; 172 + bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 173 + sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT); 174 + rc = bnge_send_msg(aux_dev, &fw_msg); 175 + if (!rc) 176 + *fw_ring_id = le16_to_cpu(resp.ring_id); 177 + 178 + return rc; 179 + } 180 + 181 + static int bng_re_stats_ctx_free(struct bng_re_dev *rdev) 182 + { 183 + struct bnge_auxr_dev *aux_dev = rdev->aux_dev; 184 + struct hwrm_stat_ctx_free_input req = {}; 185 + struct hwrm_stat_ctx_free_output resp = {}; 186 + struct bnge_fw_msg fw_msg = {}; 187 + int rc = -EINVAL; 188 + 189 + if (!aux_dev) 190 + return rc; 191 + 192 + bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE); 193 + req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id); 194 + bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 195 + sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT); 196 + rc = bnge_send_msg(aux_dev, &fw_msg); 197 + if (rc) 198 + ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x", 199 + rc); 200 + 201 + return rc; 202 + } 203 + 204 + 
static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev) 205 + { 206 + struct bnge_auxr_dev *aux_dev = rdev->aux_dev; 207 + struct bng_re_stats *stats = &rdev->stats_ctx; 208 + struct hwrm_stat_ctx_alloc_output resp = {}; 209 + struct hwrm_stat_ctx_alloc_input req = {}; 210 + struct bnge_fw_msg fw_msg = {}; 211 + int rc = -EINVAL; 212 + 213 + stats->fw_id = BNGE_INVALID_STATS_CTX_ID; 214 + 215 + if (!aux_dev) 216 + return rc; 217 + 218 + bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); 219 + req.update_period_ms = cpu_to_le32(1000); 220 + req.stats_dma_addr = cpu_to_le64(stats->dma_map); 221 + req.stats_dma_length = cpu_to_le16(rdev->chip_ctx->hw_stats_size); 222 + req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; 223 + bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 224 + sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT); 225 + rc = bnge_send_msg(aux_dev, &fw_msg); 226 + if (!rc) 227 + stats->fw_id = le32_to_cpu(resp.stat_ctx_id); 228 + return rc; 229 + } 230 + 231 + static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) 232 + { 233 + struct bnge_auxr_dev *aux_dev = rdev->aux_dev; 234 + struct hwrm_ver_get_output ver_get_resp = {}; 235 + struct hwrm_ver_get_input ver_get_req = {}; 236 + struct bng_re_chip_ctx *cctx; 237 + struct bnge_fw_msg fw_msg = {}; 238 + int rc; 239 + 240 + bng_re_init_hwrm_hdr((void *)&ver_get_req, HWRM_VER_GET); 241 + ver_get_req.hwrm_intf_maj = HWRM_VERSION_MAJOR; 242 + ver_get_req.hwrm_intf_min = HWRM_VERSION_MINOR; 243 + ver_get_req.hwrm_intf_upd = HWRM_VERSION_UPDATE; 244 + bng_re_fill_fw_msg(&fw_msg, (void *)&ver_get_req, sizeof(ver_get_req), 245 + (void *)&ver_get_resp, sizeof(ver_get_resp), 246 + BNGE_DFLT_HWRM_CMD_TIMEOUT); 247 + rc = bnge_send_msg(aux_dev, &fw_msg); 248 + if (rc) { 249 + ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x", 250 + rc); 251 + return; 252 + } 253 + 254 + cctx = rdev->chip_ctx; 255 + cctx->hwrm_intf_ver = 256 + 
(u64)le16_to_cpu(ver_get_resp.hwrm_intf_major) << 48 | 257 + (u64)le16_to_cpu(ver_get_resp.hwrm_intf_minor) << 32 | 258 + (u64)le16_to_cpu(ver_get_resp.hwrm_intf_build) << 16 | 259 + le16_to_cpu(ver_get_resp.hwrm_intf_patch); 260 + 261 + cctx->hwrm_cmd_max_timeout = le16_to_cpu(ver_get_resp.max_req_timeout); 262 + 263 + if (!cctx->hwrm_cmd_max_timeout) 264 + cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT; 265 + } 266 + 267 + static void bng_re_dev_uninit(struct bng_re_dev *rdev) 268 + { 269 + int rc; 270 + bng_re_debugfs_rem_pdev(rdev); 271 + 272 + if (test_and_clear_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { 273 + rc = bng_re_deinit_rcfw(&rdev->rcfw); 274 + if (rc) 275 + ibdev_warn(&rdev->ibdev, 276 + "Failed to deinitialize RCFW: %#x", rc); 277 + bng_re_stats_ctx_free(rdev); 278 + bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx); 279 + bng_re_disable_rcfw_channel(&rdev->rcfw); 280 + bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, 281 + RING_ALLOC_REQ_RING_TYPE_NQ); 282 + bng_re_free_rcfw_channel(&rdev->rcfw); 283 + } 284 + 285 + kfree(rdev->nqr); 286 + rdev->nqr = NULL; 287 + bng_re_destroy_chip_ctx(rdev); 288 + if (test_and_clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) 289 + bnge_unregister_dev(rdev->aux_dev); 290 + } 291 + 292 + static int bng_re_dev_init(struct bng_re_dev *rdev) 293 + { 294 + struct bng_re_ring_attr rattr = {}; 295 + struct bng_re_creq_ctx *creq; 296 + u32 db_offt; 297 + int vid; 298 + u8 type; 299 + int rc; 300 + 301 + /* Registered a new RoCE device instance to netdev */ 302 + rc = bng_re_register_netdev(rdev); 303 + if (rc) { 304 + ibdev_err(&rdev->ibdev, 305 + "Failed to register with netedev: %#x\n", rc); 306 + return -EINVAL; 307 + } 308 + 309 + set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 310 + 311 + if (rdev->aux_dev->auxr_info->msix_requested < BNG_RE_MIN_MSIX) { 312 + ibdev_err(&rdev->ibdev, 313 + "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", 314 + 
rdev->aux_dev->auxr_info->msix_requested); 315 + bnge_unregister_dev(rdev->aux_dev); 316 + clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 317 + return -EINVAL; 318 + } 319 + ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", 320 + rdev->aux_dev->auxr_info->msix_requested); 321 + 322 + rc = bng_re_setup_chip_ctx(rdev); 323 + if (rc) { 324 + bnge_unregister_dev(rdev->aux_dev); 325 + clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 326 + ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); 327 + return -EINVAL; 328 + } 329 + 330 + bng_re_query_hwrm_version(rdev); 331 + 332 + rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw); 333 + if (rc) { 334 + ibdev_err(&rdev->ibdev, 335 + "Failed to allocate RCFW Channel: %#x\n", rc); 336 + goto fail; 337 + } 338 + 339 + /* Allocate nq record memory */ 340 + rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL); 341 + if (!rdev->nqr) { 342 + bng_re_destroy_chip_ctx(rdev); 343 + bnge_unregister_dev(rdev->aux_dev); 344 + clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 345 + return -ENOMEM; 346 + } 347 + 348 + rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested; 349 + memcpy(rdev->nqr->msix_entries, rdev->aux_dev->msix_info, 350 + sizeof(struct bnge_msix_info) * rdev->nqr->num_msix); 351 + 352 + type = RING_ALLOC_REQ_RING_TYPE_NQ; 353 + creq = &rdev->rcfw.creq; 354 + rattr.dma_arr = creq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr; 355 + rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count; 356 + rattr.type = type; 357 + rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; 358 + rattr.depth = BNG_FW_CREQE_MAX_CNT - 1; 359 + rattr.lrid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].ring_idx; 360 + rc = bng_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); 361 + if (rc) { 362 + ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); 363 + goto free_rcfw; 364 + } 365 + db_offt = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].db_offset; 366 + vid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].vector; 367 + 368 
+ rc = bng_re_enable_fw_channel(&rdev->rcfw, 369 + vid, db_offt); 370 + if (rc) { 371 + ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n", 372 + rc); 373 + goto free_ring; 374 + } 375 + 376 + rc = bng_re_get_dev_attr(&rdev->rcfw); 377 + if (rc) 378 + goto disable_rcfw; 379 + 380 + bng_re_debugfs_add_pdev(rdev); 381 + rc = bng_re_alloc_stats_ctx_mem(rdev->bng_res.pdev, rdev->chip_ctx, 382 + &rdev->stats_ctx); 383 + if (rc) { 384 + ibdev_err(&rdev->ibdev, 385 + "Failed to allocate stats context: %#x\n", rc); 386 + goto disable_rcfw; 387 + } 388 + 389 + rc = bng_re_stats_ctx_alloc(rdev); 390 + if (rc) { 391 + ibdev_err(&rdev->ibdev, 392 + "Failed to allocate QPLIB context: %#x\n", rc); 393 + goto free_stats_ctx; 394 + } 395 + 396 + rc = bng_re_init_rcfw(&rdev->rcfw, &rdev->stats_ctx); 397 + if (rc) { 398 + ibdev_err(&rdev->ibdev, 399 + "Failed to initialize RCFW: %#x\n", rc); 400 + goto free_sctx; 401 + } 402 + set_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags); 403 + 404 + return 0; 405 + free_sctx: 406 + bng_re_stats_ctx_free(rdev); 407 + free_stats_ctx: 408 + bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx); 409 + disable_rcfw: 410 + bng_re_disable_rcfw_channel(&rdev->rcfw); 411 + free_ring: 412 + bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); 413 + free_rcfw: 414 + bng_re_free_rcfw_channel(&rdev->rcfw); 415 + fail: 416 + bng_re_dev_uninit(rdev); 417 + return rc; 418 + } 419 + 420 + static int bng_re_add_device(struct auxiliary_device *adev) 421 + { 422 + struct bnge_auxr_priv *auxr_priv = 423 + container_of(adev, struct bnge_auxr_priv, aux_dev); 424 + struct bng_re_en_dev_info *dev_info; 425 + struct bng_re_dev *rdev; 426 + int rc; 427 + 428 + dev_info = auxiliary_get_drvdata(adev); 429 + 430 + rdev = bng_re_dev_add(adev, auxr_priv->auxr_dev); 431 + if (!rdev) { 432 + rc = -ENOMEM; 433 + goto exit; 434 + } 435 + 436 + dev_info->rdev = rdev; 437 + 438 + rc = bng_re_dev_init(rdev); 439 + if (rc) 440 + goto re_dev_dealloc; 441 
+ 442 + return 0; 443 + 444 + re_dev_dealloc: 445 + ib_dealloc_device(&rdev->ibdev); 446 + exit: 447 + return rc; 448 + } 449 + 450 + 451 + static void bng_re_remove_device(struct bng_re_dev *rdev, 452 + struct auxiliary_device *aux_dev) 453 + { 454 + bng_re_dev_uninit(rdev); 455 + ib_dealloc_device(&rdev->ibdev); 456 + } 457 + 458 + 459 + static int bng_re_probe(struct auxiliary_device *adev, 460 + const struct auxiliary_device_id *id) 461 + { 462 + struct bnge_auxr_priv *aux_priv = 463 + container_of(adev, struct bnge_auxr_priv, aux_dev); 464 + struct bng_re_en_dev_info *en_info; 465 + int rc; 466 + 467 + en_info = kzalloc(sizeof(*en_info), GFP_KERNEL); 468 + if (!en_info) 469 + return -ENOMEM; 470 + 471 + en_info->auxr_dev = aux_priv->auxr_dev; 472 + 473 + auxiliary_set_drvdata(adev, en_info); 474 + 475 + rc = bng_re_add_device(adev); 476 + if (rc) 477 + kfree(en_info); 478 + 479 + return rc; 480 + } 481 + 482 + static void bng_re_remove(struct auxiliary_device *adev) 483 + { 484 + struct bng_re_en_dev_info *dev_info = auxiliary_get_drvdata(adev); 485 + struct bng_re_dev *rdev; 486 + 487 + rdev = dev_info->rdev; 488 + 489 + if (rdev) 490 + bng_re_remove_device(rdev, adev); 491 + kfree(dev_info); 492 + } 493 + 494 + static const struct auxiliary_device_id bng_re_id_table[] = { 495 + { .name = BNG_RE_ADEV_NAME ".rdma", }, 496 + {}, 497 + }; 498 + 499 + MODULE_DEVICE_TABLE(auxiliary, bng_re_id_table); 500 + 501 + static struct auxiliary_driver bng_re_driver = { 502 + .name = "rdma", 503 + .probe = bng_re_probe, 504 + .remove = bng_re_remove, 505 + .id_table = bng_re_id_table, 506 + }; 507 + 508 + static int __init bng_re_mod_init(void) 509 + { 510 + int rc; 511 + 512 + 513 + bng_re_register_debugfs(); 514 + 515 + rc = auxiliary_driver_register(&bng_re_driver); 516 + if (rc) { 517 + pr_err("%s: Failed to register auxiliary driver\n", 518 + KBUILD_MODNAME); 519 + goto unreg_debugfs; 520 + } 521 + return 0; 522 + unreg_debugfs: 523 + bng_re_unregister_debugfs(); 524 + 
return rc; 525 + } 526 + 527 + static void __exit bng_re_mod_exit(void) 528 + { 529 + auxiliary_driver_unregister(&bng_re_driver); 530 + bng_re_unregister_debugfs(); 531 + } 532 + 533 + module_init(bng_re_mod_init); 534 + module_exit(bng_re_mod_exit);
+767
drivers/infiniband/hw/bng_re/bng_fw.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2025 Broadcom. 3 + #include <linux/pci.h> 4 + 5 + #include "roce_hsi.h" 6 + #include "bng_res.h" 7 + #include "bng_fw.h" 8 + #include "bng_sp.h" 9 + 10 + /** 11 + * bng_re_map_rc - map return type based on opcode 12 + * @opcode: roce slow path opcode 13 + * 14 + * case #1 15 + * Firmware initiated error recovery is a safe state machine and 16 + * driver can consider all the underlying rdma resources are free. 17 + * In this state, it is safe to return success for opcodes related to 18 + * destroying rdma resources (like destroy qp, destroy cq etc.). 19 + * 20 + * case #2 21 + * If driver detect potential firmware stall, it is not safe state machine 22 + * and the driver can not consider all the underlying rdma resources are 23 + * freed. 24 + * In this state, it is not safe to return success for opcodes related to 25 + * destroying rdma resources (like destroy qp, destroy cq etc.). 26 + * 27 + * Scope of this helper function is only for case #1. 28 + * 29 + * Returns: 30 + * 0 to communicate success to caller. 31 + * Non zero error code to communicate failure to caller. 
32 + */ 33 + static int bng_re_map_rc(u8 opcode) 34 + { 35 + switch (opcode) { 36 + case CMDQ_BASE_OPCODE_DESTROY_QP: 37 + case CMDQ_BASE_OPCODE_DESTROY_SRQ: 38 + case CMDQ_BASE_OPCODE_DESTROY_CQ: 39 + case CMDQ_BASE_OPCODE_DEALLOCATE_KEY: 40 + case CMDQ_BASE_OPCODE_DEREGISTER_MR: 41 + case CMDQ_BASE_OPCODE_DELETE_GID: 42 + case CMDQ_BASE_OPCODE_DESTROY_QP1: 43 + case CMDQ_BASE_OPCODE_DESTROY_AH: 44 + case CMDQ_BASE_OPCODE_DEINITIALIZE_FW: 45 + case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC: 46 + case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE: 47 + return 0; 48 + default: 49 + return -ETIMEDOUT; 50 + } 51 + } 52 + 53 + void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw) 54 + { 55 + kfree(rcfw->crsqe_tbl); 56 + bng_re_free_hwq(rcfw->res, &rcfw->cmdq.hwq); 57 + bng_re_free_hwq(rcfw->res, &rcfw->creq.hwq); 58 + rcfw->pdev = NULL; 59 + } 60 + 61 + int bng_re_alloc_fw_channel(struct bng_re_res *res, 62 + struct bng_re_rcfw *rcfw) 63 + { 64 + struct bng_re_hwq_attr hwq_attr = {}; 65 + struct bng_re_sg_info sginfo = {}; 66 + struct bng_re_cmdq_ctx *cmdq; 67 + struct bng_re_creq_ctx *creq; 68 + 69 + rcfw->pdev = res->pdev; 70 + cmdq = &rcfw->cmdq; 71 + creq = &rcfw->creq; 72 + rcfw->res = res; 73 + 74 + sginfo.pgsize = PAGE_SIZE; 75 + sginfo.pgshft = PAGE_SHIFT; 76 + 77 + hwq_attr.sginfo = &sginfo; 78 + hwq_attr.res = rcfw->res; 79 + hwq_attr.depth = BNG_FW_CREQE_MAX_CNT; 80 + hwq_attr.stride = BNG_FW_CREQE_UNITS; 81 + hwq_attr.type = BNG_HWQ_TYPE_QUEUE; 82 + 83 + if (bng_re_alloc_init_hwq(&creq->hwq, &hwq_attr)) { 84 + dev_err(&rcfw->pdev->dev, 85 + "HW channel CREQ allocation failed\n"); 86 + goto fail; 87 + } 88 + 89 + rcfw->cmdq_depth = BNG_FW_CMDQE_MAX_CNT; 90 + 91 + sginfo.pgsize = bng_fw_cmdqe_page_size(rcfw->cmdq_depth); 92 + hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF; 93 + hwq_attr.stride = BNG_FW_CMDQE_UNITS; 94 + hwq_attr.type = BNG_HWQ_TYPE_CTX; 95 + if (bng_re_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) { 96 + dev_err(&rcfw->pdev->dev, 97 + "HW channel CMDQ allocation 
failed\n"); 98 + goto fail; 99 + } 100 + 101 + rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements, 102 + sizeof(*rcfw->crsqe_tbl), GFP_KERNEL); 103 + if (!rcfw->crsqe_tbl) 104 + goto fail; 105 + 106 + spin_lock_init(&rcfw->tbl_lock); 107 + 108 + rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout; 109 + return 0; 110 + 111 + fail: 112 + bng_re_free_rcfw_channel(rcfw); 113 + return -ENOMEM; 114 + } 115 + 116 + static int bng_re_process_qp_event(struct bng_re_rcfw *rcfw, 117 + struct creq_qp_event *qp_event, 118 + u32 *num_wait) 119 + { 120 + struct bng_re_hwq *hwq = &rcfw->cmdq.hwq; 121 + struct bng_re_crsqe *crsqe; 122 + u32 req_size; 123 + u16 cookie; 124 + bool is_waiter_alive; 125 + struct pci_dev *pdev; 126 + u32 wait_cmds = 0; 127 + int rc = 0; 128 + 129 + pdev = rcfw->pdev; 130 + switch (qp_event->event) { 131 + case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: 132 + dev_err(&pdev->dev, "Received QP error notification\n"); 133 + break; 134 + default: 135 + /* 136 + * Command Response 137 + * cmdq->lock needs to be acquired to synchronie 138 + * the command send and completion reaping. This function 139 + * is always called with creq->lock held. Using 140 + * the nested variant of spin_lock. 
141 + * 142 + */ 143 + 144 + spin_lock_nested(&hwq->lock, SINGLE_DEPTH_NESTING); 145 + cookie = le16_to_cpu(qp_event->cookie); 146 + cookie &= BNG_FW_MAX_COOKIE_VALUE; 147 + crsqe = &rcfw->crsqe_tbl[cookie]; 148 + 149 + if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED, 150 + &rcfw->cmdq.flags), 151 + "Unreponsive rcfw channel detected.!!")) { 152 + dev_info(&pdev->dev, 153 + "rcfw timedout: cookie = %#x, free_slots = %d", 154 + cookie, crsqe->free_slots); 155 + spin_unlock(&hwq->lock); 156 + return rc; 157 + } 158 + 159 + if (crsqe->is_waiter_alive) { 160 + if (crsqe->resp) { 161 + memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); 162 + /* Insert write memory barrier to ensure that 163 + * response data is copied before clearing the 164 + * flags 165 + */ 166 + smp_wmb(); 167 + } 168 + } 169 + 170 + wait_cmds++; 171 + 172 + req_size = crsqe->req_size; 173 + is_waiter_alive = crsqe->is_waiter_alive; 174 + 175 + crsqe->req_size = 0; 176 + if (!is_waiter_alive) 177 + crsqe->resp = NULL; 178 + 179 + crsqe->is_in_used = false; 180 + 181 + hwq->cons += req_size; 182 + 183 + spin_unlock(&hwq->lock); 184 + } 185 + *num_wait += wait_cmds; 186 + return rc; 187 + } 188 + 189 + /* function events */ 190 + static int bng_re_process_func_event(struct bng_re_rcfw *rcfw, 191 + struct creq_func_event *func_event) 192 + { 193 + switch (func_event->event) { 194 + case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR: 195 + case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR: 196 + case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR: 197 + case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR: 198 + case CREQ_FUNC_EVENT_EVENT_CQ_ERROR: 199 + case CREQ_FUNC_EVENT_EVENT_TQM_ERROR: 200 + case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR: 201 + case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR: 202 + case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR: 203 + case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR: 204 + case CREQ_FUNC_EVENT_EVENT_TIM_ERROR: 205 + case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST: 206 + case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED: 207 + break; 208 + default: 209 + 
return -EINVAL; 210 + } 211 + 212 + return 0; 213 + } 214 + 215 + /* CREQ Completion handlers */ 216 + static void bng_re_service_creq(struct tasklet_struct *t) 217 + { 218 + struct bng_re_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet); 219 + struct bng_re_creq_ctx *creq = &rcfw->creq; 220 + u32 type, budget = BNG_FW_CREQ_ENTRY_POLL_BUDGET; 221 + struct bng_re_hwq *hwq = &creq->hwq; 222 + struct creq_base *creqe; 223 + u32 num_wakeup = 0; 224 + u32 hw_polled = 0; 225 + 226 + /* Service the CREQ until budget is over */ 227 + spin_lock_bh(&hwq->lock); 228 + while (budget > 0) { 229 + creqe = bng_re_get_qe(hwq, hwq->cons, NULL); 230 + if (!BNG_FW_CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags)) 231 + break; 232 + /* The valid test of the entry must be done first before 233 + * reading any further. 234 + */ 235 + dma_rmb(); 236 + 237 + type = creqe->type & CREQ_BASE_TYPE_MASK; 238 + switch (type) { 239 + case CREQ_BASE_TYPE_QP_EVENT: 240 + bng_re_process_qp_event 241 + (rcfw, (struct creq_qp_event *)creqe, 242 + &num_wakeup); 243 + creq->stats.creq_qp_event_processed++; 244 + break; 245 + case CREQ_BASE_TYPE_FUNC_EVENT: 246 + if (!bng_re_process_func_event 247 + (rcfw, (struct creq_func_event *)creqe)) 248 + creq->stats.creq_func_event_processed++; 249 + else 250 + dev_warn(&rcfw->pdev->dev, 251 + "aeqe:%#x Not handled\n", type); 252 + break; 253 + default: 254 + if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT) 255 + dev_warn(&rcfw->pdev->dev, 256 + "creqe with event 0x%x not handled\n", 257 + type); 258 + break; 259 + } 260 + budget--; 261 + hw_polled++; 262 + bng_re_hwq_incr_cons(hwq->max_elements, &hwq->cons, 263 + 1, &creq->creq_db.dbinfo.flags); 264 + } 265 + 266 + if (hw_polled) 267 + bng_re_ring_nq_db(&creq->creq_db.dbinfo, 268 + rcfw->res->cctx, true); 269 + spin_unlock_bh(&hwq->lock); 270 + if (num_wakeup) 271 + wake_up_nr(&rcfw->cmdq.waitq, num_wakeup); 272 + } 273 + 274 + static int __send_message_basic_sanity(struct bng_re_rcfw *rcfw, 275 + 
struct bng_re_cmdqmsg *msg, 276 + u8 opcode) 277 + { 278 + struct bng_re_cmdq_ctx *cmdq; 279 + 280 + cmdq = &rcfw->cmdq; 281 + 282 + if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) 283 + return -ETIMEDOUT; 284 + 285 + if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && 286 + opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { 287 + dev_err(&rcfw->pdev->dev, "RCFW already initialized!"); 288 + return -EINVAL; 289 + } 290 + 291 + if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && 292 + (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC && 293 + opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && 294 + opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { 295 + dev_err(&rcfw->pdev->dev, 296 + "RCFW not initialized, reject opcode 0x%x", 297 + opcode); 298 + return -EOPNOTSUPP; 299 + } 300 + 301 + return 0; 302 + } 303 + 304 + static int __send_message(struct bng_re_rcfw *rcfw, 305 + struct bng_re_cmdqmsg *msg, u8 opcode) 306 + { 307 + u32 bsize, free_slots, required_slots; 308 + struct bng_re_cmdq_ctx *cmdq; 309 + struct bng_re_crsqe *crsqe; 310 + struct bng_fw_cmdqe *cmdqe; 311 + struct bng_re_hwq *hwq; 312 + u32 sw_prod, cmdq_prod; 313 + struct pci_dev *pdev; 314 + u16 cookie; 315 + u8 *preq; 316 + 317 + cmdq = &rcfw->cmdq; 318 + hwq = &cmdq->hwq; 319 + pdev = rcfw->pdev; 320 + 321 + /* Cmdq are in 16-byte units, each request can consume 1 or more 322 + * cmdqe 323 + */ 324 + spin_lock_bh(&hwq->lock); 325 + required_slots = bng_re_get_cmd_slots(msg->req); 326 + free_slots = HWQ_FREE_SLOTS(hwq); 327 + cookie = cmdq->seq_num & BNG_FW_MAX_COOKIE_VALUE; 328 + crsqe = &rcfw->crsqe_tbl[cookie]; 329 + 330 + if (required_slots >= free_slots) { 331 + dev_info_ratelimited(&pdev->dev, 332 + "CMDQ is full req/free %d/%d!", 333 + required_slots, free_slots); 334 + spin_unlock_bh(&hwq->lock); 335 + return -EAGAIN; 336 + } 337 + __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie)); 338 + 339 + bsize = bng_re_set_cmd_slots(msg->req); 340 + crsqe->free_slots = free_slots; 341 + crsqe->resp 
= (struct creq_qp_event *)msg->resp; 342 + crsqe->is_waiter_alive = true; 343 + crsqe->is_in_used = true; 344 + crsqe->opcode = opcode; 345 + 346 + crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz); 347 + if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) { 348 + struct bng_re_rcfw_sbuf *sbuf = msg->sb; 349 + 350 + __set_cmdq_base_resp_addr(msg->req, msg->req_sz, 351 + cpu_to_le64(sbuf->dma_addr)); 352 + __set_cmdq_base_resp_size(msg->req, msg->req_sz, 353 + ALIGN(sbuf->size, 354 + BNG_FW_CMDQE_UNITS) / 355 + BNG_FW_CMDQE_UNITS); 356 + } 357 + 358 + preq = (u8 *)msg->req; 359 + do { 360 + /* Locate the next cmdq slot */ 361 + sw_prod = HWQ_CMP(hwq->prod, hwq); 362 + cmdqe = bng_re_get_qe(hwq, sw_prod, NULL); 363 + /* Copy a segment of the req cmd to the cmdq */ 364 + memset(cmdqe, 0, sizeof(*cmdqe)); 365 + memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe))); 366 + preq += min_t(u32, bsize, sizeof(*cmdqe)); 367 + bsize -= min_t(u32, bsize, sizeof(*cmdqe)); 368 + hwq->prod++; 369 + } while (bsize > 0); 370 + cmdq->seq_num++; 371 + 372 + cmdq_prod = hwq->prod & 0xFFFF; 373 + if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) { 374 + /* The very first doorbell write 375 + * is required to set this flag 376 + * which prompts the FW to reset 377 + * its internal pointers 378 + */ 379 + cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG); 380 + clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags); 381 + } 382 + /* ring CMDQ DB */ 383 + wmb(); 384 + writel(cmdq_prod, cmdq->cmdq_mbox.prod); 385 + writel(BNG_FW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db); 386 + spin_unlock_bh(&hwq->lock); 387 + /* Return the CREQ response pointer */ 388 + return 0; 389 + } 390 + 391 + /** 392 + * __wait_for_resp - Don't hold the cpu context and wait for response 393 + * @rcfw: rcfw channel instance of rdev 394 + * @cookie: cookie to track the command 395 + * 396 + * Wait for command completion in sleepable context. 397 + * 398 + * Returns: 399 + * 0 if command is completed by firmware. 
400 + * Non zero error code for rest of the case. 401 + */ 402 + static int __wait_for_resp(struct bng_re_rcfw *rcfw, u16 cookie) 403 + { 404 + struct bng_re_cmdq_ctx *cmdq; 405 + struct bng_re_crsqe *crsqe; 406 + 407 + cmdq = &rcfw->cmdq; 408 + crsqe = &rcfw->crsqe_tbl[cookie]; 409 + 410 + do { 411 + wait_event_timeout(cmdq->waitq, 412 + !crsqe->is_in_used, 413 + secs_to_jiffies(rcfw->max_timeout)); 414 + 415 + if (!crsqe->is_in_used) 416 + return 0; 417 + 418 + bng_re_service_creq(&rcfw->creq.creq_tasklet); 419 + 420 + if (!crsqe->is_in_used) 421 + return 0; 422 + } while (true); 423 + }; 424 + 425 + /** 426 + * bng_re_rcfw_send_message - interface to send 427 + * and complete rcfw command. 428 + * @rcfw: rcfw channel instance of rdev 429 + * @msg: message to send 430 + * 431 + * This function does not account shadow queue depth. It will send 432 + * all the command unconditionally as long as send queue is not full. 433 + * 434 + * Returns: 435 + * 0 if command completed by firmware. 436 + * Non zero if the command is not completed by firmware. 437 + */ 438 + int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw, 439 + struct bng_re_cmdqmsg *msg) 440 + { 441 + struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp; 442 + struct bng_re_crsqe *crsqe; 443 + u16 cookie; 444 + int rc; 445 + u8 opcode; 446 + 447 + opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz); 448 + 449 + rc = __send_message_basic_sanity(rcfw, msg, opcode); 450 + if (rc) 451 + return rc == -ENXIO ? 
bng_re_map_rc(opcode) : rc; 452 + 453 + rc = __send_message(rcfw, msg, opcode); 454 + if (rc) 455 + return rc; 456 + 457 + cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz)) 458 + & BNG_FW_MAX_COOKIE_VALUE; 459 + 460 + rc = __wait_for_resp(rcfw, cookie); 461 + 462 + if (rc) { 463 + spin_lock_bh(&rcfw->cmdq.hwq.lock); 464 + crsqe = &rcfw->crsqe_tbl[cookie]; 465 + crsqe->is_waiter_alive = false; 466 + if (rc == -ENODEV) 467 + set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags); 468 + spin_unlock_bh(&rcfw->cmdq.hwq.lock); 469 + return -ETIMEDOUT; 470 + } 471 + 472 + if (evnt->status) { 473 + /* failed with status */ 474 + dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n", 475 + cookie, opcode, evnt->status); 476 + rc = -EIO; 477 + } 478 + 479 + return rc; 480 + } 481 + 482 + static int bng_re_map_cmdq_mbox(struct bng_re_rcfw *rcfw) 483 + { 484 + struct bng_re_cmdq_mbox *mbox; 485 + resource_size_t bar_reg; 486 + struct pci_dev *pdev; 487 + 488 + pdev = rcfw->pdev; 489 + mbox = &rcfw->cmdq.cmdq_mbox; 490 + 491 + mbox->reg.bar_id = BNG_FW_COMM_PCI_BAR_REGION; 492 + mbox->reg.len = BNG_FW_COMM_SIZE; 493 + mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id); 494 + if (!mbox->reg.bar_base) { 495 + dev_err(&pdev->dev, 496 + "CMDQ BAR region %d resc start is 0!\n", 497 + mbox->reg.bar_id); 498 + return -ENOMEM; 499 + } 500 + 501 + bar_reg = mbox->reg.bar_base + BNG_FW_COMM_BASE_OFFSET; 502 + mbox->reg.len = BNG_FW_COMM_SIZE; 503 + mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len); 504 + if (!mbox->reg.bar_reg) { 505 + dev_err(&pdev->dev, 506 + "CMDQ BAR region %d mapping failed\n", 507 + mbox->reg.bar_id); 508 + return -ENOMEM; 509 + } 510 + 511 + mbox->prod = (void __iomem *)(mbox->reg.bar_reg + 512 + BNG_FW_PF_VF_COMM_PROD_OFFSET); 513 + mbox->db = (void __iomem *)(mbox->reg.bar_reg + BNG_FW_COMM_TRIG_OFFSET); 514 + return 0; 515 + } 516 + 517 + static irqreturn_t bng_re_creq_irq(int irq, void *dev_instance) 518 + { 519 + struct 
bng_re_rcfw *rcfw = dev_instance; 520 + struct bng_re_creq_ctx *creq; 521 + struct bng_re_hwq *hwq; 522 + u32 sw_cons; 523 + 524 + creq = &rcfw->creq; 525 + hwq = &creq->hwq; 526 + /* Prefetch the CREQ element */ 527 + sw_cons = HWQ_CMP(hwq->cons, hwq); 528 + bng_re_get_qe(hwq, sw_cons, NULL); 529 + 530 + tasklet_schedule(&creq->creq_tasklet); 531 + return IRQ_HANDLED; 532 + } 533 + 534 + int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector, 535 + bool need_init) 536 + { 537 + struct bng_re_creq_ctx *creq; 538 + struct bng_re_res *res; 539 + int rc; 540 + 541 + creq = &rcfw->creq; 542 + res = rcfw->res; 543 + 544 + if (creq->irq_handler_avail) 545 + return -EFAULT; 546 + 547 + creq->msix_vec = msix_vector; 548 + if (need_init) 549 + tasklet_setup(&creq->creq_tasklet, bng_re_service_creq); 550 + else 551 + tasklet_enable(&creq->creq_tasklet); 552 + 553 + creq->irq_name = kasprintf(GFP_KERNEL, "bng_re-creq@pci:%s", 554 + pci_name(res->pdev)); 555 + if (!creq->irq_name) 556 + return -ENOMEM; 557 + rc = request_irq(creq->msix_vec, bng_re_creq_irq, 0, 558 + creq->irq_name, rcfw); 559 + if (rc) { 560 + kfree(creq->irq_name); 561 + creq->irq_name = NULL; 562 + tasklet_disable(&creq->creq_tasklet); 563 + return rc; 564 + } 565 + creq->irq_handler_avail = true; 566 + 567 + bng_re_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true); 568 + atomic_inc(&rcfw->rcfw_intr_enabled); 569 + 570 + return 0; 571 + } 572 + 573 + static int bng_re_map_creq_db(struct bng_re_rcfw *rcfw, u32 reg_offt) 574 + { 575 + struct bng_re_creq_db *creq_db; 576 + resource_size_t bar_reg; 577 + struct pci_dev *pdev; 578 + 579 + pdev = rcfw->pdev; 580 + creq_db = &rcfw->creq.creq_db; 581 + 582 + creq_db->dbinfo.flags = 0; 583 + creq_db->reg.bar_id = BNG_FW_COMM_CONS_PCI_BAR_REGION; 584 + creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id); 585 + if (!creq_db->reg.bar_id) 586 + dev_err(&pdev->dev, 587 + "CREQ BAR region %d resc start is 0!", 588 + creq_db->reg.bar_id); 589 
+ 590 + bar_reg = creq_db->reg.bar_base + reg_offt; 591 + 592 + creq_db->reg.len = BNG_FW_CREQ_DB_LEN; 593 + creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len); 594 + if (!creq_db->reg.bar_reg) { 595 + dev_err(&pdev->dev, 596 + "CREQ BAR region %d mapping failed", 597 + creq_db->reg.bar_id); 598 + return -ENOMEM; 599 + } 600 + creq_db->dbinfo.db = creq_db->reg.bar_reg; 601 + creq_db->dbinfo.hwq = &rcfw->creq.hwq; 602 + creq_db->dbinfo.xid = rcfw->creq.ring_id; 603 + return 0; 604 + } 605 + 606 + void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill) 607 + { 608 + struct bng_re_creq_ctx *creq; 609 + 610 + creq = &rcfw->creq; 611 + 612 + if (!creq->irq_handler_avail) 613 + return; 614 + 615 + creq->irq_handler_avail = false; 616 + /* Mask h/w interrupts */ 617 + bng_re_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false); 618 + /* Sync with last running IRQ-handler */ 619 + synchronize_irq(creq->msix_vec); 620 + free_irq(creq->msix_vec, rcfw); 621 + kfree(creq->irq_name); 622 + creq->irq_name = NULL; 623 + atomic_set(&rcfw->rcfw_intr_enabled, 0); 624 + if (kill) 625 + tasklet_kill(&creq->creq_tasklet); 626 + tasklet_disable(&creq->creq_tasklet); 627 + } 628 + 629 + void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw) 630 + { 631 + struct bng_re_creq_ctx *creq; 632 + struct bng_re_cmdq_ctx *cmdq; 633 + 634 + creq = &rcfw->creq; 635 + cmdq = &rcfw->cmdq; 636 + /* Make sure the HW channel is stopped! 
*/ 637 + bng_re_rcfw_stop_irq(rcfw, true); 638 + 639 + iounmap(cmdq->cmdq_mbox.reg.bar_reg); 640 + iounmap(creq->creq_db.reg.bar_reg); 641 + 642 + cmdq->cmdq_mbox.reg.bar_reg = NULL; 643 + creq->creq_db.reg.bar_reg = NULL; 644 + creq->msix_vec = 0; 645 + } 646 + 647 + static void bng_re_start_rcfw(struct bng_re_rcfw *rcfw) 648 + { 649 + struct bng_re_cmdq_ctx *cmdq; 650 + struct bng_re_creq_ctx *creq; 651 + struct bng_re_cmdq_mbox *mbox; 652 + struct cmdq_init init = {0}; 653 + 654 + cmdq = &rcfw->cmdq; 655 + creq = &rcfw->creq; 656 + mbox = &cmdq->cmdq_mbox; 657 + 658 + init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr[0]); 659 + init.cmdq_size_cmdq_lvl = 660 + cpu_to_le16(((rcfw->cmdq_depth << 661 + CMDQ_INIT_CMDQ_SIZE_SFT) & 662 + CMDQ_INIT_CMDQ_SIZE_MASK) | 663 + ((cmdq->hwq.level << 664 + CMDQ_INIT_CMDQ_LVL_SFT) & 665 + CMDQ_INIT_CMDQ_LVL_MASK)); 666 + init.creq_ring_id = cpu_to_le16(creq->ring_id); 667 + /* Write to the mailbox register */ 668 + __iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4); 669 + } 670 + 671 + int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw, 672 + int msix_vector, 673 + int cp_bar_reg_off) 674 + { 675 + struct bng_re_cmdq_ctx *cmdq; 676 + int rc; 677 + 678 + cmdq = &rcfw->cmdq; 679 + 680 + /* Assign defaults */ 681 + cmdq->seq_num = 0; 682 + set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags); 683 + init_waitqueue_head(&cmdq->waitq); 684 + 685 + rc = bng_re_map_cmdq_mbox(rcfw); 686 + if (rc) 687 + return rc; 688 + 689 + rc = bng_re_map_creq_db(rcfw, cp_bar_reg_off); 690 + if (rc) 691 + return rc; 692 + 693 + rc = bng_re_rcfw_start_irq(rcfw, msix_vector, true); 694 + if (rc) { 695 + dev_err(&rcfw->pdev->dev, 696 + "Failed to request IRQ for CREQ rc = 0x%x\n", rc); 697 + bng_re_disable_rcfw_channel(rcfw); 698 + return rc; 699 + } 700 + 701 + bng_re_start_rcfw(rcfw); 702 + return 0; 703 + } 704 + 705 + int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw) 706 + { 707 + struct creq_deinitialize_fw_resp resp = {}; 708 + 
struct cmdq_deinitialize_fw req = {}; 709 + struct bng_re_cmdqmsg msg = {}; 710 + int rc; 711 + 712 + bng_re_rcfw_cmd_prep((struct cmdq_base *)&req, 713 + CMDQ_BASE_OPCODE_DEINITIALIZE_FW, 714 + sizeof(req)); 715 + bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, 716 + sizeof(req), sizeof(resp), 0); 717 + rc = bng_re_rcfw_send_message(rcfw, &msg); 718 + if (rc) 719 + return rc; 720 + 721 + clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags); 722 + return 0; 723 + } 724 + static inline bool _is_hw_retx_supported(u16 dev_cap_flags) 725 + { 726 + return dev_cap_flags & 727 + (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED | 728 + CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED); 729 + } 730 + 731 + #define BNG_RE_HW_RETX(a) _is_hw_retx_supported((a)) 732 + static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2) 733 + { 734 + return dev_cap_ext_flags2 & 735 + CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED; 736 + } 737 + 738 + int bng_re_init_rcfw(struct bng_re_rcfw *rcfw, 739 + struct bng_re_stats *stats_ctx) 740 + { 741 + struct creq_initialize_fw_resp resp = {}; 742 + struct cmdq_initialize_fw req = {}; 743 + struct bng_re_cmdqmsg msg = {}; 744 + int rc; 745 + u16 flags = 0; 746 + 747 + bng_re_rcfw_cmd_prep((struct cmdq_base *)&req, 748 + CMDQ_BASE_OPCODE_INITIALIZE_FW, 749 + sizeof(req)); 750 + /* Supply (log-base-2-of-host-page-size - base-page-shift) 751 + * to bono to adjust the doorbell page sizes. 
752 + */ 753 + req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT - 754 + BNG_FW_DBR_BASE_PAGE_SHIFT); 755 + if (BNG_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags)) 756 + flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED; 757 + if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2)) 758 + flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED; 759 + req.flags |= cpu_to_le16(flags); 760 + req.stat_ctx_id = cpu_to_le32(stats_ctx->fw_id); 761 + bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); 762 + rc = bng_re_rcfw_send_message(rcfw, &msg); 763 + if (rc) 764 + return rc; 765 + set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags); 766 + return 0; 767 + }
+211
drivers/infiniband/hw/bng_re/bng_fw.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #ifndef __BNG_FW_H__ 5 + #define __BNG_FW_H__ 6 + 7 + #include "bng_tlv.h" 8 + 9 + /* FW DB related */ 10 + #define BNG_FW_CMDQ_TRIG_VAL 1 11 + #define BNG_FW_COMM_PCI_BAR_REGION 0 12 + #define BNG_FW_COMM_CONS_PCI_BAR_REGION 2 13 + #define BNG_FW_DBR_BASE_PAGE_SHIFT 12 14 + #define BNG_FW_COMM_SIZE 0x104 15 + #define BNG_FW_COMM_BASE_OFFSET 0x600 16 + #define BNG_FW_COMM_TRIG_OFFSET 0x100 17 + #define BNG_FW_PF_VF_COMM_PROD_OFFSET 0xc 18 + #define BNG_FW_CREQ_DB_LEN 8 19 + 20 + /* CREQ */ 21 + #define BNG_FW_CREQE_MAX_CNT (64 * 1024) 22 + #define BNG_FW_CREQE_UNITS 16 23 + #define BNG_FW_CREQ_ENTRY_POLL_BUDGET 0x100 24 + #define BNG_FW_CREQ_CMP_VALID(hdr, pass) \ 25 + (!!((hdr)->v & CREQ_BASE_V) == \ 26 + !((pass) & BNG_RE_FLAG_EPOCH_CONS_MASK)) 27 + #define BNG_FW_CREQ_ENTRY_POLL_BUDGET 0x100 28 + 29 + /* CMDQ */ 30 + struct bng_fw_cmdqe { 31 + u8 data[16]; 32 + }; 33 + 34 + #define BNG_FW_CMDQE_MAX_CNT 8192 35 + #define BNG_FW_CMDQE_UNITS sizeof(struct bng_fw_cmdqe) 36 + #define BNG_FW_CMDQE_BYTES(depth) ((depth) * BNG_FW_CMDQE_UNITS) 37 + 38 + #define BNG_FW_MAX_COOKIE_VALUE (BNG_FW_CMDQE_MAX_CNT - 1) 39 + #define BNG_FW_CMD_IS_BLOCKING 0x8000 40 + 41 + /* Crsq buf is 1024-Byte */ 42 + struct bng_re_crsbe { 43 + u8 data[1024]; 44 + }; 45 + 46 + 47 + static inline u32 bng_fw_cmdqe_npages(u32 depth) 48 + { 49 + u32 npages; 50 + 51 + npages = BNG_FW_CMDQE_BYTES(depth) / PAGE_SIZE; 52 + if (BNG_FW_CMDQE_BYTES(depth) % PAGE_SIZE) 53 + npages++; 54 + return npages; 55 + } 56 + 57 + static inline u32 bng_fw_cmdqe_page_size(u32 depth) 58 + { 59 + return (bng_fw_cmdqe_npages(depth) * PAGE_SIZE); 60 + } 61 + struct bng_re_cmdq_mbox { 62 + struct bng_re_reg_desc reg; 63 + void __iomem *prod; 64 + void __iomem *db; 65 + }; 66 + 67 + /* HWQ */ 68 + struct bng_re_cmdq_ctx { 69 + struct bng_re_hwq hwq; 70 + struct bng_re_cmdq_mbox cmdq_mbox; 71 + unsigned long flags; 72 + #define 
FIRMWARE_INITIALIZED_FLAG (0) 73 + #define FIRMWARE_STALL_DETECTED (3) 74 + #define FIRMWARE_FIRST_FLAG (31) 75 + wait_queue_head_t waitq; 76 + u32 seq_num; 77 + }; 78 + 79 + struct bng_re_creq_db { 80 + struct bng_re_reg_desc reg; 81 + struct bng_re_db_info dbinfo; 82 + }; 83 + 84 + struct bng_re_creq_stat { 85 + u64 creq_qp_event_processed; 86 + u64 creq_func_event_processed; 87 + }; 88 + 89 + struct bng_re_creq_ctx { 90 + struct bng_re_hwq hwq; 91 + struct bng_re_creq_db creq_db; 92 + struct bng_re_creq_stat stats; 93 + struct tasklet_struct creq_tasklet; 94 + u16 ring_id; 95 + int msix_vec; 96 + bool irq_handler_avail; 97 + char *irq_name; 98 + }; 99 + 100 + struct bng_re_crsqe { 101 + struct creq_qp_event *resp; 102 + u32 req_size; 103 + /* Free slots at the time of submission */ 104 + u32 free_slots; 105 + u8 opcode; 106 + bool is_waiter_alive; 107 + bool is_in_used; 108 + }; 109 + 110 + struct bng_re_rcfw_sbuf { 111 + void *sb; 112 + dma_addr_t dma_addr; 113 + u32 size; 114 + }; 115 + 116 + /* RoCE FW Communication Channels */ 117 + struct bng_re_rcfw { 118 + struct pci_dev *pdev; 119 + struct bng_re_res *res; 120 + struct bng_re_cmdq_ctx cmdq; 121 + struct bng_re_creq_ctx creq; 122 + struct bng_re_crsqe *crsqe_tbl; 123 + /* To synchronize the qp-handle hash table */ 124 + spinlock_t tbl_lock; 125 + u32 cmdq_depth; 126 + /* cached from chip cctx for quick reference in slow path */ 127 + u16 max_timeout; 128 + atomic_t rcfw_intr_enabled; 129 + }; 130 + 131 + struct bng_re_cmdqmsg { 132 + struct cmdq_base *req; 133 + struct creq_base *resp; 134 + void *sb; 135 + u32 req_sz; 136 + u32 res_sz; 137 + u8 block; 138 + }; 139 + 140 + static inline void bng_re_rcfw_cmd_prep(struct cmdq_base *req, 141 + u8 opcode, u8 cmd_size) 142 + { 143 + req->opcode = opcode; 144 + req->cmd_size = cmd_size; 145 + } 146 + 147 + static inline void bng_re_fill_cmdqmsg(struct bng_re_cmdqmsg *msg, 148 + void *req, void *resp, void *sb, 149 + u32 req_sz, u32 res_sz, u8 block) 150 + { 151 
+ msg->req = req; 152 + msg->resp = resp; 153 + msg->sb = sb; 154 + msg->req_sz = req_sz; 155 + msg->res_sz = res_sz; 156 + msg->block = block; 157 + } 158 + 159 + /* Get the number of command units required for the req. The 160 + * function returns correct value only if called before 161 + * setting using bng_re_set_cmd_slots 162 + */ 163 + static inline u32 bng_re_get_cmd_slots(struct cmdq_base *req) 164 + { 165 + u32 cmd_units = 0; 166 + 167 + if (HAS_TLV_HEADER(req)) { 168 + struct roce_tlv *tlv_req = (struct roce_tlv *)req; 169 + 170 + cmd_units = tlv_req->total_size; 171 + } else { 172 + cmd_units = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) / 173 + BNG_FW_CMDQE_UNITS; 174 + } 175 + 176 + return cmd_units; 177 + } 178 + 179 + static inline u32 bng_re_set_cmd_slots(struct cmdq_base *req) 180 + { 181 + u32 cmd_byte = 0; 182 + 183 + if (HAS_TLV_HEADER(req)) { 184 + struct roce_tlv *tlv_req = (struct roce_tlv *)req; 185 + 186 + cmd_byte = tlv_req->total_size * BNG_FW_CMDQE_UNITS; 187 + } else { 188 + cmd_byte = req->cmd_size; 189 + req->cmd_size = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) / 190 + BNG_FW_CMDQE_UNITS; 191 + } 192 + 193 + return cmd_byte; 194 + } 195 + 196 + void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw); 197 + int bng_re_alloc_fw_channel(struct bng_re_res *res, 198 + struct bng_re_rcfw *rcfw); 199 + int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw, 200 + int msix_vector, 201 + int cp_bar_reg_off); 202 + void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw); 203 + int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector, 204 + bool need_init); 205 + void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill); 206 + int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw, 207 + struct bng_re_cmdqmsg *msg); 208 + int bng_re_init_rcfw(struct bng_re_rcfw *rcfw, 209 + struct bng_re_stats *stats_ctx); 210 + int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw); 211 + #endif
+85
drivers/infiniband/hw/bng_re/bng_re.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #ifndef __BNG_RE_H__ 5 + #define __BNG_RE_H__ 6 + 7 + #include "bng_res.h" 8 + 9 + #define BNG_RE_ADEV_NAME "bng_en" 10 + 11 + #define BNG_RE_DESC "Broadcom 800G RoCE Driver" 12 + 13 + #define rdev_to_dev(rdev) ((rdev) ? (&(rdev)->ibdev.dev) : NULL) 14 + 15 + #define BNG_RE_MIN_MSIX 2 16 + #define BNG_RE_MAX_MSIX BNGE_MAX_ROCE_MSIX 17 + 18 + #define BNG_RE_CREQ_NQ_IDX 0 19 + 20 + #define BNGE_INVALID_STATS_CTX_ID -1 21 + /* NQ specific structures */ 22 + struct bng_re_nq_db { 23 + struct bng_re_reg_desc reg; 24 + struct bng_re_db_info dbinfo; 25 + }; 26 + 27 + struct bng_re_nq { 28 + struct pci_dev *pdev; 29 + struct bng_re_res *res; 30 + char *name; 31 + struct bng_re_hwq hwq; 32 + struct bng_re_nq_db nq_db; 33 + u16 ring_id; 34 + int msix_vec; 35 + cpumask_t mask; 36 + struct tasklet_struct nq_tasklet; 37 + bool requested; 38 + int budget; 39 + u32 load; 40 + 41 + struct workqueue_struct *cqn_wq; 42 + }; 43 + 44 + struct bng_re_nq_record { 45 + struct bnge_msix_info msix_entries[BNG_RE_MAX_MSIX]; 46 + struct bng_re_nq nq[BNG_RE_MAX_MSIX]; 47 + int num_msix; 48 + /* serialize NQ access */ 49 + struct mutex load_lock; 50 + }; 51 + 52 + struct bng_re_en_dev_info { 53 + struct bng_re_dev *rdev; 54 + struct bnge_auxr_dev *auxr_dev; 55 + }; 56 + 57 + struct bng_re_ring_attr { 58 + dma_addr_t *dma_arr; 59 + int pages; 60 + int type; 61 + u32 depth; 62 + u32 lrid; /* Logical ring id */ 63 + u8 mode; 64 + }; 65 + 66 + struct bng_re_dev { 67 + struct ib_device ibdev; 68 + unsigned long flags; 69 + #define BNG_RE_FLAG_NETDEV_REGISTERED 0 70 + #define BNG_RE_FLAG_RCFW_CHANNEL_EN 1 71 + struct net_device *netdev; 72 + struct auxiliary_device *adev; 73 + struct bnge_auxr_dev *aux_dev; 74 + struct bng_re_chip_ctx *chip_ctx; 75 + int fn_id; 76 + struct bng_re_res bng_res; 77 + struct bng_re_rcfw rcfw; 78 + struct bng_re_nq_record *nqr; 79 + /* Device Resources */ 80 + struct 
bng_re_dev_attr *dev_attr; 81 + struct dentry *dbg_root; 82 + struct bng_re_stats stats_ctx; 83 + }; 84 + 85 + #endif
+279
drivers/infiniband/hw/bng_re/bng_res.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #include <linux/pci.h> 5 + #include <linux/vmalloc.h> 6 + #include <rdma/ib_umem.h> 7 + 8 + #include <linux/bnxt/hsi.h> 9 + #include "bng_res.h" 10 + #include "roce_hsi.h" 11 + 12 + /* Stats */ 13 + void bng_re_free_stats_ctx_mem(struct pci_dev *pdev, 14 + struct bng_re_stats *stats) 15 + { 16 + if (stats->dma) { 17 + dma_free_coherent(&pdev->dev, stats->size, 18 + stats->dma, stats->dma_map); 19 + } 20 + memset(stats, 0, sizeof(*stats)); 21 + stats->fw_id = -1; 22 + } 23 + 24 + int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev, 25 + struct bng_re_chip_ctx *cctx, 26 + struct bng_re_stats *stats) 27 + { 28 + memset(stats, 0, sizeof(*stats)); 29 + stats->fw_id = -1; 30 + stats->size = cctx->hw_stats_size; 31 + stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, 32 + &stats->dma_map, GFP_KERNEL); 33 + if (!stats->dma) 34 + return -ENOMEM; 35 + 36 + return 0; 37 + } 38 + 39 + static void bng_free_pbl(struct bng_re_res *res, struct bng_re_pbl *pbl) 40 + { 41 + struct pci_dev *pdev = res->pdev; 42 + int i; 43 + 44 + for (i = 0; i < pbl->pg_count; i++) { 45 + if (pbl->pg_arr[i]) 46 + dma_free_coherent(&pdev->dev, pbl->pg_size, 47 + (void *)((unsigned long) 48 + pbl->pg_arr[i] & 49 + PAGE_MASK), 50 + pbl->pg_map_arr[i]); 51 + else 52 + dev_warn(&pdev->dev, 53 + "PBL free pg_arr[%d] empty?!\n", i); 54 + pbl->pg_arr[i] = NULL; 55 + } 56 + 57 + vfree(pbl->pg_arr); 58 + pbl->pg_arr = NULL; 59 + vfree(pbl->pg_map_arr); 60 + pbl->pg_map_arr = NULL; 61 + pbl->pg_count = 0; 62 + pbl->pg_size = 0; 63 + } 64 + 65 + static int bng_alloc_pbl(struct bng_re_res *res, 66 + struct bng_re_pbl *pbl, 67 + struct bng_re_sg_info *sginfo) 68 + { 69 + struct pci_dev *pdev = res->pdev; 70 + u32 pages; 71 + int i; 72 + 73 + if (sginfo->nopte) 74 + return 0; 75 + pages = sginfo->npages; 76 + 77 + /* page ptr arrays */ 78 + pbl->pg_arr = vmalloc_array(pages, sizeof(void *)); 79 + if (!pbl->pg_arr) 80 + 
return -ENOMEM; 81 + 82 + pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t)); 83 + if (!pbl->pg_map_arr) { 84 + vfree(pbl->pg_arr); 85 + pbl->pg_arr = NULL; 86 + return -ENOMEM; 87 + } 88 + pbl->pg_count = 0; 89 + pbl->pg_size = sginfo->pgsize; 90 + 91 + for (i = 0; i < pages; i++) { 92 + pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev, 93 + pbl->pg_size, 94 + &pbl->pg_map_arr[i], 95 + GFP_KERNEL); 96 + if (!pbl->pg_arr[i]) 97 + goto fail; 98 + pbl->pg_count++; 99 + } 100 + 101 + return 0; 102 + fail: 103 + bng_free_pbl(res, pbl); 104 + return -ENOMEM; 105 + } 106 + 107 + void bng_re_free_hwq(struct bng_re_res *res, 108 + struct bng_re_hwq *hwq) 109 + { 110 + int i; 111 + 112 + if (!hwq->max_elements) 113 + return; 114 + if (hwq->level >= BNG_PBL_LVL_MAX) 115 + return; 116 + 117 + for (i = 0; i < hwq->level + 1; i++) 118 + bng_free_pbl(res, &hwq->pbl[i]); 119 + 120 + hwq->level = BNG_PBL_LVL_MAX; 121 + hwq->max_elements = 0; 122 + hwq->element_size = 0; 123 + hwq->prod = 0; 124 + hwq->cons = 0; 125 + } 126 + 127 + /* All HWQs are power of 2 in size */ 128 + int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq, 129 + struct bng_re_hwq_attr *hwq_attr) 130 + { 131 + u32 npages, pg_size; 132 + struct bng_re_sg_info sginfo = {}; 133 + u32 depth, stride, npbl, npde; 134 + dma_addr_t *src_phys_ptr, **dst_virt_ptr; 135 + struct bng_re_res *res; 136 + struct pci_dev *pdev; 137 + int i, rc, lvl; 138 + 139 + res = hwq_attr->res; 140 + pdev = res->pdev; 141 + pg_size = hwq_attr->sginfo->pgsize; 142 + hwq->level = BNG_PBL_LVL_MAX; 143 + 144 + depth = roundup_pow_of_two(hwq_attr->depth); 145 + stride = roundup_pow_of_two(hwq_attr->stride); 146 + 147 + npages = (depth * stride) / pg_size; 148 + if ((depth * stride) % pg_size) 149 + npages++; 150 + if (!npages) 151 + return -EINVAL; 152 + hwq_attr->sginfo->npages = npages; 153 + 154 + if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) { 155 + /* This request is Level 0, map PTE */ 156 + rc = bng_alloc_pbl(res, 
&hwq->pbl[BNG_PBL_LVL_0], hwq_attr->sginfo); 157 + if (rc) 158 + goto fail; 159 + hwq->level = BNG_PBL_LVL_0; 160 + goto done; 161 + } 162 + 163 + if (npages >= MAX_PBL_LVL_0_PGS) { 164 + if (npages > MAX_PBL_LVL_1_PGS) { 165 + u32 flag = PTU_PTE_VALID; 166 + /* 2 levels of indirection */ 167 + npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT; 168 + if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT)) 169 + npbl++; 170 + npde = npbl >> MAX_PDL_LVL_SHIFT; 171 + if (npbl % BIT(MAX_PDL_LVL_SHIFT)) 172 + npde++; 173 + /* Alloc PDE pages */ 174 + sginfo.pgsize = npde * pg_size; 175 + sginfo.npages = 1; 176 + rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo); 177 + if (rc) 178 + goto fail; 179 + 180 + /* Alloc PBL pages */ 181 + sginfo.npages = npbl; 182 + sginfo.pgsize = PAGE_SIZE; 183 + rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1], &sginfo); 184 + if (rc) 185 + goto fail; 186 + /* Fill PDL with PBL page pointers */ 187 + dst_virt_ptr = 188 + (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr; 189 + src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr; 190 + for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++) 191 + dst_virt_ptr[0][i] = src_phys_ptr[i] | flag; 192 + 193 + /* Alloc or init PTEs */ 194 + rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_2], 195 + hwq_attr->sginfo); 196 + if (rc) 197 + goto fail; 198 + hwq->level = BNG_PBL_LVL_2; 199 + if (hwq_attr->sginfo->nopte) 200 + goto done; 201 + /* Fill PBLs with PTE pointers */ 202 + dst_virt_ptr = 203 + (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_1].pg_arr; 204 + src_phys_ptr = hwq->pbl[BNG_PBL_LVL_2].pg_map_arr; 205 + for (i = 0; i < hwq->pbl[BNG_PBL_LVL_2].pg_count; i++) { 206 + dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = 207 + src_phys_ptr[i] | PTU_PTE_VALID; 208 + } 209 + if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) { 210 + /* Find the last pg of the size */ 211 + i = hwq->pbl[BNG_PBL_LVL_2].pg_count; 212 + dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |= 213 + PTU_PTE_LAST; 214 + if (i > 1) 215 + dst_virt_ptr[PTR_PG(i - 2)] 216 + [PTR_IDX(i 
- 2)] |= 217 + PTU_PTE_NEXT_TO_LAST; 218 + } 219 + } else { /* pages < 512 npbl = 1, npde = 0 */ 220 + u32 flag = PTU_PTE_VALID; 221 + 222 + /* 1 level of indirection */ 223 + npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT; 224 + if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT)) 225 + npbl++; 226 + sginfo.npages = npbl; 227 + sginfo.pgsize = PAGE_SIZE; 228 + /* Alloc PBL page */ 229 + rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo); 230 + if (rc) 231 + goto fail; 232 + /* Alloc or init PTEs */ 233 + rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1], 234 + hwq_attr->sginfo); 235 + if (rc) 236 + goto fail; 237 + hwq->level = BNG_PBL_LVL_1; 238 + if (hwq_attr->sginfo->nopte) 239 + goto done; 240 + /* Fill PBL with PTE pointers */ 241 + dst_virt_ptr = 242 + (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr; 243 + src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr; 244 + for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++) 245 + dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = 246 + src_phys_ptr[i] | flag; 247 + if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) { 248 + /* Find the last pg of the size */ 249 + i = hwq->pbl[BNG_PBL_LVL_1].pg_count; 250 + dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |= 251 + PTU_PTE_LAST; 252 + if (i > 1) 253 + dst_virt_ptr[PTR_PG(i - 2)] 254 + [PTR_IDX(i - 2)] |= 255 + PTU_PTE_NEXT_TO_LAST; 256 + } 257 + } 258 + } 259 + done: 260 + hwq->prod = 0; 261 + hwq->cons = 0; 262 + hwq->pdev = pdev; 263 + hwq->depth = hwq_attr->depth; 264 + hwq->max_elements = hwq->depth; 265 + hwq->element_size = stride; 266 + hwq->qe_ppg = pg_size / stride; 267 + /* For direct access to the elements */ 268 + lvl = hwq->level; 269 + if (hwq_attr->sginfo->nopte && hwq->level) 270 + lvl = hwq->level - 1; 271 + hwq->pbl_ptr = hwq->pbl[lvl].pg_arr; 272 + hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr; 273 + spin_lock_init(&hwq->lock); 274 + 275 + return 0; 276 + fail: 277 + bng_re_free_hwq(res, hwq); 278 + return -ENOMEM; 279 + }
+215
drivers/infiniband/hw/bng_re/bng_res.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #ifndef __BNG_RES_H__ 5 + #define __BNG_RES_H__ 6 + 7 + #include "roce_hsi.h" 8 + 9 + #define BNG_ROCE_FW_MAX_TIMEOUT 60 10 + 11 + #define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *)) 12 + #define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1) 13 + #define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG) 14 + #define PTR_IDX(x) ((x) & PTR_MAX_IDX_PER_PG) 15 + 16 + #define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1)) 17 + #define HWQ_FREE_SLOTS(hwq) (hwq->max_elements - \ 18 + ((HWQ_CMP(hwq->prod, hwq)\ 19 + - HWQ_CMP(hwq->cons, hwq))\ 20 + & (hwq->max_elements - 1))) 21 + 22 + #define MAX_PBL_LVL_0_PGS 1 23 + #define MAX_PBL_LVL_1_PGS 512 24 + #define MAX_PBL_LVL_1_PGS_SHIFT 9 25 + #define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256 26 + #define MAX_PBL_LVL_2_PGS (256 * 512) 27 + #define MAX_PDL_LVL_SHIFT 9 28 + 29 + #define BNG_RE_DBR_VALID (0x1UL << 26) 30 + #define BNG_RE_DBR_EPOCH_SHIFT 24 31 + #define BNG_RE_DBR_TOGGLE_SHIFT 25 32 + 33 + #define BNG_MAX_TQM_ALLOC_REQ 48 34 + 35 + struct bng_re_reg_desc { 36 + u8 bar_id; 37 + resource_size_t bar_base; 38 + unsigned long offset; 39 + void __iomem *bar_reg; 40 + size_t len; 41 + }; 42 + 43 + struct bng_re_db_info { 44 + void __iomem *db; 45 + void __iomem *priv_db; 46 + struct bng_re_hwq *hwq; 47 + u32 xid; 48 + u32 max_slot; 49 + u32 flags; 50 + u8 toggle; 51 + }; 52 + 53 + enum bng_re_db_info_flags_mask { 54 + BNG_RE_FLAG_EPOCH_CONS_SHIFT = 0x0UL, 55 + BNG_RE_FLAG_EPOCH_PROD_SHIFT = 0x1UL, 56 + BNG_RE_FLAG_EPOCH_CONS_MASK = 0x1UL, 57 + BNG_RE_FLAG_EPOCH_PROD_MASK = 0x2UL, 58 + }; 59 + 60 + enum bng_re_db_epoch_flag_shift { 61 + BNG_RE_DB_EPOCH_CONS_SHIFT = BNG_RE_DBR_EPOCH_SHIFT, 62 + BNG_RE_DB_EPOCH_PROD_SHIFT = (BNG_RE_DBR_EPOCH_SHIFT - 1), 63 + }; 64 + 65 + struct bng_re_chip_ctx { 66 + u16 chip_num; 67 + u16 hw_stats_size; 68 + u64 hwrm_intf_ver; 69 + u16 hwrm_cmd_max_timeout; 70 + }; 71 + 72 + struct bng_re_pbl { 73 + 
u32 pg_count; 74 + u32 pg_size; 75 + void **pg_arr; 76 + dma_addr_t *pg_map_arr; 77 + }; 78 + 79 + enum bng_re_pbl_lvl { 80 + BNG_PBL_LVL_0, 81 + BNG_PBL_LVL_1, 82 + BNG_PBL_LVL_2, 83 + BNG_PBL_LVL_MAX 84 + }; 85 + 86 + enum bng_re_hwq_type { 87 + BNG_HWQ_TYPE_CTX, 88 + BNG_HWQ_TYPE_QUEUE 89 + }; 90 + 91 + struct bng_re_sg_info { 92 + u32 npages; 93 + u32 pgshft; 94 + u32 pgsize; 95 + bool nopte; 96 + }; 97 + 98 + struct bng_re_hwq_attr { 99 + struct bng_re_res *res; 100 + struct bng_re_sg_info *sginfo; 101 + enum bng_re_hwq_type type; 102 + u32 depth; 103 + u32 stride; 104 + u32 aux_stride; 105 + u32 aux_depth; 106 + }; 107 + 108 + struct bng_re_hwq { 109 + struct pci_dev *pdev; 110 + /* lock to protect hwq */ 111 + spinlock_t lock; 112 + struct bng_re_pbl pbl[BNG_PBL_LVL_MAX + 1]; 113 + /* Valid values: 0, 1, 2 */ 114 + enum bng_re_pbl_lvl level; 115 + /* PBL entries */ 116 + void **pbl_ptr; 117 + /* PBL dma_addr */ 118 + dma_addr_t *pbl_dma_ptr; 119 + u32 max_elements; 120 + u32 depth; 121 + u16 element_size; 122 + u32 prod; 123 + u32 cons; 124 + /* queue entry per page */ 125 + u16 qe_ppg; 126 + }; 127 + 128 + struct bng_re_stats { 129 + dma_addr_t dma_map; 130 + void *dma; 131 + u32 size; 132 + u32 fw_id; 133 + }; 134 + 135 + struct bng_re_res { 136 + struct pci_dev *pdev; 137 + struct bng_re_chip_ctx *cctx; 138 + struct bng_re_dev_attr *dattr; 139 + }; 140 + 141 + static inline void *bng_re_get_qe(struct bng_re_hwq *hwq, 142 + u32 indx, u64 *pg) 143 + { 144 + u32 pg_num, pg_idx; 145 + 146 + pg_num = (indx / hwq->qe_ppg); 147 + pg_idx = (indx % hwq->qe_ppg); 148 + if (pg) 149 + *pg = (u64)&hwq->pbl_ptr[pg_num]; 150 + return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); 151 + } 152 + 153 + #define BNG_RE_INIT_DBHDR(xid, type, indx, toggle) \ 154 + (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \ 155 + (type) | BNG_RE_DBR_VALID) << 32) | (indx) | \ 156 + (((u32)(toggle)) << (BNG_RE_DBR_TOGGLE_SHIFT))) 157 + 158 + static inline void 
bng_re_ring_db(struct bng_re_db_info *info, 159 + u32 type) 160 + { 161 + u64 key = 0; 162 + u32 indx; 163 + u8 toggle = 0; 164 + 165 + if (type == DBC_DBC_TYPE_CQ_ARMALL || 166 + type == DBC_DBC_TYPE_CQ_ARMSE) 167 + toggle = info->toggle; 168 + 169 + indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) | 170 + ((info->flags & BNG_RE_FLAG_EPOCH_CONS_MASK) << 171 + BNG_RE_DB_EPOCH_CONS_SHIFT); 172 + 173 + key = BNG_RE_INIT_DBHDR(info->xid, type, indx, toggle); 174 + writeq(key, info->db); 175 + } 176 + 177 + static inline void bng_re_ring_nq_db(struct bng_re_db_info *info, 178 + struct bng_re_chip_ctx *cctx, 179 + bool arm) 180 + { 181 + u32 type; 182 + 183 + type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; 184 + bng_re_ring_db(info, type); 185 + } 186 + 187 + static inline void bng_re_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt, 188 + u32 *dbinfo_flags) 189 + { 190 + /* move cons and update toggle/epoch if wrap around */ 191 + *cons += cnt; 192 + if (*cons >= max_elements) { 193 + *cons %= max_elements; 194 + *dbinfo_flags ^= 1UL << BNG_RE_FLAG_EPOCH_CONS_SHIFT; 195 + } 196 + } 197 + 198 + static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2) 199 + { 200 + return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED); 201 + } 202 + 203 + void bng_re_free_hwq(struct bng_re_res *res, 204 + struct bng_re_hwq *hwq); 205 + 206 + int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq, 207 + struct bng_re_hwq_attr *hwq_attr); 208 + 209 + void bng_re_free_stats_ctx_mem(struct pci_dev *pdev, 210 + struct bng_re_stats *stats); 211 + 212 + int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev, 213 + struct bng_re_chip_ctx *cctx, 214 + struct bng_re_stats *stats); 215 + #endif
+131
drivers/infiniband/hw/bng_re/bng_sp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2025 Broadcom. 3 + #include <linux/interrupt.h> 4 + #include <linux/pci.h> 5 + 6 + #include "bng_res.h" 7 + #include "bng_fw.h" 8 + #include "bng_sp.h" 9 + #include "bng_tlv.h" 10 + 11 + static bool bng_re_is_atomic_cap(struct bng_re_rcfw *rcfw) 12 + { 13 + u16 pcie_ctl2 = 0; 14 + 15 + pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2); 16 + return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ); 17 + } 18 + 19 + static void bng_re_query_version(struct bng_re_rcfw *rcfw, 20 + char *fw_ver) 21 + { 22 + struct creq_query_version_resp resp = {}; 23 + struct bng_re_cmdqmsg msg = {}; 24 + struct cmdq_query_version req = {}; 25 + int rc; 26 + 27 + bng_re_rcfw_cmd_prep((struct cmdq_base *)&req, 28 + CMDQ_BASE_OPCODE_QUERY_VERSION, 29 + sizeof(req)); 30 + 31 + bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); 32 + rc = bng_re_rcfw_send_message(rcfw, &msg); 33 + if (rc) 34 + return; 35 + fw_ver[0] = resp.fw_maj; 36 + fw_ver[1] = resp.fw_minor; 37 + fw_ver[2] = resp.fw_bld; 38 + fw_ver[3] = resp.fw_rsvd; 39 + } 40 + 41 + int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw) 42 + { 43 + struct bng_re_dev_attr *attr = rcfw->res->dattr; 44 + struct creq_query_func_resp resp = {}; 45 + struct bng_re_cmdqmsg msg = {}; 46 + struct creq_query_func_resp_sb *sb; 47 + struct bng_re_rcfw_sbuf sbuf; 48 + struct cmdq_query_func req = {}; 49 + u8 *tqm_alloc; 50 + int i, rc; 51 + u32 temp; 52 + 53 + bng_re_rcfw_cmd_prep((struct cmdq_base *)&req, 54 + CMDQ_BASE_OPCODE_QUERY_FUNC, 55 + sizeof(req)); 56 + 57 + sbuf.size = ALIGN(sizeof(*sb), BNG_FW_CMDQE_UNITS); 58 + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, 59 + &sbuf.dma_addr, GFP_KERNEL); 60 + if (!sbuf.sb) 61 + return -ENOMEM; 62 + sb = sbuf.sb; 63 + req.resp_size = sbuf.size / BNG_FW_CMDQE_UNITS; 64 + bng_re_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), 65 + sizeof(resp), 0); 66 + rc = bng_re_rcfw_send_message(rcfw, &msg); 
67 + if (rc) 68 + goto bail; 69 + /* Extract the context from the side buffer */ 70 + attr->max_qp = le32_to_cpu(sb->max_qp); 71 + /* max_qp value reported by FW doesn't include the QP1 */ 72 + attr->max_qp += 1; 73 + attr->max_qp_rd_atom = 74 + sb->max_qp_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ? 75 + BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom; 76 + attr->max_qp_init_rd_atom = 77 + sb->max_qp_init_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ? 78 + BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom; 79 + attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1; 80 + 81 + /* Adjust for max_qp_wqes for variable wqe */ 82 + attr->max_qp_wqes = min_t(u32, attr->max_qp_wqes, BNG_VAR_MAX_WQE - 1); 83 + 84 + attr->max_qp_sges = min_t(u32, sb->max_sge_var_wqe, BNG_VAR_MAX_SGE); 85 + attr->max_cq = le32_to_cpu(sb->max_cq); 86 + attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); 87 + attr->max_cq_sges = attr->max_qp_sges; 88 + attr->max_mr = le32_to_cpu(sb->max_mr); 89 + attr->max_mw = le32_to_cpu(sb->max_mw); 90 + 91 + attr->max_mr_size = le64_to_cpu(sb->max_mr_size); 92 + attr->max_pd = 64 * 1024; 93 + attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp); 94 + attr->max_ah = le32_to_cpu(sb->max_ah); 95 + 96 + attr->max_srq = le16_to_cpu(sb->max_srq); 97 + attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1; 98 + attr->max_srq_sges = sb->max_srq_sge; 99 + attr->max_pkey = 1; 100 + attr->max_inline_data = le32_to_cpu(sb->max_inline_data); 101 + /* 102 + * Read the max gid supported by HW. 103 + * For each entry in HW GID in HW table, we consume 2 104 + * GID entries in the kernel GID table. So max_gid reported 105 + * to stack can be up to twice the value reported by the HW, up to 256 gids. 
106 + */ 107 + attr->max_sgid = le32_to_cpu(sb->max_gid); 108 + attr->max_sgid = min_t(u32, BNG_RE_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid); 109 + attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); 110 + attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2); 111 + 112 + if (_is_max_srq_ext_supported(attr->dev_cap_flags2)) 113 + attr->max_srq += le16_to_cpu(sb->max_srq_ext); 114 + 115 + bng_re_query_version(rcfw, attr->fw_ver); 116 + for (i = 0; i < BNG_MAX_TQM_ALLOC_REQ / 4; i++) { 117 + temp = le32_to_cpu(sb->tqm_alloc_reqs[i]); 118 + tqm_alloc = (u8 *)&temp; 119 + attr->tqm_alloc_reqs[i * 4] = *tqm_alloc; 120 + attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc); 121 + attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc); 122 + attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc); 123 + } 124 + 125 + attr->max_dpi = le32_to_cpu(sb->max_dpi); 126 + attr->is_atomic = bng_re_is_atomic_cap(rcfw); 127 + bail: 128 + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, 129 + sbuf.sb, sbuf.dma_addr); 130 + return rc; 131 + }
+47
drivers/infiniband/hw/bng_re/bng_sp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #ifndef __BNG_SP_H__ 5 + #define __BNG_SP_H__ 6 + 7 + #include "bng_fw.h" 8 + 9 + #define BNG_VAR_MAX_WQE 4352 10 + #define BNG_VAR_MAX_SGE 13 11 + 12 + struct bng_re_dev_attr { 13 + #define FW_VER_ARR_LEN 4 14 + u8 fw_ver[FW_VER_ARR_LEN]; 15 + #define BNG_RE_NUM_GIDS_SUPPORTED 256 16 + u16 max_sgid; 17 + u16 max_mrw; 18 + u32 max_qp; 19 + #define BNG_RE_MAX_OUT_RD_ATOM 126 20 + u32 max_qp_rd_atom; 21 + u32 max_qp_init_rd_atom; 22 + u32 max_qp_wqes; 23 + u32 max_qp_sges; 24 + u32 max_cq; 25 + u32 max_cq_wqes; 26 + u32 max_cq_sges; 27 + u32 max_mr; 28 + u64 max_mr_size; 29 + u32 max_pd; 30 + u32 max_mw; 31 + u32 max_raw_ethy_qp; 32 + u32 max_ah; 33 + u32 max_srq; 34 + u32 max_srq_wqes; 35 + u32 max_srq_sges; 36 + u32 max_pkey; 37 + u32 max_inline_data; 38 + u32 l2_db_size; 39 + u8 tqm_alloc_reqs[BNG_MAX_TQM_ALLOC_REQ]; 40 + bool is_atomic; 41 + u16 dev_cap_flags; 42 + u16 dev_cap_flags2; 43 + u32 max_dpi; 44 + }; 45 + 46 + int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw); 47 + #endif
+128
drivers/infiniband/hw/bng_re/bng_tlv.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 2 + 3 + #ifndef __BNG_TLV_H__ 4 + #define __BNG_TLV_H__ 5 + 6 + #include "roce_hsi.h" 7 + 8 + struct roce_tlv { 9 + struct tlv tlv; 10 + u8 total_size; // in units of 16 byte chunks 11 + u8 unused[7]; // for 16 byte alignment 12 + }; 13 + 14 + /* 15 + * TLV size in units of 16 byte chunks 16 + */ 17 + #define TLV_SIZE ((sizeof(struct roce_tlv) + 15) / 16) 18 + /* 19 + * TLV length in bytes 20 + */ 21 + #define TLV_BYTES (TLV_SIZE * 16) 22 + 23 + #define HAS_TLV_HEADER(msg) (le16_to_cpu(((struct tlv *)(msg))->cmd_discr) == CMD_DISCR_TLV_ENCAP) 24 + #define GET_TLV_DATA(tlv) ((void *)&((uint8_t *)(tlv))[TLV_BYTES]) 25 + 26 + static inline u8 __get_cmdq_base_opcode(struct cmdq_base *req, u32 size) 27 + { 28 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 29 + return ((struct cmdq_base *)GET_TLV_DATA(req))->opcode; 30 + else 31 + return req->opcode; 32 + } 33 + 34 + static inline void __set_cmdq_base_opcode(struct cmdq_base *req, 35 + u32 size, u8 val) 36 + { 37 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 38 + ((struct cmdq_base *)GET_TLV_DATA(req))->opcode = val; 39 + else 40 + req->opcode = val; 41 + } 42 + 43 + static inline __le16 __get_cmdq_base_cookie(struct cmdq_base *req, u32 size) 44 + { 45 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 46 + return ((struct cmdq_base *)GET_TLV_DATA(req))->cookie; 47 + else 48 + return req->cookie; 49 + } 50 + 51 + static inline void __set_cmdq_base_cookie(struct cmdq_base *req, 52 + u32 size, __le16 val) 53 + { 54 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 55 + ((struct cmdq_base *)GET_TLV_DATA(req))->cookie = val; 56 + else 57 + req->cookie = val; 58 + } 59 + 60 + static inline __le64 __get_cmdq_base_resp_addr(struct cmdq_base *req, u32 size) 61 + { 62 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 63 + return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr; 64 + else 65 + return req->resp_addr; 66 + } 67 + 68 + static inline void 
__set_cmdq_base_resp_addr(struct cmdq_base *req, 69 + u32 size, __le64 val) 70 + { 71 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 72 + ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr = val; 73 + else 74 + req->resp_addr = val; 75 + } 76 + 77 + static inline u8 __get_cmdq_base_resp_size(struct cmdq_base *req, u32 size) 78 + { 79 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 80 + return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size; 81 + else 82 + return req->resp_size; 83 + } 84 + 85 + static inline void __set_cmdq_base_resp_size(struct cmdq_base *req, 86 + u32 size, u8 val) 87 + { 88 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 89 + ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size = val; 90 + else 91 + req->resp_size = val; 92 + } 93 + 94 + static inline u8 __get_cmdq_base_cmd_size(struct cmdq_base *req, u32 size) 95 + { 96 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 97 + return ((struct roce_tlv *)(req))->total_size; 98 + else 99 + return req->cmd_size; 100 + } 101 + 102 + static inline void __set_cmdq_base_cmd_size(struct cmdq_base *req, 103 + u32 size, u8 val) 104 + { 105 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 106 + ((struct cmdq_base *)GET_TLV_DATA(req))->cmd_size = val; 107 + else 108 + req->cmd_size = val; 109 + } 110 + 111 + static inline __le16 __get_cmdq_base_flags(struct cmdq_base *req, u32 size) 112 + { 113 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 114 + return ((struct cmdq_base *)GET_TLV_DATA(req))->flags; 115 + else 116 + return req->flags; 117 + } 118 + 119 + static inline void __set_cmdq_base_flags(struct cmdq_base *req, 120 + u32 size, __le16 val) 121 + { 122 + if (HAS_TLV_HEADER(req) && size > TLV_BYTES) 123 + ((struct cmdq_base *)GET_TLV_DATA(req))->flags = val; 124 + else 125 + req->flags = val; 126 + } 127 + 128 + #endif /* __BNG_TLV_H__ */
+2
drivers/infiniband/hw/bnxt_re/bnxt_re.h
··· 224 224 struct workqueue_struct *dcb_wq; 225 225 struct dentry *cc_config; 226 226 struct bnxt_re_dbg_cc_config_params *cc_config_params; 227 + struct dentry *cq_coal_cfg; 228 + struct bnxt_re_dbg_cq_coal_params *cq_coal_cfg_params; 227 229 #define BNXT_VPD_FLD_LEN 32 228 230 char board_partno[BNXT_VPD_FLD_LEN]; 229 231 /* RoCE mirror */
+128
drivers/infiniband/hw/bnxt_re/debugfs.c
··· 23 23 24 24 static struct dentry *bnxt_re_debugfs_root; 25 25 26 + static const char * const bnxt_re_cq_coal_str[] = { 27 + "buf_maxtime", 28 + "normal_maxbuf", 29 + "during_maxbuf", 30 + "en_ring_idle_mode", 31 + "enable", 32 + }; 33 + 26 34 static const char * const bnxt_re_cc_gen0_name[] = { 27 35 "enable_cc", 28 36 "run_avg_weight_g", ··· 357 349 debugfs_create_file("info", 0400, rdev->dbg_root, rdev, &info_fops); 358 350 } 359 351 352 + static ssize_t cq_coal_cfg_write(struct file *file, 353 + const char __user *buf, 354 + size_t count, loff_t *pos) 355 + { 356 + struct seq_file *s = file->private_data; 357 + struct bnxt_re_cq_coal_param *param = s->private; 358 + struct bnxt_re_dev *rdev = param->rdev; 359 + int offset = param->offset; 360 + char lbuf[16] = { }; 361 + u32 val; 362 + 363 + if (count > sizeof(lbuf)) 364 + return -EINVAL; 365 + 366 + if (copy_from_user(lbuf, buf, count)) 367 + return -EFAULT; 368 + 369 + lbuf[sizeof(lbuf) - 1] = '\0'; 370 + 371 + if (kstrtou32(lbuf, 0, &val)) 372 + return -EINVAL; 373 + 374 + switch (offset) { 375 + case BNXT_RE_COAL_CQ_BUF_MAXTIME: 376 + if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME) 377 + return -EINVAL; 378 + rdev->cq_coalescing.buf_maxtime = val; 379 + break; 380 + case BNXT_RE_COAL_CQ_NORMAL_MAXBUF: 381 + if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF) 382 + return -EINVAL; 383 + rdev->cq_coalescing.normal_maxbuf = val; 384 + break; 385 + case BNXT_RE_COAL_CQ_DURING_MAXBUF: 386 + if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF) 387 + return -EINVAL; 388 + rdev->cq_coalescing.during_maxbuf = val; 389 + break; 390 + case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE: 391 + if (val > BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE) 392 + return -EINVAL; 393 + rdev->cq_coalescing.en_ring_idle_mode = val; 394 + break; 395 + case BNXT_RE_COAL_CQ_ENABLE: 396 + if (val > 1) 397 + return -EINVAL; 398 + rdev->cq_coalescing.enable = val; 399 + break; 400 + default: 401 + return -EINVAL; 402 + } 403 + 
return count; 404 + } 405 + 406 + static int cq_coal_cfg_show(struct seq_file *s, void *unused) 407 + { 408 + struct bnxt_re_cq_coal_param *param = s->private; 409 + struct bnxt_re_dev *rdev = param->rdev; 410 + int offset = param->offset; 411 + u32 val = 0; 412 + 413 + switch (offset) { 414 + case BNXT_RE_COAL_CQ_BUF_MAXTIME: 415 + val = rdev->cq_coalescing.buf_maxtime; 416 + break; 417 + case BNXT_RE_COAL_CQ_NORMAL_MAXBUF: 418 + val = rdev->cq_coalescing.normal_maxbuf; 419 + break; 420 + case BNXT_RE_COAL_CQ_DURING_MAXBUF: 421 + val = rdev->cq_coalescing.during_maxbuf; 422 + break; 423 + case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE: 424 + val = rdev->cq_coalescing.en_ring_idle_mode; 425 + break; 426 + case BNXT_RE_COAL_CQ_ENABLE: 427 + val = rdev->cq_coalescing.enable; 428 + break; 429 + default: 430 + return -EINVAL; 431 + } 432 + 433 + seq_printf(s, "%u\n", val); 434 + return 0; 435 + } 436 + DEFINE_SHOW_STORE_ATTRIBUTE(cq_coal_cfg); 437 + 438 + static void bnxt_re_cleanup_cq_coal_debugfs(struct bnxt_re_dev *rdev) 439 + { 440 + debugfs_remove_recursive(rdev->cq_coal_cfg); 441 + kfree(rdev->cq_coal_cfg_params); 442 + } 443 + 444 + static void bnxt_re_init_cq_coal_debugfs(struct bnxt_re_dev *rdev) 445 + { 446 + struct bnxt_re_dbg_cq_coal_params *dbg_cq_coal_params; 447 + int i; 448 + 449 + if (!_is_cq_coalescing_supported(rdev->dev_attr->dev_cap_flags2)) 450 + return; 451 + 452 + dbg_cq_coal_params = kzalloc(sizeof(*dbg_cq_coal_params), GFP_KERNEL); 453 + if (!dbg_cq_coal_params) 454 + return; 455 + 456 + rdev->cq_coal_cfg = debugfs_create_dir("cq_coal_cfg", rdev->dbg_root); 457 + rdev->cq_coal_cfg_params = dbg_cq_coal_params; 458 + 459 + for (i = 0; i < BNXT_RE_COAL_CQ_MAX; i++) { 460 + dbg_cq_coal_params->params[i].offset = i; 461 + dbg_cq_coal_params->params[i].rdev = rdev; 462 + debugfs_create_file(bnxt_re_cq_coal_str[i], 463 + 0600, rdev->cq_coal_cfg, 464 + &dbg_cq_coal_params->params[i], 465 + &cq_coal_cfg_fops); 466 + } 467 + } 468 + 360 469 void 
bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev) 361 470 { 362 471 struct pci_dev *pdev = rdev->en_dev->pdev; ··· 499 374 rdev->cc_config, tmp_params, 500 375 &bnxt_re_cc_config_ops); 501 376 } 377 + 378 + bnxt_re_init_cq_coal_debugfs(rdev); 502 379 } 503 380 504 381 void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev) 505 382 { 383 + bnxt_re_cleanup_cq_coal_debugfs(rdev); 506 384 debugfs_remove_recursive(rdev->qp_debugfs); 507 385 debugfs_remove_recursive(rdev->cc_config); 508 386 kfree(rdev->cc_config_params);
+19
drivers/infiniband/hw/bnxt_re/debugfs.h
··· 33 33 struct bnxt_re_dbg_cc_config_params { 34 34 struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0]; 35 35 }; 36 + 37 + struct bnxt_re_cq_coal_param { 38 + struct bnxt_re_dev *rdev; 39 + u32 offset; 40 + }; 41 + 42 + enum bnxt_re_cq_coal_types { 43 + BNXT_RE_COAL_CQ_BUF_MAXTIME, 44 + BNXT_RE_COAL_CQ_NORMAL_MAXBUF, 45 + BNXT_RE_COAL_CQ_DURING_MAXBUF, 46 + BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE, 47 + BNXT_RE_COAL_CQ_ENABLE, 48 + BNXT_RE_COAL_CQ_MAX 49 + 50 + }; 51 + 52 + struct bnxt_re_dbg_cq_coal_params { 53 + struct bnxt_re_cq_coal_param params[BNXT_RE_COAL_CQ_MAX]; 54 + }; 36 55 #endif
+5 -3
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 601 601 mr->qplib_mr.va = (u64)(unsigned long)fence->va; 602 602 mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES; 603 603 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 604 - BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE); 604 + BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE, 605 + _is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)); 605 606 if (rc) { 606 607 ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n"); 607 608 goto fail; ··· 4028 4027 mr->qplib_mr.hwq.level = PBL_LVL_MAX; 4029 4028 mr->qplib_mr.total_size = -1; /* Infinte length */ 4030 4029 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0, 4031 - PAGE_SIZE); 4030 + PAGE_SIZE, false); 4032 4031 if (rc) 4033 4032 goto fail_mr; 4034 4033 ··· 4258 4257 4259 4258 umem_pgs = ib_umem_num_dma_blocks(umem, page_size); 4260 4259 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, 4261 - umem_pgs, page_size); 4260 + umem_pgs, page_size, 4261 + _is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)); 4262 4262 if (rc) { 4263 4263 ibdev_err(&rdev->ibdev, "Failed to register user MR - rc = %d\n", rc); 4264 4264 rc = -EIO;
+1
drivers/infiniband/hw/bnxt_re/main.c
··· 1453 1453 atomic_set(&rdev->stats.res.pd_count, 0); 1454 1454 rdev->cosq[0] = 0xFFFF; 1455 1455 rdev->cosq[1] = 0xFFFF; 1456 + rdev->cq_coalescing.enable = 1; 1456 1457 rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME; 1457 1458 if (bnxt_re_chip_gen_p7(en_dev->chip_num)) { 1458 1459 rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;
+2 -1
drivers/infiniband/hw/bnxt_re/qplib_fp.c
··· 2226 2226 req.cq_handle = cpu_to_le64(cq->cq_handle); 2227 2227 req.cq_size = cpu_to_le32(cq->max_wqe); 2228 2228 2229 - if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2)) { 2229 + if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2) && 2230 + cq->coalescing->enable) { 2230 2231 req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID); 2231 2232 coalescing |= ((cq->coalescing->buf_maxtime << 2232 2233 CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &
+1
drivers/infiniband/hw/bnxt_re/qplib_fp.h
··· 395 395 u8 normal_maxbuf; 396 396 u8 during_maxbuf; 397 397 u8 en_ring_idle_mode; 398 + u8 enable; 398 399 }; 399 400 400 401 #define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1
+4 -4
drivers/infiniband/hw/bnxt_re/qplib_sp.c
··· 162 162 attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1; 163 163 attr->max_srq_sges = sb->max_srq_sge; 164 164 attr->max_pkey = 1; 165 - attr->max_inline_data = le32_to_cpu(sb->max_inline_data); 165 + attr->max_inline_data = attr->max_qp_sges * sizeof(struct sq_sge); 166 166 if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) 167 167 attr->l2_db_size = (sb->l2_db_space_size + 1) * 168 168 (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); ··· 578 578 } 579 579 580 580 int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, 581 - struct ib_umem *umem, int num_pbls, u32 buf_pg_size) 581 + struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr) 582 582 { 583 583 struct bnxt_qplib_rcfw *rcfw = res->rcfw; 584 584 struct bnxt_qplib_hwq_attr hwq_attr = {}; ··· 640 640 req.access = (mr->access_flags & BNXT_QPLIB_MR_ACCESS_MASK); 641 641 req.va = cpu_to_le64(mr->va); 642 642 req.key = cpu_to_le32(mr->lkey); 643 - if (_is_alloc_mr_unified(res->dattr->dev_cap_flags)) 643 + if (unified_mr) 644 644 req.key = cpu_to_le32(mr->pd->id); 645 645 req.flags = cpu_to_le16(mr->flags); 646 646 req.mr_size = cpu_to_le64(mr->total_size); ··· 651 651 if (rc) 652 652 goto fail; 653 653 654 - if (_is_alloc_mr_unified(res->dattr->dev_cap_flags)) { 654 + if (unified_mr) { 655 655 mr->lkey = le32_to_cpu(resp.xid); 656 656 mr->rkey = mr->lkey; 657 657 }
+1 -1
drivers/infiniband/hw/bnxt_re/qplib_sp.h
··· 341 341 int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, 342 342 bool block); 343 343 int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, 344 - struct ib_umem *umem, int num_pbls, u32 buf_pg_size); 344 + struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr); 345 345 int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr); 346 346 int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res, 347 347 struct bnxt_qplib_mrw *mr, int max);
+1 -1
drivers/infiniband/hw/cxgb4/mem.c
··· 348 348 { 349 349 int err; 350 350 351 - pr_debug("*pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", 351 + pr_debug("*pbl_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", 352 352 pbl_addr, rdev->lldi.vr->pbl.start, 353 353 pbl_size); 354 354
+2 -2
drivers/infiniband/hw/hfi1/init.c
··· 745 745 ppd->hfi1_wq = 746 746 alloc_workqueue( 747 747 "hfi%d_%d", 748 - WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | 749 - WQ_MEM_RECLAIM, 748 + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | 749 + WQ_PERCPU, 750 750 HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, 751 751 dd->unit, pidx); 752 752 if (!ppd->hfi1_wq)
+2 -2
drivers/infiniband/hw/hfi1/opfn.c
··· 305 305 int opfn_init(void) 306 306 { 307 307 opfn_wq = alloc_workqueue("hfi_opfn", 308 - WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | 309 - WQ_MEM_RECLAIM, 308 + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | 309 + WQ_PERCPU, 310 310 HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES); 311 311 if (!opfn_wq) 312 312 return -ENOMEM;
+3 -1
drivers/infiniband/hw/hns/Makefile
··· 4 4 # 5 5 6 6 ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 7 + ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3pf 8 + ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common 7 9 ccflags-y += -I $(src) 8 10 9 11 hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ 10 12 hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ 11 13 hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ 12 - hns_roce_debugfs.o hns_roce_hw_v2.o 14 + hns_roce_debugfs.o hns_roce_hw_v2.o hns_roce_bond.o 13 15 14 16 obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
-1
drivers/infiniband/hw/hns/hns_roce_ah.c
··· 30 30 * SOFTWARE. 31 31 */ 32 32 33 - #include <linux/pci.h> 34 33 #include <rdma/ib_addr.h> 35 34 #include <rdma/ib_cache.h> 36 35 #include "hns_roce_device.h"
+1012
drivers/infiniband/hw/hns/hns_roce_bond.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* 3 + * Copyright (c) 2025 Hisilicon Limited. 4 + */ 5 + 6 + #include <net/lag.h> 7 + #include <net/bonding.h> 8 + #include "hns_roce_device.h" 9 + #include "hns_roce_hw_v2.h" 10 + #include "hns_roce_bond.h" 11 + 12 + static DEFINE_XARRAY(roce_bond_xa); 13 + 14 + static struct hns_roce_dev *hns_roce_get_hrdev_by_netdev(struct net_device *net_dev) 15 + { 16 + struct ib_device *ibdev = 17 + ib_device_get_by_netdev(net_dev, RDMA_DRIVER_HNS); 18 + 19 + if (!ibdev) 20 + return NULL; 21 + 22 + return container_of(ibdev, struct hns_roce_dev, ib_dev); 23 + } 24 + 25 + static struct net_device *get_upper_dev_from_ndev(struct net_device *net_dev) 26 + { 27 + struct net_device *upper_dev; 28 + 29 + rcu_read_lock(); 30 + upper_dev = netdev_master_upper_dev_get_rcu(net_dev); 31 + dev_hold(upper_dev); 32 + rcu_read_unlock(); 33 + 34 + return upper_dev; 35 + } 36 + 37 + static int get_netdev_bond_slave_id(struct net_device *net_dev, 38 + struct hns_roce_bond_group *bond_grp) 39 + { 40 + int i; 41 + 42 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) 43 + if (net_dev == bond_grp->bond_func_info[i].net_dev) 44 + return i; 45 + 46 + return -ENOENT; 47 + } 48 + 49 + struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, 50 + u8 bus_num) 51 + { 52 + struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num); 53 + struct hns_roce_bond_group *bond_grp; 54 + struct net_device *upper_dev = NULL; 55 + int i; 56 + 57 + if (!die_info) 58 + return NULL; 59 + 60 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 61 + bond_grp = die_info->bgrps[i]; 62 + if (!bond_grp) 63 + continue; 64 + if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) 65 + return bond_grp; 66 + if (bond_grp->upper_dev) { 67 + upper_dev = get_upper_dev_from_ndev(net_dev); 68 + if (bond_grp->upper_dev == upper_dev) { 69 + dev_put(upper_dev); 70 + return bond_grp; 71 + } 72 + dev_put(upper_dev); 73 + } 74 + } 75 + 76 + return NULL; 77 + } 78 + 79 + static 
int hns_roce_set_bond_netdev(struct hns_roce_bond_group *bond_grp, 80 + struct hns_roce_dev *hr_dev) 81 + { 82 + struct net_device *active_dev; 83 + struct net_device *old_dev; 84 + int i, ret = 0; 85 + 86 + if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 87 + rcu_read_lock(); 88 + active_dev = 89 + bond_option_active_slave_get_rcu(netdev_priv(bond_grp->upper_dev)); 90 + rcu_read_unlock(); 91 + } else { 92 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 93 + active_dev = bond_grp->bond_func_info[i].net_dev; 94 + if (active_dev && 95 + ib_get_curr_port_state(active_dev) == IB_PORT_ACTIVE) 96 + break; 97 + } 98 + } 99 + 100 + if (!active_dev || i == ROCE_BOND_FUNC_MAX) 101 + active_dev = get_hr_netdev(hr_dev, 0); 102 + 103 + old_dev = ib_device_get_netdev(&hr_dev->ib_dev, 1); 104 + if (old_dev == active_dev) 105 + goto out; 106 + 107 + ret = ib_device_set_netdev(&hr_dev->ib_dev, active_dev, 1); 108 + if (ret) { 109 + dev_err(hr_dev->dev, "failed to set netdev for bond.\n"); 110 + goto out; 111 + } 112 + 113 + if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 114 + if (old_dev) 115 + roce_del_all_netdev_gids(&hr_dev->ib_dev, 1, old_dev); 116 + rdma_roce_rescan_port(&hr_dev->ib_dev, 1); 117 + } 118 + out: 119 + dev_put(old_dev); 120 + return ret; 121 + } 122 + 123 + bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) 124 + { 125 + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); 126 + struct hns_roce_bond_group *bond_grp; 127 + u8 bus_num = get_hr_bus_num(hr_dev); 128 + 129 + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 130 + if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED && 131 + bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED) 132 + return true; 133 + 134 + return false; 135 + } 136 + 137 + static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp) 138 + { 139 + struct net_device *net_dev; 140 + u32 active_slave_map = 0; 141 + u8 active_slave_num = 0; 142 + bool active; 143 + u8 i; 144 + 
145 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 146 + net_dev = bond_grp->bond_func_info[i].net_dev; 147 + if (!net_dev || !(bond_grp->slave_map & (1U << i))) 148 + continue; 149 + 150 + active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ? 151 + net_lag_port_dev_txable(net_dev) : 152 + (ib_get_curr_port_state(net_dev) == IB_PORT_ACTIVE); 153 + if (active) { 154 + active_slave_num++; 155 + active_slave_map |= (1U << i); 156 + } 157 + } 158 + 159 + bond_grp->active_slave_num = active_slave_num; 160 + bond_grp->active_slave_map = active_slave_map; 161 + } 162 + 163 + static int hns_roce_recover_bond(struct hns_roce_bond_group *bond_grp, 164 + struct hns_roce_dev *hr_dev) 165 + { 166 + bond_grp->main_hr_dev = hr_dev; 167 + hns_roce_bond_get_active_slave(bond_grp); 168 + 169 + return hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND); 170 + } 171 + 172 + static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp, 173 + u8 func_idx) 174 + { 175 + struct hnae3_handle *handle; 176 + 177 + handle = bond_grp->bond_func_info[func_idx].handle; 178 + if (handle->priv) 179 + hns_roce_bond_uninit_client(bond_grp, func_idx); 180 + } 181 + 182 + static struct hns_roce_dev 183 + *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp, 184 + u8 func_idx, bool need_switch); 185 + 186 + static int switch_main_dev(struct hns_roce_bond_group *bond_grp, 187 + u8 main_func_idx) 188 + { 189 + struct hns_roce_dev *hr_dev; 190 + struct net_device *net_dev; 191 + u8 i; 192 + 193 + bond_grp->main_hr_dev = NULL; 194 + hns_roce_bond_uninit_client(bond_grp, main_func_idx); 195 + 196 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 197 + net_dev = bond_grp->bond_func_info[i].net_dev; 198 + if ((bond_grp->slave_map & (1U << i)) && net_dev) { 199 + /* In case this slave is still being registered as 200 + * a non-bonded PF, uninit it first and then re-init 201 + * it as the main device. 
202 + */ 203 + hns_roce_slave_uninit(bond_grp, i); 204 + hr_dev = hns_roce_slave_init(bond_grp, i, false); 205 + if (hr_dev) { 206 + bond_grp->main_hr_dev = hr_dev; 207 + break; 208 + } 209 + } 210 + } 211 + 212 + if (!bond_grp->main_hr_dev) 213 + return -ENODEV; 214 + 215 + return 0; 216 + } 217 + 218 + static struct hns_roce_dev 219 + *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp, 220 + u8 func_idx, bool need_switch) 221 + { 222 + struct hns_roce_dev *hr_dev = NULL; 223 + struct hnae3_handle *handle; 224 + u8 main_func_idx; 225 + int ret; 226 + 227 + if (need_switch) { 228 + main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); 229 + if (func_idx == main_func_idx) { 230 + ret = switch_main_dev(bond_grp, main_func_idx); 231 + if (ret == -ENODEV) 232 + return NULL; 233 + } 234 + } 235 + 236 + handle = bond_grp->bond_func_info[func_idx].handle; 237 + if (handle) { 238 + if (handle->priv) 239 + return handle->priv; 240 + /* Prevent this device from being initialized as a bond device */ 241 + if (need_switch) 242 + bond_grp->bond_func_info[func_idx].net_dev = NULL; 243 + hr_dev = hns_roce_bond_init_client(bond_grp, func_idx); 244 + if (!hr_dev) 245 + BOND_ERR_LOG("failed to init slave %u.\n", func_idx); 246 + } 247 + 248 + return hr_dev; 249 + } 250 + 251 + static struct hns_roce_die_info *alloc_die_info(int bus_num) 252 + { 253 + struct hns_roce_die_info *die_info; 254 + int ret; 255 + 256 + die_info = kzalloc(sizeof(*die_info), GFP_KERNEL); 257 + if (!die_info) 258 + return NULL; 259 + 260 + ret = xa_err(xa_store(&roce_bond_xa, bus_num, die_info, GFP_KERNEL)); 261 + if (ret) { 262 + kfree(die_info); 263 + return NULL; 264 + } 265 + 266 + mutex_init(&die_info->die_mutex); 267 + 268 + return die_info; 269 + } 270 + 271 + static void dealloc_die_info(struct hns_roce_die_info *die_info, u8 bus_num) 272 + { 273 + mutex_destroy(&die_info->die_mutex); 274 + xa_erase(&roce_bond_xa, bus_num); 275 + kfree(die_info); 276 + } 277 + 278 + static int 
alloc_bond_id(struct hns_roce_bond_group *bond_grp) 279 + { 280 + u8 bus_num = bond_grp->bus_num; 281 + struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num); 282 + int i; 283 + 284 + if (!die_info) { 285 + die_info = alloc_die_info(bus_num); 286 + if (!die_info) 287 + return -ENOMEM; 288 + } 289 + 290 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 291 + if (die_info->bond_id_mask & BOND_ID(i)) 292 + continue; 293 + 294 + die_info->bond_id_mask |= BOND_ID(i); 295 + die_info->bgrps[i] = bond_grp; 296 + bond_grp->bond_id = i; 297 + 298 + return 0; 299 + } 300 + 301 + return -ENOSPC; 302 + } 303 + 304 + static int remove_bond_id(int bus_num, u8 bond_id) 305 + { 306 + struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num); 307 + 308 + if (bond_id >= ROCE_BOND_NUM_MAX) 309 + return -EINVAL; 310 + 311 + if (!die_info) 312 + return -ENODEV; 313 + 314 + die_info->bond_id_mask &= ~BOND_ID(bond_id); 315 + die_info->bgrps[bond_id] = NULL; 316 + if (!die_info->bond_id_mask) 317 + dealloc_die_info(die_info, bus_num); 318 + 319 + return 0; 320 + } 321 + 322 + static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) 323 + { 324 + struct hns_roce_dev *hr_dev; 325 + int ret; 326 + int i; 327 + 328 + for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) { 329 + if (bond_grp->slave_map & (1 << i)) 330 + hns_roce_slave_uninit(bond_grp, i); 331 + } 332 + 333 + mutex_lock(&bond_grp->bond_mutex); 334 + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; 335 + mutex_unlock(&bond_grp->bond_mutex); 336 + bond_grp->main_hr_dev = NULL; 337 + 338 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 339 + if (bond_grp->slave_map & (1 << i)) { 340 + hr_dev = hns_roce_slave_init(bond_grp, i, false); 341 + if (hr_dev) { 342 + bond_grp->main_hr_dev = hr_dev; 343 + break; 344 + } 345 + } 346 + } 347 + 348 + if (!bond_grp->main_hr_dev) { 349 + ret = -ENODEV; 350 + goto out; 351 + } 352 + 353 + hns_roce_bond_get_active_slave(bond_grp); 354 + 355 + ret = hns_roce_cmd_bond(bond_grp, 
HNS_ROCE_SET_BOND); 356 + 357 + out: 358 + if (ret) { 359 + BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); 360 + hns_roce_cleanup_bond(bond_grp); 361 + } else { 362 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 363 + "RoCE set bond finished!\n"); 364 + } 365 + } 366 + 367 + static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) 368 + { 369 + u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); 370 + struct hns_roce_dev *hr_dev; 371 + u8 i; 372 + 373 + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) 374 + goto out; 375 + 376 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; 377 + bond_grp->main_hr_dev = NULL; 378 + 379 + hns_roce_slave_uninit(bond_grp, main_func_idx); 380 + 381 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 382 + hr_dev = hns_roce_slave_init(bond_grp, i, false); 383 + if (hr_dev) 384 + bond_grp->main_hr_dev = hr_dev; 385 + } 386 + 387 + out: 388 + hns_roce_cleanup_bond(bond_grp); 389 + } 390 + 391 + static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) 392 + { 393 + int ret; 394 + 395 + hns_roce_bond_get_active_slave(bond_grp); 396 + 397 + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); 398 + 399 + mutex_lock(&bond_grp->bond_mutex); 400 + if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE) 401 + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; 402 + mutex_unlock(&bond_grp->bond_mutex); 403 + 404 + if (ret) 405 + ibdev_err(&bond_grp->main_hr_dev->ib_dev, 406 + "failed to change RoCE bond slave state, ret = %d.\n", 407 + ret); 408 + else 409 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 410 + "RoCE slave changestate finished!\n"); 411 + } 412 + 413 + static void hns_roce_slave_change_num(struct hns_roce_bond_group *bond_grp) 414 + { 415 + int ret; 416 + u8 i; 417 + 418 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 419 + if (bond_grp->slave_map & (1U << i)) { 420 + if (i == PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn)) 421 + continue; 422 + 
hns_roce_slave_uninit(bond_grp, i); 423 + } else { 424 + hns_roce_slave_init(bond_grp, i, true); 425 + if (!bond_grp->main_hr_dev) { 426 + ret = -ENODEV; 427 + goto out; 428 + } 429 + bond_grp->bond_func_info[i].net_dev = NULL; 430 + bond_grp->bond_func_info[i].handle = NULL; 431 + } 432 + } 433 + 434 + hns_roce_bond_get_active_slave(bond_grp); 435 + 436 + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); 437 + 438 + out: 439 + if (ret) { 440 + BOND_ERR_LOG("failed to change RoCE bond slave num, ret = %d.\n", ret); 441 + hns_roce_cleanup_bond(bond_grp); 442 + } else { 443 + mutex_lock(&bond_grp->bond_mutex); 444 + if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGE_NUM) 445 + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; 446 + mutex_unlock(&bond_grp->bond_mutex); 447 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 448 + "RoCE slave change num finished!\n"); 449 + } 450 + } 451 + 452 + static void hns_roce_bond_info_update_nolock(struct hns_roce_bond_group *bond_grp, 453 + struct net_device *upper_dev) 454 + { 455 + struct hns_roce_v2_priv *priv; 456 + struct hns_roce_dev *hr_dev; 457 + struct net_device *net_dev; 458 + int func_idx; 459 + 460 + bond_grp->slave_map = 0; 461 + rcu_read_lock(); 462 + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { 463 + func_idx = get_netdev_bond_slave_id(net_dev, bond_grp); 464 + if (func_idx < 0) { 465 + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); 466 + if (!hr_dev) 467 + continue; 468 + func_idx = PCI_FUNC(hr_dev->pci_dev->devfn); 469 + if (!bond_grp->bond_func_info[func_idx].net_dev) { 470 + priv = hr_dev->priv; 471 + bond_grp->bond_func_info[func_idx].net_dev = 472 + net_dev; 473 + bond_grp->bond_func_info[func_idx].handle = 474 + priv->handle; 475 + } 476 + ib_device_put(&hr_dev->ib_dev); 477 + } 478 + 479 + bond_grp->slave_map |= (1 << func_idx); 480 + } 481 + rcu_read_unlock(); 482 + } 483 + 484 + static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp, 485 + struct net_device *net_dev) 486 + { 
487 + struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); 488 + bool ret = true; 489 + 490 + if (!hr_dev) { 491 + if (bond_grp && 492 + get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) 493 + return true; 494 + else 495 + return false; 496 + } 497 + 498 + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) { 499 + ret = false; 500 + goto out; 501 + } 502 + 503 + if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) { 504 + ret = false; 505 + goto out; 506 + } 507 + 508 + if (bond_grp->bus_num != get_hr_bus_num(hr_dev)) 509 + ret = false; 510 + 511 + out: 512 + ib_device_put(&hr_dev->ib_dev); 513 + return ret; 514 + } 515 + 516 + static bool check_slave_support(struct hns_roce_bond_group *bond_grp, 517 + struct net_device *upper_dev) 518 + { 519 + struct net_device *net_dev; 520 + u8 slave_num = 0; 521 + 522 + rcu_read_lock(); 523 + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { 524 + if (is_dev_bond_supported(bond_grp, net_dev)) { 525 + slave_num++; 526 + continue; 527 + } 528 + rcu_read_unlock(); 529 + return false; 530 + } 531 + rcu_read_unlock(); 532 + 533 + return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX); 534 + } 535 + 536 + static void hns_roce_bond_work(struct work_struct *work) 537 + { 538 + struct delayed_work *delayed_work = to_delayed_work(work); 539 + struct hns_roce_bond_group *bond_grp = 540 + container_of(delayed_work, struct hns_roce_bond_group, 541 + bond_work); 542 + enum hns_roce_bond_state bond_state; 543 + bool bond_ready; 544 + 545 + mutex_lock(&bond_grp->bond_mutex); 546 + bond_ready = check_slave_support(bond_grp, bond_grp->upper_dev); 547 + hns_roce_bond_info_update_nolock(bond_grp, bond_grp->upper_dev); 548 + bond_state = bond_grp->bond_state; 549 + bond_grp->bond_ready = bond_ready; 550 + mutex_unlock(&bond_grp->bond_mutex); 551 + 552 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 553 + "bond work: bond_ready - %d, bond_state - %d.\n", 554 + bond_ready, bond_state); 555 + 556 + if (!bond_ready) { 557 + 
hns_roce_clear_bond(bond_grp); 558 + return; 559 + } 560 + 561 + switch (bond_state) { 562 + case HNS_ROCE_BOND_NOT_BONDED: 563 + hns_roce_set_bond(bond_grp); 564 + /* In set_bond flow, we don't need to set bond netdev here as 565 + * it has been done when bond_grp->main_hr_dev is registered. 566 + */ 567 + return; 568 + case HNS_ROCE_BOND_SLAVE_CHANGESTATE: 569 + hns_roce_slave_changestate(bond_grp); 570 + break; 571 + case HNS_ROCE_BOND_SLAVE_CHANGE_NUM: 572 + hns_roce_slave_change_num(bond_grp); 573 + break; 574 + default: 575 + return; 576 + } 577 + hns_roce_set_bond_netdev(bond_grp, bond_grp->main_hr_dev); 578 + } 579 + 580 + static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp, 581 + struct hns_roce_dev *hr_dev, 582 + struct net_device *upper_dev) 583 + { 584 + bond_grp->upper_dev = upper_dev; 585 + bond_grp->main_hr_dev = hr_dev; 586 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; 587 + bond_grp->bond_ready = false; 588 + } 589 + 590 + static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp) 591 + { 592 + mutex_lock(&bond_grp->bond_mutex); 593 + 594 + cancel_delayed_work(&bond_grp->bond_work); 595 + bond_grp->upper_dev = NULL; 596 + bond_grp->main_hr_dev = NULL; 597 + bond_grp->bond_ready = false; 598 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED; 599 + bond_grp->slave_map = 0; 600 + memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info)); 601 + 602 + mutex_unlock(&bond_grp->bond_mutex); 603 + } 604 + 605 + void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) 606 + { 607 + int ret; 608 + 609 + ret = bond_grp->main_hr_dev ? 
610 + hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO; 611 + if (ret) 612 + BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret); 613 + else 614 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 615 + "RoCE clear bond finished!\n"); 616 + 617 + hns_roce_detach_bond_grp(bond_grp); 618 + } 619 + 620 + static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp, 621 + struct net_device *net_dev) 622 + { 623 + struct hns_roce_bond_group *bond_grp_tmp; 624 + 625 + bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num); 626 + return bond_grp_tmp == bond_grp; 627 + } 628 + 629 + static void lowerstate_event_setting(struct hns_roce_bond_group *bond_grp, 630 + struct netdev_notifier_changelowerstate_info *info) 631 + { 632 + mutex_lock(&bond_grp->bond_mutex); 633 + 634 + if (bond_grp->bond_ready && 635 + bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED) 636 + bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE; 637 + 638 + mutex_unlock(&bond_grp->bond_mutex); 639 + } 640 + 641 + static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp, 642 + struct netdev_notifier_changelowerstate_info *info) 643 + { 644 + struct net_device *net_dev = 645 + netdev_notifier_info_to_dev((struct netdev_notifier_info *)info); 646 + 647 + if (!netif_is_lag_port(net_dev)) 648 + return false; 649 + 650 + if (!lowerstate_event_filter(bond_grp, net_dev)) 651 + return false; 652 + 653 + lowerstate_event_setting(bond_grp, info); 654 + 655 + return true; 656 + } 657 + 658 + static bool is_bond_setting_supported(struct netdev_lag_upper_info *bond_info) 659 + { 660 + if (!bond_info) 661 + return false; 662 + 663 + if (bond_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && 664 + bond_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) 665 + return false; 666 + 667 + if (bond_info->tx_type == NETDEV_LAG_TX_TYPE_HASH && 668 + bond_info->hash_type > NETDEV_LAG_HASH_L23) 669 + return false; 670 + 671 + return true; 672 + } 673 + 674 + static void 
upper_event_setting(struct hns_roce_bond_group *bond_grp, 675 + struct netdev_notifier_changeupper_info *info) 676 + { 677 + struct netdev_lag_upper_info *bond_upper_info = NULL; 678 + bool slave_inc = info->linking; 679 + 680 + if (slave_inc) 681 + bond_upper_info = info->upper_info; 682 + 683 + if (bond_upper_info) { 684 + bond_grp->tx_type = bond_upper_info->tx_type; 685 + bond_grp->hash_type = bond_upper_info->hash_type; 686 + } 687 + } 688 + 689 + static bool check_unlinking_bond_support(struct hns_roce_bond_group *bond_grp) 690 + { 691 + struct net_device *net_dev; 692 + u8 slave_num = 0; 693 + 694 + rcu_read_lock(); 695 + for_each_netdev_in_bond_rcu(bond_grp->upper_dev, net_dev) { 696 + if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) 697 + slave_num++; 698 + } 699 + rcu_read_unlock(); 700 + 701 + return (slave_num > 1); 702 + } 703 + 704 + static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info, 705 + struct hns_roce_bond_group *bond_grp, 706 + struct net_device *upper_dev) 707 + { 708 + if (!is_bond_setting_supported(bond_info)) 709 + return false; 710 + 711 + return check_slave_support(bond_grp, upper_dev); 712 + } 713 + 714 + static enum bond_support_type 715 + check_bond_support(struct hns_roce_bond_group *bond_grp, 716 + struct net_device *upper_dev, 717 + struct netdev_notifier_changeupper_info *info) 718 + { 719 + bool bond_grp_exist = false; 720 + bool support; 721 + 722 + if (upper_dev == bond_grp->upper_dev) 723 + bond_grp_exist = true; 724 + 725 + if (!info->linking && !bond_grp_exist) 726 + return BOND_NOT_SUPPORT; 727 + 728 + if (info->linking) 729 + support = check_linking_bond_support(info->upper_info, bond_grp, 730 + upper_dev); 731 + else 732 + support = check_unlinking_bond_support(bond_grp); 733 + 734 + if (support) 735 + return BOND_SUPPORT; 736 + 737 + return bond_grp_exist ? 
BOND_EXISTING_NOT_SUPPORT : BOND_NOT_SUPPORT; 738 + } 739 + 740 + static bool upper_event_filter(struct netdev_notifier_changeupper_info *info, 741 + struct hns_roce_bond_group *bond_grp, 742 + struct net_device *net_dev) 743 + { 744 + struct net_device *upper_dev = info->upper_dev; 745 + struct hns_roce_bond_group *bond_grp_tmp; 746 + struct hns_roce_dev *hr_dev; 747 + bool ret = true; 748 + u8 bus_num; 749 + 750 + if (!info->linking || 751 + bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED) 752 + return bond_grp->upper_dev == upper_dev; 753 + 754 + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); 755 + if (!hr_dev) 756 + return false; 757 + 758 + bus_num = get_hr_bus_num(hr_dev); 759 + if (bond_grp->bus_num != bus_num) { 760 + ret = false; 761 + goto out; 762 + } 763 + 764 + bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bus_num); 765 + if (bond_grp_tmp && bond_grp_tmp != bond_grp) 766 + ret = false; 767 + out: 768 + ib_device_put(&hr_dev->ib_dev); 769 + return ret; 770 + } 771 + 772 + static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp, 773 + struct netdev_notifier_changeupper_info *info) 774 + { 775 + struct net_device *net_dev = 776 + netdev_notifier_info_to_dev((struct netdev_notifier_info *)info); 777 + struct net_device *upper_dev = info->upper_dev; 778 + enum bond_support_type support = BOND_SUPPORT; 779 + struct hns_roce_dev *hr_dev; 780 + int slave_id; 781 + 782 + if (!upper_dev || !netif_is_lag_master(upper_dev)) 783 + return false; 784 + 785 + if (!upper_event_filter(info, bond_grp, net_dev)) 786 + return false; 787 + 788 + mutex_lock(&bond_grp->bond_mutex); 789 + support = check_bond_support(bond_grp, upper_dev, info); 790 + if (support == BOND_NOT_SUPPORT) { 791 + mutex_unlock(&bond_grp->bond_mutex); 792 + return false; 793 + } 794 + 795 + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) { 796 + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); 797 + if (!hr_dev) { 798 + mutex_unlock(&bond_grp->bond_mutex); 799 + return 
false; 800 + } 801 + hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev); 802 + ib_device_put(&hr_dev->ib_dev); 803 + } 804 + 805 + /* In the case of netdev being unregistered, the roce 806 + * instance shouldn't be inited. 807 + */ 808 + if (net_dev->reg_state >= NETREG_UNREGISTERING) { 809 + slave_id = get_netdev_bond_slave_id(net_dev, bond_grp); 810 + if (slave_id >= 0) { 811 + bond_grp->bond_func_info[slave_id].net_dev = NULL; 812 + bond_grp->bond_func_info[slave_id].handle = NULL; 813 + } 814 + } 815 + 816 + if (support == BOND_SUPPORT) { 817 + bond_grp->bond_ready = true; 818 + if (bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED) 819 + bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGE_NUM; 820 + } 821 + mutex_unlock(&bond_grp->bond_mutex); 822 + if (support == BOND_SUPPORT) 823 + upper_event_setting(bond_grp, info); 824 + 825 + return true; 826 + } 827 + 828 + static int hns_roce_bond_event(struct notifier_block *self, 829 + unsigned long event, void *ptr) 830 + { 831 + struct hns_roce_bond_group *bond_grp = 832 + container_of(self, struct hns_roce_bond_group, bond_nb); 833 + bool changed = false; 834 + 835 + if (event == NETDEV_CHANGEUPPER) 836 + changed = hns_roce_bond_upper_event(bond_grp, ptr); 837 + if (event == NETDEV_CHANGELOWERSTATE) 838 + changed = hns_roce_bond_lowerstate_event(bond_grp, ptr); 839 + 840 + if (changed) 841 + schedule_delayed_work(&bond_grp->bond_work, HZ); 842 + 843 + return NOTIFY_DONE; 844 + } 845 + 846 + int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev) 847 + { 848 + struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX]; 849 + struct hns_roce_bond_group *bond_grp; 850 + u8 bus_num = get_hr_bus_num(hr_dev); 851 + int ret; 852 + int i; 853 + 854 + if (xa_load(&roce_bond_xa, bus_num)) 855 + return 0; 856 + 857 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 858 + bond_grp = kvzalloc(sizeof(*bond_grp), GFP_KERNEL); 859 + if (!bond_grp) { 860 + ret = -ENOMEM; 861 + goto mem_err; 862 + } 863 + 864 + 
mutex_init(&bond_grp->bond_mutex); 865 + INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_bond_work); 866 + 867 + bond_grp->bond_ready = false; 868 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED; 869 + bond_grp->bus_num = bus_num; 870 + 871 + ret = alloc_bond_id(bond_grp); 872 + if (ret) { 873 + dev_err(hr_dev->dev, 874 + "failed to alloc bond ID, ret = %d.\n", ret); 875 + goto alloc_id_err; 876 + } 877 + 878 + bond_grp->bond_nb.notifier_call = hns_roce_bond_event; 879 + ret = register_netdevice_notifier(&bond_grp->bond_nb); 880 + if (ret) { 881 + ibdev_err(&hr_dev->ib_dev, 882 + "failed to register bond nb, ret = %d.\n", ret); 883 + goto register_nb_err; 884 + } 885 + bgrps[i] = bond_grp; 886 + } 887 + 888 + return 0; 889 + 890 + register_nb_err: 891 + remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); 892 + alloc_id_err: 893 + mutex_destroy(&bond_grp->bond_mutex); 894 + kvfree(bond_grp); 895 + mem_err: 896 + for (i--; i >= 0; i--) { 897 + unregister_netdevice_notifier(&bgrps[i]->bond_nb); 898 + cancel_delayed_work_sync(&bgrps[i]->bond_work); 899 + remove_bond_id(bgrps[i]->bus_num, bgrps[i]->bond_id); 900 + mutex_destroy(&bgrps[i]->bond_mutex); 901 + kvfree(bgrps[i]); 902 + } 903 + return ret; 904 + } 905 + 906 + void hns_roce_dealloc_bond_grp(void) 907 + { 908 + struct hns_roce_bond_group *bond_grp; 909 + struct hns_roce_die_info *die_info; 910 + unsigned long id; 911 + int i; 912 + 913 + xa_for_each(&roce_bond_xa, id, die_info) { 914 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 915 + bond_grp = die_info->bgrps[i]; 916 + if (!bond_grp) 917 + continue; 918 + unregister_netdevice_notifier(&bond_grp->bond_nb); 919 + cancel_delayed_work_sync(&bond_grp->bond_work); 920 + remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); 921 + mutex_destroy(&bond_grp->bond_mutex); 922 + kvfree(bond_grp); 923 + } 924 + } 925 + } 926 + 927 + int hns_roce_bond_init(struct hns_roce_dev *hr_dev) 928 + { 929 + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); 930 + struct 
hns_roce_v2_priv *priv = hr_dev->priv; 931 + struct hns_roce_bond_group *bond_grp; 932 + u8 bus_num = get_hr_bus_num(hr_dev); 933 + int ret; 934 + 935 + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 936 + 937 + if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT) { 938 + ret = hns_roce_recover_bond(bond_grp, hr_dev); 939 + if (ret) { 940 + dev_err(hr_dev->dev, 941 + "failed to recover RoCE bond, ret = %d.\n", ret); 942 + return ret; 943 + } 944 + } 945 + 946 + return hns_roce_set_bond_netdev(bond_grp, hr_dev); 947 + } 948 + 949 + void hns_roce_bond_suspend(struct hnae3_handle *handle) 950 + { 951 + u8 bus_num = handle->pdev->bus->number; 952 + struct hns_roce_bond_group *bond_grp; 953 + struct hns_roce_die_info *die_info; 954 + int i; 955 + 956 + die_info = xa_load(&roce_bond_xa, bus_num); 957 + if (!die_info) 958 + return; 959 + 960 + mutex_lock(&die_info->die_mutex); 961 + 962 + /* 963 + * Avoid duplicated processing when calling this function 964 + * multiple times. 965 + */ 966 + if (die_info->suspend_cnt) 967 + goto out; 968 + 969 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 970 + bond_grp = die_info->bgrps[i]; 971 + if (!bond_grp) 972 + continue; 973 + unregister_netdevice_notifier(&bond_grp->bond_nb); 974 + cancel_delayed_work_sync(&bond_grp->bond_work); 975 + } 976 + 977 + out: 978 + die_info->suspend_cnt++; 979 + mutex_unlock(&die_info->die_mutex); 980 + } 981 + 982 + void hns_roce_bond_resume(struct hnae3_handle *handle) 983 + { 984 + u8 bus_num = handle->pdev->bus->number; 985 + struct hns_roce_bond_group *bond_grp; 986 + struct hns_roce_die_info *die_info; 987 + int i, ret; 988 + 989 + die_info = xa_load(&roce_bond_xa, bus_num); 990 + if (!die_info) 991 + return; 992 + 993 + mutex_lock(&die_info->die_mutex); 994 + 995 + die_info->suspend_cnt--; 996 + if (die_info->suspend_cnt) 997 + goto out; 998 + 999 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 1000 + bond_grp = die_info->bgrps[i]; 1001 + if (!bond_grp) 1002 + continue; 1003 + ret = 
register_netdevice_notifier(&bond_grp->bond_nb); 1004 + if (ret) 1005 + dev_err(&handle->pdev->dev, 1006 + "failed to resume bond notifier(bus_num = %u, id = %u), ret = %d.\n", 1007 + bus_num, bond_grp->bond_id, ret); 1008 + } 1009 + 1010 + out: 1011 + mutex_unlock(&die_info->die_mutex); 1012 + }
+95
drivers/infiniband/hw/hns/hns_roce_bond.h
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Copyright (c) 2025 Hisilicon Limited.
 */

#ifndef _HNS_ROCE_BOND_H
#define _HNS_ROCE_BOND_H

#include <linux/netdevice.h>
#include <net/bonding.h>

/* Number of slots in hns_roce_bond_group::bond_func_info. */
#define ROCE_BOND_FUNC_MAX 4
/* Number of bond groups (and bond IDs) per die. */
#define ROCE_BOND_NUM_MAX 2

/* Bit representing bond ID @id in hns_roce_die_info::bond_id_mask. */
#define BOND_ID(id) BIT(id)

/* Error log helper for contexts where no ib_device is available. */
#define BOND_ERR_LOG(fmt, ...)				\
	pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__)

/* NOTE(review): presumably hardware bond mode encodings - confirm against the command layout. */
enum {
	BOND_MODE_1,
	BOND_MODE_2_4,
};

/* Driver-side hash type values; get_bond_hashtype() maps netdev_lag_hash onto them. */
enum hns_roce_bond_hashtype {
	BOND_HASH_L2,
	BOND_HASH_L34,
	BOND_HASH_L23,
};

/* Result of evaluating a CHANGEUPPER event against a bond group. */
enum bond_support_type {
	BOND_NOT_SUPPORT,
	/*
	 * bond_grp already exists, but in the current
	 * conditions it's no longer supported
	 */
	BOND_EXISTING_NOT_SUPPORT,
	BOND_SUPPORT,
};

/* State machine of a bond group, driven by netdev events and the bond work. */
enum hns_roce_bond_state {
	/* Group has no upper device. */
	HNS_ROCE_BOND_NOT_ATTACHED,
	/* Group is attached to an upper device but the RoCE bond is not set. */
	HNS_ROCE_BOND_NOT_BONDED,
	/* RoCE bond has been set. */
	HNS_ROCE_BOND_IS_BONDED,
	/* A CHANGEUPPER event is pending for an already set-up bond. */
	HNS_ROCE_BOND_SLAVE_CHANGE_NUM,
	/* A CHANGELOWERSTATE event is pending for a bonded group. */
	HNS_ROCE_BOND_SLAVE_CHANGESTATE,
};

/* Command kinds accepted by hns_roce_cmd_bond(). */
enum hns_roce_bond_cmd_type {
	HNS_ROCE_SET_BOND,
	HNS_ROCE_CHANGE_BOND,
	HNS_ROCE_CLEAR_BOND,
};

/* Per-function slot of a bond group, indexed by PCI function number. */
struct hns_roce_func_info {
	struct net_device *net_dev;
	struct hnae3_handle *handle;
};

struct hns_roce_bond_group {
	struct net_device *upper_dev;		/* bond master; NULL when not attached */
	struct hns_roce_dev *main_hr_dev;	/* device used to issue bond commands */
	u8 active_slave_num;
	u32 slave_map;				/* bit i set: function i is a slave */
	u32 active_slave_map;			/* subset of slave_map currently active */
	u8 bond_id;				/* index in hns_roce_die_info::bgrps */
	u8 bus_num;				/* PCI bus number identifying the die */
	struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX];
	bool bond_ready;
	enum hns_roce_bond_state bond_state;
	enum netdev_lag_tx_type tx_type;
	enum netdev_lag_hash hash_type;
	struct mutex bond_mutex;
	struct notifier_block bond_nb;		/* netdevice notifier of this group */
	struct delayed_work bond_work;		/* deferred bond state handling */
};

/* Per-die bookkeeping, stored in an xarray keyed by PCI bus number. */
struct hns_roce_die_info {
	u8 bond_id_mask;			/* BOND_ID() bits of allocated IDs */
	struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
	struct mutex die_mutex;			/* taken by bond suspend/resume */
	u8 suspend_cnt;				/* nesting depth of bond suspend */
};

struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
						  u8 bus_num);
int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev);
void hns_roce_dealloc_bond_grp(void);
void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp);
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev);
int hns_roce_bond_init(struct hns_roce_dev *hr_dev);
void hns_roce_bond_suspend(struct hnae3_handle *handle);
void hns_roce_bond_resume(struct hnae3_handle *handle);

#endif
+15 -1
drivers/infiniband/hw/hns/hns_roce_device.h
··· 33 33 #ifndef _HNS_ROCE_DEVICE_H 34 34 #define _HNS_ROCE_DEVICE_H 35 35 36 + #include <linux/pci.h> 36 37 #include <rdma/ib_verbs.h> 37 38 #include <rdma/hns-abi.h> 38 39 #include "hns_roce_debugfs.h" ··· 154 153 HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), 155 154 HNS_ROCE_CAP_FLAG_STASH = BIT(17), 156 155 HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), 156 + HNS_ROCE_CAP_FLAG_BOND = BIT(21), 157 157 HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22), 158 158 }; 159 159 ··· 179 177 HNS_ROCE_STATE_INIT, 180 178 HNS_ROCE_STATE_INITED, 181 179 HNS_ROCE_STATE_UNINIT, 180 + HNS_ROCE_STATE_BOND_UNINIT, 182 181 }; 183 182 184 183 enum { ··· 1170 1167 grh->traffic_class >> DSCP_SHIFT : grh->traffic_class; 1171 1168 } 1172 1169 1170 + static inline struct net_device *get_hr_netdev(struct hns_roce_dev *hr_dev, 1171 + u8 port) 1172 + { 1173 + return hr_dev->iboe.netdevs[port]; 1174 + } 1175 + 1176 + static inline u8 get_hr_bus_num(struct hns_roce_dev *hr_dev) 1177 + { 1178 + return hr_dev->pci_dev->bus->number; 1179 + } 1180 + 1173 1181 void hns_roce_init_uar_table(struct hns_roce_dev *dev); 1174 1182 int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); 1175 1183 ··· 1307 1293 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); 1308 1294 void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); 1309 1295 int hns_roce_init(struct hns_roce_dev *hr_dev); 1310 - void hns_roce_exit(struct hns_roce_dev *hr_dev); 1296 + void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup); 1311 1297 int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); 1312 1298 int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); 1313 1299 int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
+136 -5
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 43 43 #include <rdma/ib_umem.h> 44 44 #include <rdma/uverbs_ioctl.h> 45 45 46 + #include "hclge_main.h" 46 47 #include "hns_roce_common.h" 47 48 #include "hns_roce_device.h" 48 49 #include "hns_roce_cmd.h" 49 50 #include "hns_roce_hem.h" 50 51 #include "hns_roce_hw_v2.h" 52 + #include "hns_roce_bond.h" 51 53 52 54 #define CREATE_TRACE_POINTS 53 55 #include "hns_roce_trace.h" ··· 1436 1434 return ret; 1437 1435 } 1438 1436 1437 + static enum hns_roce_opcode_type 1438 + get_bond_opcode(enum hns_roce_bond_cmd_type bond_type) 1439 + { 1440 + switch (bond_type) { 1441 + case HNS_ROCE_SET_BOND: 1442 + return HNS_ROCE_OPC_SET_BOND_INFO; 1443 + case HNS_ROCE_CHANGE_BOND: 1444 + return HNS_ROCE_OPC_CHANGE_ACTIVE_PORT; 1445 + case HNS_ROCE_CLEAR_BOND: 1446 + return HNS_ROCE_OPC_CLEAR_BOND_INFO; 1447 + default: 1448 + WARN(true, "Invalid bond type %d!\n", bond_type); 1449 + return HNS_ROCE_OPC_SET_BOND_INFO; 1450 + } 1451 + } 1452 + 1453 + static enum hns_roce_bond_hashtype 1454 + get_bond_hashtype(enum netdev_lag_hash netdev_hashtype) 1455 + { 1456 + switch (netdev_hashtype) { 1457 + case NETDEV_LAG_HASH_L2: 1458 + return BOND_HASH_L2; 1459 + case NETDEV_LAG_HASH_L34: 1460 + return BOND_HASH_L34; 1461 + case NETDEV_LAG_HASH_L23: 1462 + return BOND_HASH_L23; 1463 + default: 1464 + WARN(true, "Invalid hash type %d!\n", netdev_hashtype); 1465 + return BOND_HASH_L2; 1466 + } 1467 + } 1468 + 1469 + int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp, 1470 + enum hns_roce_bond_cmd_type bond_type) 1471 + { 1472 + enum hns_roce_opcode_type opcode = get_bond_opcode(bond_type); 1473 + struct hns_roce_bond_info *slave_info; 1474 + struct hns_roce_cmq_desc desc = {}; 1475 + int ret; 1476 + 1477 + slave_info = (struct hns_roce_bond_info *)desc.data; 1478 + hns_roce_cmq_setup_basic_desc(&desc, opcode, false); 1479 + 1480 + slave_info->bond_id = cpu_to_le32(bond_grp->bond_id); 1481 + if (bond_type == HNS_ROCE_CLEAR_BOND) 1482 + goto out; 1483 + 1484 + if (bond_grp->tx_type == 
NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 1485 + slave_info->bond_mode = cpu_to_le32(BOND_MODE_1); 1486 + if (bond_grp->active_slave_num != 1) 1487 + ibdev_warn(&bond_grp->main_hr_dev->ib_dev, 1488 + "active slave cnt(%u) in Mode 1 is invalid.\n", 1489 + bond_grp->active_slave_num); 1490 + } else { 1491 + slave_info->bond_mode = cpu_to_le32(BOND_MODE_2_4); 1492 + slave_info->hash_policy = 1493 + cpu_to_le32(get_bond_hashtype(bond_grp->hash_type)); 1494 + } 1495 + 1496 + slave_info->active_slave_cnt = cpu_to_le32(bond_grp->active_slave_num); 1497 + slave_info->active_slave_mask = cpu_to_le32(bond_grp->active_slave_map); 1498 + slave_info->slave_mask = cpu_to_le32(bond_grp->slave_map); 1499 + 1500 + out: 1501 + ret = hns_roce_cmq_send(bond_grp->main_hr_dev, &desc, 1); 1502 + if (ret) 1503 + ibdev_err(&bond_grp->main_hr_dev->ib_dev, 1504 + "cmq bond type(%d) failed, ret = %d.\n", 1505 + bond_type, ret); 1506 + 1507 + return ret; 1508 + } 1509 + 1439 1510 static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, 1440 1511 dma_addr_t base_addr, u8 cmd, unsigned long tag) 1441 1512 { ··· 2349 2274 caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS); 2350 2275 caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) << 2351 2276 HNS_ROCE_CAP_FLAGS_EX_SHIFT; 2277 + 2278 + if (hr_dev->is_vf) 2279 + caps->flags &= ~HNS_ROCE_CAP_FLAG_BOND; 2352 2280 2353 2281 caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS); 2354 2282 caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID); ··· 7145 7067 } 7146 7068 7147 7069 static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, 7148 - bool reset) 7070 + bool reset, bool bond_cleanup) 7149 7071 { 7150 7072 struct hns_roce_dev *hr_dev = handle->priv; 7151 7073 ··· 7157 7079 hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT; 7158 7080 hns_roce_handle_device_err(hr_dev); 7159 7081 7160 - hns_roce_exit(hr_dev); 7082 + hns_roce_exit(hr_dev, bond_cleanup); 7161 7083 kfree(hr_dev->priv); 7162 7084 
ib_dealloc_device(&hr_dev->ib_dev); 7163 7085 } ··· 7208 7130 static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, 7209 7131 bool reset) 7210 7132 { 7133 + /* Suspend bond to avoid concurrency */ 7134 + hns_roce_bond_suspend(handle); 7135 + 7211 7136 if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) 7212 - return; 7137 + goto out; 7213 7138 7214 7139 handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; 7215 7140 7216 - __hns_roce_hw_v2_uninit_instance(handle, reset); 7141 + __hns_roce_hw_v2_uninit_instance(handle, reset, true); 7142 + 7143 + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; 7144 + 7145 + out: 7146 + hns_roce_bond_resume(handle); 7147 + } 7148 + 7149 + struct hns_roce_dev 7150 + *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp, 7151 + int func_idx) 7152 + { 7153 + struct hnae3_handle *handle; 7154 + int ret; 7155 + 7156 + handle = bond_grp->bond_func_info[func_idx].handle; 7157 + if (!handle || !handle->client) 7158 + return NULL; 7159 + 7160 + ret = hns_roce_hw_v2_init_instance(handle); 7161 + if (ret) 7162 + return NULL; 7163 + 7164 + return handle->priv; 7165 + } 7166 + 7167 + void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, 7168 + int func_idx) 7169 + { 7170 + struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle; 7171 + 7172 + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) 7173 + return; 7174 + 7175 + handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT; 7176 + 7177 + __hns_roce_hw_v2_uninit_instance(handle, false, false); 7217 7178 7218 7179 handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; 7219 7180 } ··· 7260 7143 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) 7261 7144 { 7262 7145 struct hns_roce_dev *hr_dev; 7146 + 7147 + /* Suspend bond to avoid concurrency */ 7148 + hns_roce_bond_suspend(handle); 7263 7149 7264 7150 if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) { 7265 7151 
set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); ··· 7294 7174 if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN, 7295 7175 &handle->rinfo.state)) { 7296 7176 handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; 7177 + hns_roce_bond_resume(handle); 7297 7178 return 0; 7298 7179 } 7299 7180 ··· 7314 7193 dev_info(dev, "reset done, RoCE client reinit finished.\n"); 7315 7194 } 7316 7195 7196 + hns_roce_bond_resume(handle); 7317 7197 return ret; 7318 7198 } 7319 7199 ··· 7326 7204 handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT; 7327 7205 dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n"); 7328 7206 msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY); 7329 - __hns_roce_hw_v2_uninit_instance(handle, false); 7207 + __hns_roce_hw_v2_uninit_instance(handle, false, false); 7330 7208 7331 7209 return 0; 7332 7210 } ··· 7362 7240 if (linkup || !hr_dev) 7363 7241 return; 7364 7242 7243 + /* For bond device, the link status depends on the upper netdev, 7244 + * and the upper device's link status depends on all the slaves' 7245 + * netdev but not only one. So bond device cannot get a correct 7246 + * link status from this path. 7247 + */ 7248 + if (hns_roce_get_bond_grp(netdev, get_hr_bus_num(hr_dev))) 7249 + return; 7250 + 7365 7251 ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev); 7366 7252 } 7367 7253 ··· 7394 7264 7395 7265 static void __exit hns_roce_hw_v2_exit(void) 7396 7266 { 7267 + hns_roce_dealloc_bond_grp(); 7397 7268 hnae3_unregister_client(&hns_roce_hw_v2_client); 7398 7269 hns_roce_cleanup_debugfs(); 7399 7270 }
+20
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
··· 35 35 36 36 #include <linux/bitops.h> 37 37 #include "hnae3.h" 38 + #include "hns_roce_bond.h" 38 39 39 40 #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 40 41 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 ··· 229 228 HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, 230 229 HNS_ROCE_QUERY_RAM_ECC = 0x8513, 231 230 HNS_SWITCH_PARAMETER_CFG = 0x1033, 231 + HNS_ROCE_OPC_SET_BOND_INFO = 0x8601, 232 + HNS_ROCE_OPC_CLEAR_BOND_INFO = 0x8602, 233 + HNS_ROCE_OPC_CHANGE_ACTIVE_PORT = 0x8603, 232 234 }; 233 235 234 236 #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 ··· 1469 1465 __le32 rsv[5]; 1470 1466 }; 1471 1467 1468 + struct hns_roce_bond_info { 1469 + __le32 bond_id; 1470 + __le32 bond_mode; 1471 + __le32 active_slave_cnt; 1472 + __le32 active_slave_mask; 1473 + __le32 slave_mask; 1474 + __le32 hash_policy; 1475 + }; 1476 + 1477 + struct hns_roce_dev 1478 + *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp, 1479 + int func_idx); 1480 + void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, 1481 + int func_idx); 1472 1482 int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); 1483 + int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp, 1484 + enum hns_roce_bond_cmd_type bond_type); 1473 1485 1474 1486 static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], 1475 1487 void __iomem *dest)
+146 -41
drivers/infiniband/hw/hns/hns_roce_main.c
··· 32 32 */ 33 33 #include <linux/acpi.h> 34 34 #include <linux/module.h> 35 - #include <linux/pci.h> 36 35 #include <rdma/ib_addr.h> 37 36 #include <rdma/ib_smi.h> 38 37 #include <rdma/ib_user_verbs.h> ··· 40 41 #include "hns_roce_device.h" 41 42 #include "hns_roce_hem.h" 42 43 #include "hns_roce_hw_v2.h" 44 + #include "hns_roce_bond.h" 43 45 44 46 static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, 45 47 const u8 *addr) ··· 89 89 return ret; 90 90 } 91 91 92 - static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port, 93 - unsigned long event) 92 + static int hns_roce_get_port_state(struct hns_roce_dev *hr_dev, u32 port_num, 93 + enum ib_port_state *state) 94 94 { 95 + struct hns_roce_bond_group *bond_grp; 96 + u8 bus_num = get_hr_bus_num(hr_dev); 97 + struct net_device *net_dev; 98 + 99 + net_dev = ib_device_get_netdev(&hr_dev->ib_dev, port_num); 100 + if (!net_dev) 101 + return -ENODEV; 102 + 103 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { 104 + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 105 + if (bond_grp) { 106 + *state = ib_get_curr_port_state(bond_grp->upper_dev); 107 + goto out; 108 + } 109 + } 110 + 111 + *state = ib_get_curr_port_state(net_dev); 112 + out: 113 + dev_put(net_dev); 114 + return 0; 115 + } 116 + 117 + static int handle_en_event(struct net_device *netdev, 118 + struct hns_roce_dev *hr_dev, 119 + u32 port, unsigned long event) 120 + { 121 + struct ib_device *ibdev = &hr_dev->ib_dev; 95 122 struct device *dev = hr_dev->dev; 96 - struct net_device *netdev; 123 + enum ib_port_state curr_state; 124 + struct ib_event ibevent; 97 125 int ret = 0; 98 126 99 - netdev = hr_dev->iboe.netdevs[port]; 100 127 if (!netdev) { 101 128 dev_err(dev, "can't find netdev on port(%u)!\n", port); 102 129 return -ENODEV; 103 130 } 104 131 105 132 switch (event) { 106 - case NETDEV_UP: 107 - case NETDEV_CHANGE: 108 133 case NETDEV_REGISTER: 109 134 case NETDEV_CHANGEADDR: 110 135 ret = hns_roce_set_mac(hr_dev, port, 
netdev->dev_addr); 111 136 break; 137 + case NETDEV_UP: 138 + case NETDEV_CHANGE: 139 + ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr); 140 + if (ret) 141 + return ret; 142 + fallthrough; 112 143 case NETDEV_DOWN: 113 - /* 114 - * In v1 engine, only support all ports closed together. 115 - */ 144 + if (!netif_is_lag_master(netdev)) 145 + break; 146 + curr_state = ib_get_curr_port_state(netdev); 147 + 148 + write_lock_irq(&ibdev->cache_lock); 149 + if (ibdev->port_data[port].cache.last_port_state == curr_state) { 150 + write_unlock_irq(&ibdev->cache_lock); 151 + return 0; 152 + } 153 + ibdev->port_data[port].cache.last_port_state = curr_state; 154 + write_unlock_irq(&ibdev->cache_lock); 155 + 156 + ibevent.event = (curr_state == IB_PORT_DOWN) ? 157 + IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE; 158 + ibevent.device = ibdev; 159 + ibevent.element.port_num = port + 1; 160 + ib_dispatch_event(&ibevent); 116 161 break; 117 162 default: 118 163 dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event)); ··· 171 126 unsigned long event, void *ptr) 172 127 { 173 128 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 129 + struct hns_roce_bond_group *bond_grp; 174 130 struct hns_roce_ib_iboe *iboe = NULL; 175 131 struct hns_roce_dev *hr_dev = NULL; 132 + struct net_device *upper = NULL; 176 133 int ret; 177 134 u32 port; 178 135 179 136 hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); 180 137 iboe = &hr_dev->iboe; 138 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { 139 + bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), 140 + get_hr_bus_num(hr_dev)); 141 + upper = bond_grp ? 
bond_grp->upper_dev : NULL; 142 + } 181 143 182 144 for (port = 0; port < hr_dev->caps.num_ports; port++) { 183 - if (dev == iboe->netdevs[port]) { 184 - ret = handle_en_event(hr_dev, port, event); 145 + if ((!upper && dev == iboe->netdevs[port]) || 146 + (upper && dev == upper)) { 147 + ret = handle_en_event(dev, hr_dev, port, event); 185 148 if (ret) 186 149 return NOTIFY_DONE; 187 150 break; ··· 201 148 202 149 static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) 203 150 { 151 + struct net_device *net_dev; 204 152 int ret; 205 153 u8 i; 206 154 207 155 for (i = 0; i < hr_dev->caps.num_ports; i++) { 208 - ret = hns_roce_set_mac(hr_dev, i, 209 - hr_dev->iboe.netdevs[i]->dev_addr); 156 + net_dev = get_hr_netdev(hr_dev, i); 157 + ret = hns_roce_set_mac(hr_dev, i, net_dev->dev_addr); 210 158 if (ret) 211 159 return ret; 212 160 } ··· 275 221 struct ib_port_attr *props) 276 222 { 277 223 struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); 278 - struct device *dev = hr_dev->dev; 279 224 struct net_device *net_dev; 280 - unsigned long flags; 281 225 enum ib_mtu mtu; 282 226 u32 port; 283 227 int ret; ··· 296 244 if (ret) 297 245 ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret); 298 246 299 - spin_lock_irqsave(&hr_dev->iboe.lock, flags); 300 - 301 - net_dev = hr_dev->iboe.netdevs[port]; 247 + net_dev = ib_device_get_netdev(ib_dev, port_num); 302 248 if (!net_dev) { 303 - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); 304 - dev_err(dev, "find netdev %u failed!\n", port); 249 + ibdev_err(ib_dev, "find netdev %u failed!\n", port); 305 250 return -EINVAL; 306 251 } 307 252 308 253 mtu = iboe_get_mtu(net_dev->mtu); 309 254 props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256; 310 - props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ? 
311 - IB_PORT_ACTIVE : 312 - IB_PORT_DOWN; 255 + 256 + dev_put(net_dev); 257 + 258 + ret = hns_roce_get_port_state(hr_dev, port_num, &props->state); 259 + if (ret) { 260 + ibdev_err(ib_dev, "failed to get port state.\n"); 261 + return ret; 262 + } 263 + 313 264 props->phys_state = props->state == IB_PORT_ACTIVE ? 314 265 IB_PORT_PHYS_STATE_LINK_UP : 315 266 IB_PORT_PHYS_STATE_DISABLED; 316 - 317 - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); 318 - 319 267 return 0; 320 268 } 321 269 ··· 669 617 return num_counters; 670 618 } 671 619 672 - static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) 620 + static void 621 + hns_roce_unregister_bond_cleanup(struct hns_roce_dev *hr_dev, 622 + struct hns_roce_bond_group *bond_grp) 673 623 { 624 + struct net_device *net_dev; 625 + int i; 626 + 627 + /* To avoid the loss of other slave devices when main_hr_dev 628 + * is unregistered, re-initialize the remaining slaves before 629 + * the bond resources cleanup. 630 + */ 631 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; 632 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 633 + net_dev = bond_grp->bond_func_info[i].net_dev; 634 + if (net_dev && net_dev != get_hr_netdev(hr_dev, 0)) 635 + hns_roce_bond_init_client(bond_grp, i); 636 + } 637 + 638 + hns_roce_cleanup_bond(bond_grp); 639 + } 640 + 641 + static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, 642 + bool bond_cleanup) 643 + { 644 + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); 674 645 struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; 646 + struct hns_roce_bond_group *bond_grp; 647 + u8 bus_num = get_hr_bus_num(hr_dev); 648 + 649 + if (bond_cleanup && hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { 650 + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 651 + if (bond_grp) 652 + hns_roce_unregister_bond_cleanup(hr_dev, bond_grp); 653 + } 675 654 676 655 hr_dev->active = false; 677 656 unregister_netdevice_notifier(&iboe->nb); ··· 791 708 792 709 static int 
hns_roce_register_device(struct hns_roce_dev *hr_dev) 793 710 { 794 - int ret; 795 711 struct hns_roce_ib_iboe *iboe = NULL; 796 - struct ib_device *ib_dev = NULL; 797 712 struct device *dev = hr_dev->dev; 713 + struct ib_device *ib_dev = NULL; 714 + struct net_device *net_dev; 798 715 unsigned int i; 716 + int ret; 799 717 800 718 iboe = &hr_dev->iboe; 801 719 spin_lock_init(&iboe->lock); ··· 831 747 ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); 832 748 ib_set_device_ops(ib_dev, &hns_roce_dev_ops); 833 749 ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops); 834 - for (i = 0; i < hr_dev->caps.num_ports; i++) { 835 - if (!hr_dev->iboe.netdevs[i]) 836 - continue; 837 750 838 - ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i], 839 - i + 1); 840 - if (ret) 841 - return ret; 842 - } 843 751 dma_set_max_seg_size(dev, SZ_2G); 844 - ret = ib_register_device(ib_dev, "hns_%d", dev); 752 + 753 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { 754 + ret = hns_roce_alloc_bond_grp(hr_dev); 755 + if (ret) { 756 + dev_err(dev, "failed to alloc bond_grp for bus %u, ret = %d\n", 757 + get_hr_bus_num(hr_dev), ret); 758 + return ret; 759 + } 760 + } 761 + 762 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND && 763 + hns_roce_bond_is_active(hr_dev)) { 764 + ret = hns_roce_bond_init(hr_dev); 765 + if (ret) { 766 + dev_err(dev, "failed to init bond!\n"); 767 + return ret; 768 + } 769 + ret = ib_register_device(ib_dev, "hns_bond_%d", dev); 770 + } else { 771 + for (i = 0; i < hr_dev->caps.num_ports; i++) { 772 + net_dev = get_hr_netdev(hr_dev, i); 773 + if (!net_dev) 774 + continue; 775 + 776 + ret = ib_device_set_netdev(ib_dev, net_dev, i + 1); 777 + if (ret) 778 + return ret; 779 + } 780 + ret = ib_register_device(ib_dev, "hns_%d", dev); 781 + } 845 782 if (ret) { 846 783 dev_err(dev, "ib_register_device failed!\n"); 847 784 return ret; ··· 1262 1157 return ret; 1263 1158 } 1264 1159 1265 - void hns_roce_exit(struct hns_roce_dev *hr_dev) 1160 + void 
hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) 1266 1161 { 1267 1162 hns_roce_unregister_debugfs(hr_dev); 1268 - hns_roce_unregister_device(hr_dev); 1163 + hns_roce_unregister_device(hr_dev, bond_cleanup); 1269 1164 1270 1165 if (hr_dev->hw->hw_exit) 1271 1166 hr_dev->hw->hw_exit(hr_dev);
-1
drivers/infiniband/hw/hns/hns_roce_pd.c
··· 30 30 * SOFTWARE. 31 31 */ 32 32 33 - #include <linux/pci.h> 34 33 #include "hns_roce_device.h" 35 34 36 35 void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
+3 -2
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 31 31 * SOFTWARE. 32 32 */ 33 33 34 - #include <linux/pci.h> 35 34 #include <rdma/ib_addr.h> 36 35 #include <rdma/ib_umem.h> 37 36 #include <rdma/uverbs_ioctl.h> ··· 1347 1348 struct hns_roce_qp *hr_qp, 1348 1349 struct ib_qp_attr *attr, int attr_mask) 1349 1350 { 1351 + struct net_device *net_dev; 1350 1352 enum ib_mtu active_mtu; 1351 1353 int p; 1352 1354 1353 1355 p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; 1354 - active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu); 1356 + net_dev = get_hr_netdev(hr_dev, p); 1357 + active_mtu = iboe_get_mtu(net_dev->mtu); 1355 1358 1356 1359 if ((hr_dev->caps.max_mtu >= IB_MTU_2048 && 1357 1360 attr->path_mtu > hr_dev->caps.max_mtu) ||
-1
drivers/infiniband/hw/hns/hns_roce_srq.c
··· 3 3 * Copyright (c) 2018 Hisilicon Limited. 4 4 */ 5 5 6 - #include <linux/pci.h> 7 6 #include <rdma/ib_umem.h> 8 7 #include <rdma/uverbs_ioctl.h> 9 8 #include "hns_roce_device.h"
+1 -1
drivers/infiniband/hw/irdma/cm.c
··· 3710 3710 iwpd = iwqp->iwpd; 3711 3711 tagged_offset = (uintptr_t)iwqp->ietf_mem.va; 3712 3712 ibmr = irdma_reg_phys_mr(&iwpd->ibpd, iwqp->ietf_mem.pa, buf_len, 3713 - IB_ACCESS_LOCAL_WRITE, &tagged_offset); 3713 + IB_ACCESS_LOCAL_WRITE, &tagged_offset, false); 3714 3714 if (IS_ERR(ibmr)) { 3715 3715 ret = -ENOMEM; 3716 3716 goto error;
+6 -101
drivers/infiniband/hw/irdma/ctrl.c
··· 2943 2943 __le64 *wqe; 2944 2944 struct irdma_sc_cqp *cqp; 2945 2945 u64 hdr; 2946 - struct irdma_sc_ceq *ceq; 2947 - int ret_code = 0; 2948 2946 2949 2947 cqp = cq->dev->cqp; 2950 2948 if (cq->cq_uk.cq_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt) ··· 2951 2953 if (cq->ceq_id >= cq->dev->hmc_fpm_misc.max_ceqs) 2952 2954 return -EINVAL; 2953 2955 2954 - ceq = cq->dev->ceq[cq->ceq_id]; 2955 - if (ceq && ceq->reg_cq) 2956 - ret_code = irdma_sc_add_cq_ctx(ceq, cq); 2957 - 2958 - if (ret_code) 2959 - return ret_code; 2960 - 2961 2956 wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); 2962 - if (!wqe) { 2963 - if (ceq && ceq->reg_cq) 2964 - irdma_sc_remove_cq_ctx(ceq, cq); 2957 + if (!wqe) 2965 2958 return -ENOMEM; 2966 - } 2967 2959 2968 2960 set_64bit_val(wqe, 0, cq->cq_uk.cq_size); 2969 2961 set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); ··· 3006 3018 struct irdma_sc_cqp *cqp; 3007 3019 __le64 *wqe; 3008 3020 u64 hdr; 3009 - struct irdma_sc_ceq *ceq; 3010 3021 3011 3022 cqp = cq->dev->cqp; 3012 3023 wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); 3013 3024 if (!wqe) 3014 3025 return -ENOMEM; 3015 - 3016 - ceq = cq->dev->ceq[cq->ceq_id]; 3017 - if (ceq && ceq->reg_cq) 3018 - irdma_sc_remove_cq_ctx(ceq, cq); 3019 3026 3020 3027 set_64bit_val(wqe, 0, cq->cq_uk.cq_size); 3021 3028 set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); ··· 3585 3602 } 3586 3603 3587 3604 /** 3588 - * irdma_sc_find_reg_cq - find cq ctx index 3589 - * @ceq: ceq sc structure 3590 - * @cq: cq sc structure 3591 - */ 3592 - static u32 irdma_sc_find_reg_cq(struct irdma_sc_ceq *ceq, 3593 - struct irdma_sc_cq *cq) 3594 - { 3595 - u32 i; 3596 - 3597 - for (i = 0; i < ceq->reg_cq_size; i++) { 3598 - if (cq == ceq->reg_cq[i]) 3599 - return i; 3600 - } 3601 - 3602 - return IRDMA_INVALID_CQ_IDX; 3603 - } 3604 - 3605 - /** 3606 - * irdma_sc_add_cq_ctx - add cq ctx tracking for ceq 3607 - * @ceq: ceq sc structure 3608 - * @cq: cq sc structure 3609 - */ 3610 - int irdma_sc_add_cq_ctx(struct 
irdma_sc_ceq *ceq, struct irdma_sc_cq *cq) 3611 - { 3612 - unsigned long flags; 3613 - 3614 - spin_lock_irqsave(&ceq->req_cq_lock, flags); 3615 - 3616 - if (ceq->reg_cq_size == ceq->elem_cnt) { 3617 - spin_unlock_irqrestore(&ceq->req_cq_lock, flags); 3618 - return -ENOMEM; 3619 - } 3620 - 3621 - ceq->reg_cq[ceq->reg_cq_size++] = cq; 3622 - 3623 - spin_unlock_irqrestore(&ceq->req_cq_lock, flags); 3624 - 3625 - return 0; 3626 - } 3627 - 3628 - /** 3629 - * irdma_sc_remove_cq_ctx - remove cq ctx tracking for ceq 3630 - * @ceq: ceq sc structure 3631 - * @cq: cq sc structure 3632 - */ 3633 - void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq) 3634 - { 3635 - unsigned long flags; 3636 - u32 cq_ctx_idx; 3637 - 3638 - spin_lock_irqsave(&ceq->req_cq_lock, flags); 3639 - cq_ctx_idx = irdma_sc_find_reg_cq(ceq, cq); 3640 - if (cq_ctx_idx == IRDMA_INVALID_CQ_IDX) 3641 - goto exit; 3642 - 3643 - ceq->reg_cq_size--; 3644 - if (cq_ctx_idx != ceq->reg_cq_size) 3645 - ceq->reg_cq[cq_ctx_idx] = ceq->reg_cq[ceq->reg_cq_size]; 3646 - ceq->reg_cq[ceq->reg_cq_size] = NULL; 3647 - 3648 - exit: 3649 - spin_unlock_irqrestore(&ceq->req_cq_lock, flags); 3650 - } 3651 - 3652 - /** 3653 3605 * irdma_sc_cqp_init - Initialize buffers for a control Queue Pair 3654 3606 * @cqp: IWARP control queue pair pointer 3655 3607 * @info: IWARP control queue pair init info pointer ··· 3868 3950 */ 3869 3951 void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq) 3870 3952 { 3953 + unsigned long flags; 3871 3954 u64 temp_val; 3872 3955 u16 sw_cq_sel; 3873 3956 u8 arm_next_se; 3874 3957 u8 arm_seq_num; 3875 3958 3959 + spin_lock_irqsave(&ccq->dev->cqp_lock, flags); 3876 3960 get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val); 3877 3961 sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); 3878 3962 arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); ··· 3885 3965 FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | 3886 3966 FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, 1); 
3887 3967 set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val); 3968 + spin_unlock_irqrestore(&ccq->dev->cqp_lock, flags); 3888 3969 3889 3970 dma_wmb(); /* make sure shadow area is updated before arming */ 3890 3971 ··· 4308 4387 ceq->ceq_elem_pa = info->ceqe_pa; 4309 4388 ceq->virtual_map = info->virtual_map; 4310 4389 ceq->itr_no_expire = info->itr_no_expire; 4311 - ceq->reg_cq = info->reg_cq; 4312 - ceq->reg_cq_size = 0; 4313 - spin_lock_init(&ceq->req_cq_lock); 4314 4390 ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0); 4315 4391 ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0); 4316 4392 ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL); ··· 4390 4472 { 4391 4473 struct irdma_sc_cqp *cqp; 4392 4474 4393 - if (ceq->reg_cq) 4394 - irdma_sc_remove_cq_ctx(ceq, ceq->dev->ccq); 4395 - 4396 4475 cqp = ceq->dev->cqp; 4397 4476 cqp->process_cqp_sds = irdma_update_sds_noccq; 4398 4477 ··· 4408 4493 struct irdma_sc_dev *dev = ceq->dev; 4409 4494 4410 4495 dev->ccq->vsi_idx = ceq->vsi_idx; 4411 - if (ceq->reg_cq) { 4412 - ret_code = irdma_sc_add_cq_ctx(ceq, ceq->dev->ccq); 4413 - if (ret_code) 4414 - return ret_code; 4415 - } 4416 4496 4417 4497 ret_code = irdma_sc_ceq_create(ceq, scratch, true); 4418 4498 if (!ret_code) ··· 4472 4562 struct irdma_sc_cq *temp_cq; 4473 4563 u8 polarity; 4474 4564 u32 cq_idx; 4475 - unsigned long flags; 4476 4565 4477 4566 do { 4478 4567 cq_idx = 0; ··· 4492 4583 } 4493 4584 4494 4585 cq = temp_cq; 4495 - if (ceq->reg_cq) { 4496 - spin_lock_irqsave(&ceq->req_cq_lock, flags); 4497 - cq_idx = irdma_sc_find_reg_cq(ceq, cq); 4498 - spin_unlock_irqrestore(&ceq->req_cq_lock, flags); 4499 - } 4500 4586 4501 4587 IRDMA_RING_MOVE_TAIL(ceq->ceq_ring); 4502 4588 if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring)) ··· 4635 4731 u64 hdr; 4636 4732 4637 4733 dev = aeq->dev; 4638 - if (dev->privileged) 4734 + 4735 + if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) 4639 4736 writel(0, 
dev->hw_regs[IRDMA_PFINT_AEQCTL]); 4640 4737 4641 4738 cqp = dev->cqp;
-3
drivers/infiniband/hw/irdma/hw.c
··· 2365 2365 2366 2366 cqp_info = &cqp_request->info; 2367 2367 info = &cqp_info->in.u.manage_apbvt_entry.info; 2368 - memset(info, 0, sizeof(*info)); 2369 2368 info->add = add_port; 2370 2369 info->port = accel_local_port; 2371 2370 cqp_info->cqp_cmd = IRDMA_OP_MANAGE_APBVT_ENTRY; ··· 2473 2474 if (action == IRDMA_ARP_ADD) { 2474 2475 cqp_info->cqp_cmd = IRDMA_OP_ADD_ARP_CACHE_ENTRY; 2475 2476 info = &cqp_info->in.u.add_arp_cache_entry.info; 2476 - memset(info, 0, sizeof(*info)); 2477 2477 info->arp_index = (u16)arp_index; 2478 2478 info->permanent = true; 2479 2479 ether_addr_copy(info->mac_addr, mac_addr); ··· 2531 2533 2532 2534 cqp_info = &cqp_request->info; 2533 2535 info = &cqp_info->in.u.manage_qhash_table_entry.info; 2534 - memset(info, 0, sizeof(*info)); 2535 2536 info->vsi = &iwdev->vsi; 2536 2537 info->manage = mtype; 2537 2538 info->entry_type = etype;
+5 -1
drivers/infiniband/hw/irdma/icrdma_if.c
··· 302 302 err_ctrl_init: 303 303 icrdma_deinit_interrupts(rf, cdev_info); 304 304 err_init_interrupts: 305 - kfree(iwdev->rf); 305 + mutex_destroy(&rf->ah_tbl_lock); 306 + kfree(rf); 306 307 ib_dealloc_device(&iwdev->ibdev); 307 308 308 309 return err; ··· 320 319 ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false); 321 320 irdma_ib_unregister_device(iwdev); 322 321 icrdma_deinit_interrupts(iwdev->rf, cdev_info); 322 + mutex_destroy(&iwdev->rf->ah_tbl_lock); 323 + 324 + kfree(iwdev->rf); 323 325 324 326 pr_debug("INIT: Gen[%d] func[%d] device remove success\n", 325 327 rdma_ver, PCI_FUNC(cdev_info->pdev->devfn));
+4
drivers/infiniband/hw/irdma/ig3rdma_if.c
··· 55 55 ret = irdma_sc_vchnl_init(&rf->sc_dev, &virt_info); 56 56 if (ret) { 57 57 destroy_workqueue(rf->vchnl_wq); 58 + mutex_destroy(&rf->sc_dev.vchnl_mutex); 58 59 return ret; 59 60 } 60 61 ··· 125 124 { 126 125 struct irdma_hw *hw = &rf->hw; 127 126 127 + mutex_destroy(&rf->ah_tbl_lock); 128 128 destroy_workqueue(rf->vchnl_wq); 129 + mutex_destroy(&rf->sc_dev.vchnl_mutex); 129 130 kfree(hw->io_regs); 130 131 iounmap(hw->rdma_reg.addr); 131 132 } ··· 152 149 err = ig3rdma_cfg_regions(&rf->hw, cdev_info); 153 150 if (err) { 154 151 destroy_workqueue(rf->vchnl_wq); 152 + mutex_destroy(&rf->sc_dev.vchnl_mutex); 155 153 return err; 156 154 } 157 155
+1 -2
drivers/infiniband/hw/irdma/main.h
··· 556 556 u16 irdma_get_vlan_ipv4(u32 *addr); 557 557 void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); 558 558 struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, 559 - int acc, u64 *iova_start); 559 + int acc, u64 *iova_start, bool dma_mr); 560 560 int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw); 561 561 void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); 562 562 int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, ··· 564 564 void (*callback_fcn)(struct irdma_cqp_request *cqp_request), 565 565 void *cb_param); 566 566 void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request); 567 - bool irdma_cq_empty(struct irdma_cq *iwcq); 568 567 int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event, 569 568 void *ptr); 570 569 int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+4 -2
drivers/infiniband/hw/irdma/pble.c
··· 506 506 void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, 507 507 struct irdma_pble_alloc *palloc) 508 508 { 509 - pble_rsrc->freedpbles += palloc->total_cnt; 510 - 511 509 if (palloc->level == PBLE_LEVEL_2) 512 510 free_lvl2(pble_rsrc, palloc); 513 511 else 514 512 irdma_prm_return_pbles(&pble_rsrc->pinfo, 515 513 &palloc->level1.chunkinfo); 514 + 515 + mutex_lock(&pble_rsrc->pble_mutex_lock); 516 + pble_rsrc->freedpbles += palloc->total_cnt; 516 517 pble_rsrc->stats_alloc_freed++; 518 + mutex_unlock(&pble_rsrc->pble_mutex_lock); 517 519 }
+2 -18
drivers/infiniband/hw/irdma/puda.c
··· 685 685 ukqp->rq_size = rsrc->rq_size; 686 686 687 687 IRDMA_RING_INIT(ukqp->sq_ring, ukqp->sq_size); 688 - IRDMA_RING_INIT(ukqp->initial_ring, ukqp->sq_size); 689 688 IRDMA_RING_INIT(ukqp->rq_ring, ukqp->rq_size); 690 689 ukqp->wqe_alloc_db = qp->pd->dev->wqe_alloc_db; 691 690 ··· 725 726 struct irdma_sc_cqp *cqp; 726 727 u64 hdr; 727 728 struct irdma_ccq_cqe_info compl_info; 728 - int status = 0; 729 729 730 730 cqp = dev->cqp; 731 731 wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0); ··· 754 756 print_hex_dump_debug("PUDA: PUDA CREATE CQ", DUMP_PREFIX_OFFSET, 16, 755 757 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false); 756 758 irdma_sc_cqp_post_sq(dev->cqp); 757 - status = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_CQ, 758 - &compl_info); 759 - if (!status) { 760 - struct irdma_sc_ceq *ceq = dev->ceq[0]; 761 - 762 - if (ceq && ceq->reg_cq) 763 - status = irdma_sc_add_cq_ctx(ceq, cq); 764 - } 765 - 766 - return status; 759 + return irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_CQ, 760 + &compl_info); 767 761 } 768 762 769 763 /** ··· 887 897 struct irdma_puda_buf *buf = NULL; 888 898 struct irdma_puda_buf *nextbuf = NULL; 889 899 struct irdma_virt_mem *vmem; 890 - struct irdma_sc_ceq *ceq; 891 900 892 - ceq = vsi->dev->ceq[0]; 893 901 switch (type) { 894 902 case IRDMA_PUDA_RSRC_TYPE_ILQ: 895 903 rsrc = vsi->ilq; 896 904 vmem = &vsi->ilq_mem; 897 905 vsi->ilq = NULL; 898 - if (ceq && ceq->reg_cq) 899 - irdma_sc_remove_cq_ctx(ceq, &rsrc->cq); 900 906 break; 901 907 case IRDMA_PUDA_RSRC_TYPE_IEQ: 902 908 rsrc = vsi->ieq; 903 909 vmem = &vsi->ieq_mem; 904 910 vsi->ieq = NULL; 905 - if (ceq && ceq->reg_cq) 906 - irdma_sc_remove_cq_ctx(ceq, &rsrc->cq); 907 911 break; 908 912 default: 909 913 ibdev_dbg(to_ibdev(dev), "PUDA: error resource type = 0x%x\n",
-5
drivers/infiniband/hw/irdma/type.h
··· 492 492 u32 first_pm_pbl_idx; 493 493 u8 polarity; 494 494 u16 vsi_idx; 495 - struct irdma_sc_cq **reg_cq; 496 - u32 reg_cq_size; 497 - spinlock_t req_cq_lock; /* protect access to reg_cq array */ 498 495 bool virtual_map:1; 499 496 bool tph_en:1; 500 497 bool itr_no_expire:1; ··· 891 894 u8 tph_val; 892 895 u16 vsi_idx; 893 896 u32 first_pm_pbl_idx; 894 - struct irdma_sc_cq **reg_cq; 895 - u32 reg_cq_idx; 896 897 }; 897 898 898 899 struct irdma_aeq_init_info {
+36 -31
drivers/infiniband/hw/irdma/uk.c
··· 114 114 */ 115 115 void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) 116 116 { 117 - u64 temp; 118 - u32 hw_sq_tail; 119 - u32 sw_sq_head; 120 - 121 - /* valid bit is written and loads completed before reading shadow */ 122 - mb(); 123 - 124 - /* read the doorbell shadow area */ 125 - get_64bit_val(qp->shadow_area, 0, &temp); 126 - 127 - hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp); 128 - sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); 129 - if (sw_sq_head != qp->initial_ring.head) { 130 - if (sw_sq_head != hw_sq_tail) { 131 - if (sw_sq_head > qp->initial_ring.head) { 132 - if (hw_sq_tail >= qp->initial_ring.head && 133 - hw_sq_tail < sw_sq_head) 134 - writel(qp->qp_id, qp->wqe_alloc_db); 135 - } else { 136 - if (hw_sq_tail >= qp->initial_ring.head || 137 - hw_sq_tail < sw_sq_head) 138 - writel(qp->qp_id, qp->wqe_alloc_db); 139 - } 140 - } 141 - } 142 - 143 - qp->initial_ring.head = qp->sq_ring.head; 117 + dma_wmb(); 118 + writel(qp->qp_id, qp->wqe_alloc_db); 144 119 } 145 120 146 121 /** ··· 169 194 qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id; 170 195 qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; 171 196 qp->sq_wrtrk_array[*wqe_idx].quanta = quanta; 197 + qp->sq_wrtrk_array[*wqe_idx].signaled = info->signaled; 172 198 173 199 return wqe; 174 200 } ··· 1113 1137 } 1114 1138 1115 1139 /** 1140 + * irdma_uk_cq_empty - Check if CQ is empty 1141 + * @cq: hw cq 1142 + */ 1143 + bool irdma_uk_cq_empty(struct irdma_cq_uk *cq) 1144 + { 1145 + __le64 *cqe; 1146 + u8 polarity; 1147 + u64 qword3; 1148 + 1149 + if (cq->avoid_mem_cflct) 1150 + cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); 1151 + else 1152 + cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq); 1153 + 1154 + get_64bit_val(cqe, 24, &qword3); 1155 + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); 1156 + 1157 + return polarity != cq->polarity; 1158 + } 1159 + 1160 + /** 1116 1161 * irdma_uk_cq_poll_cmpl - get cq completion info 1117 1162 * @cq: hw cq 1118 1163 * @info: cq poll information returned ··· 
1284 1287 info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); 1285 1288 1286 1289 if (info->q_type == IRDMA_CQE_QTYPE_RQ && is_srq) { 1290 + unsigned long flags; 1291 + 1287 1292 srq = qp->srq_uk; 1288 1293 1289 1294 get_64bit_val(cqe, 8, &info->wr_id); ··· 1298 1299 } else { 1299 1300 info->stag_invalid_set = false; 1300 1301 } 1302 + spin_lock_irqsave(srq->lock, flags); 1301 1303 IRDMA_RING_MOVE_TAIL(srq->srq_ring); 1304 + spin_unlock_irqrestore(srq->lock, flags); 1302 1305 pring = &srq->srq_ring; 1306 + 1303 1307 } else if (info->q_type == IRDMA_CQE_QTYPE_RQ && !is_srq) { 1304 1308 u32 array_idx; 1305 1309 ··· 1357 1355 info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; 1358 1356 if (!info->comp_status) 1359 1357 info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len; 1358 + if (!qp->sq_wrtrk_array[wqe_idx].signaled) { 1359 + ret_code = -EFAULT; 1360 + goto exit; 1361 + } 1360 1362 info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); 1361 1363 IRDMA_RING_SET_TAIL(qp->sq_ring, 1362 1364 wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); ··· 1426 1420 IRDMA_RING_MOVE_TAIL(cq->cq_ring); 1427 1421 if (!cq->avoid_mem_cflct && ext_valid) 1428 1422 IRDMA_RING_MOVE_TAIL(cq->cq_ring); 1429 - set_64bit_val(cq->shadow_area, 0, 1430 - IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); 1423 + if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) & 0x3F || irdma_uk_cq_empty(cq)) 1424 + set_64bit_val(cq->shadow_area, 0, 1425 + IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); 1431 1426 } else { 1432 1427 qword3 &= ~IRDMA_CQ_WQEIDX; 1433 1428 qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail); ··· 1581 1574 qp->conn_wqes = move_cnt; 1582 1575 IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, move_cnt); 1583 1576 IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt); 1584 - IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); 1585 1577 } 1586 1578 1587 1579 /** ··· 1725 1719 qp->max_sq_frag_cnt = info->max_sq_frag_cnt; 1726 1720 sq_ring_size = qp->sq_size << info->sq_shift; 1727 1721 
IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); 1728 - IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); 1729 1722 if (info->first_sq_wq) { 1730 1723 irdma_setup_connection_wqes(qp, info); 1731 1724 qp->swqe_polarity = 1;
+4 -2
drivers/infiniband/hw/irdma/user.h
··· 429 429 struct irdma_bind_window *op_info); 430 430 }; 431 431 432 + bool irdma_uk_cq_empty(struct irdma_cq_uk *cq); 432 433 int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, 433 434 struct irdma_cq_poll_info *info); 434 435 void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, ··· 457 456 struct irdma_uk_attrs *uk_attrs; 458 457 __le64 *shadow_area; 459 458 struct irdma_ring srq_ring; 460 - struct irdma_ring initial_ring; 461 459 u32 srq_id; 462 460 u32 srq_size; 463 461 u32 max_srq_frag_cnt; ··· 465 465 u8 wqe_size; 466 466 u8 wqe_size_multiplier; 467 467 u8 deferred_flag; 468 + spinlock_t *lock; 468 469 }; 469 470 470 471 struct irdma_srq_uk_init_info { ··· 483 482 u64 wrid; 484 483 u32 wr_len; 485 484 u16 quanta; 486 - u8 reserved[2]; 485 + u8 signaled; 486 + u8 reserved[1]; 487 487 }; 488 488 489 489 struct irdma_qp_quanta {
+19 -39
drivers/infiniband/hw/irdma/utils.c
··· 452 452 cqp_request->waiting = wait; 453 453 refcount_set(&cqp_request->refcnt, 1); 454 454 memset(&cqp_request->compl_info, 0, sizeof(cqp_request->compl_info)); 455 + memset(&cqp_request->info, 0, sizeof(cqp_request->info)); 455 456 456 457 return cqp_request; 457 458 } ··· 1069 1068 1070 1069 cqp_info = &cqp_request->info; 1071 1070 qp_info = &cqp_request->info.in.u.qp_create.info; 1072 - memset(qp_info, 0, sizeof(*qp_info)); 1073 1071 qp_info->cq_num_valid = true; 1074 1072 qp_info->next_iwarp_state = IRDMA_QP_STATE_RTS; 1075 1073 cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE; ··· 1343 1343 return -ENOMEM; 1344 1344 1345 1345 cqp_info = &cqp_request->info; 1346 - memset(cqp_info, 0, sizeof(*cqp_info)); 1347 1346 cqp_info->cqp_cmd = IRDMA_OP_QP_DESTROY; 1348 1347 cqp_info->post_sq = 1; 1349 1348 cqp_info->in.u.qp_destroy.qp = qp; ··· 1748 1749 return -ENOMEM; 1749 1750 1750 1751 cqp_info = &cqp_request->info; 1751 - memset(cqp_info, 0, sizeof(*cqp_info)); 1752 1752 cqp_info->cqp_cmd = IRDMA_OP_STATS_GATHER; 1753 1753 cqp_info->post_sq = 1; 1754 1754 cqp_info->in.u.stats_gather.info = pestat->gather_info; ··· 1787 1789 return -ENOMEM; 1788 1790 1789 1791 cqp_info = &cqp_request->info; 1790 - memset(cqp_info, 0, sizeof(*cqp_info)); 1791 1792 cqp_info->cqp_cmd = cmd; 1792 1793 cqp_info->post_sq = 1; 1793 1794 cqp_info->in.u.stats_manage.info = *stats_info; ··· 1887 1890 return -ENOMEM; 1888 1891 1889 1892 cqp_info = &cqp_request->info; 1890 - memset(cqp_info, 0, sizeof(*cqp_info)); 1891 1893 cqp_info->cqp_cmd = cmd; 1892 1894 cqp_info->post_sq = 1; 1893 1895 cqp_info->in.u.ws_node.info = *node_info; ··· 2353 2357 iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context); 2354 2358 } 2355 2359 2356 - bool irdma_cq_empty(struct irdma_cq *iwcq) 2357 - { 2358 - struct irdma_cq_uk *ukcq; 2359 - u64 qword3; 2360 - __le64 *cqe; 2361 - u8 polarity; 2362 - 2363 - ukcq = &iwcq->sc_cq.cq_uk; 2364 - if (ukcq->avoid_mem_cflct) 2365 - cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(ukcq); 
2366 - else 2367 - cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); 2368 - get_64bit_val(cqe, 24, &qword3); 2369 - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); 2370 - 2371 - return polarity != ukcq->polarity; 2372 - } 2373 - 2374 2360 void irdma_remove_cmpls_list(struct irdma_cq *iwcq) 2375 2361 { 2376 2362 struct irdma_cmpl_gen *cmpl_node; ··· 2414 2436 struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk; 2415 2437 struct irdma_ring *sq_ring = &qp->sq_ring; 2416 2438 struct irdma_ring *rq_ring = &qp->rq_ring; 2439 + struct irdma_cq *iwscq = iwqp->iwscq; 2440 + struct irdma_cq *iwrcq = iwqp->iwrcq; 2417 2441 struct irdma_cmpl_gen *cmpl; 2418 2442 __le64 *sw_wqe; 2419 2443 u64 wqe_qword; ··· 2423 2443 bool compl_generated = false; 2424 2444 unsigned long flags1; 2425 2445 2426 - spin_lock_irqsave(&iwqp->iwscq->lock, flags1); 2427 - if (irdma_cq_empty(iwqp->iwscq)) { 2446 + spin_lock_irqsave(&iwscq->lock, flags1); 2447 + if (irdma_uk_cq_empty(&iwscq->sc_cq.cq_uk)) { 2428 2448 unsigned long flags2; 2429 2449 2430 2450 spin_lock_irqsave(&iwqp->lock, flags2); ··· 2432 2452 cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); 2433 2453 if (!cmpl) { 2434 2454 spin_unlock_irqrestore(&iwqp->lock, flags2); 2435 - spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); 2455 + spin_unlock_irqrestore(&iwscq->lock, flags1); 2436 2456 return; 2437 2457 } 2438 2458 ··· 2451 2471 kfree(cmpl); 2452 2472 continue; 2453 2473 } 2454 - ibdev_dbg(iwqp->iwscq->ibcq.device, 2474 + ibdev_dbg(iwscq->ibcq.device, 2455 2475 "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n", 2456 2476 __func__, cmpl->cpi.wr_id, qp->qp_id); 2457 - list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated); 2477 + list_add_tail(&cmpl->list, &iwscq->cmpl_generated); 2458 2478 compl_generated = true; 2459 2479 } 2460 2480 spin_unlock_irqrestore(&iwqp->lock, flags2); 2461 - spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); 2481 + spin_unlock_irqrestore(&iwscq->lock, flags1); 2462 2482 if (compl_generated) 2463 - 
irdma_comp_handler(iwqp->iwscq); 2483 + irdma_comp_handler(iwscq); 2464 2484 } else { 2465 - spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); 2485 + spin_unlock_irqrestore(&iwscq->lock, flags1); 2466 2486 mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, 2467 2487 msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); 2468 2488 } 2469 2489 2470 - spin_lock_irqsave(&iwqp->iwrcq->lock, flags1); 2471 - if (irdma_cq_empty(iwqp->iwrcq)) { 2490 + spin_lock_irqsave(&iwrcq->lock, flags1); 2491 + if (irdma_uk_cq_empty(&iwrcq->sc_cq.cq_uk)) { 2472 2492 unsigned long flags2; 2473 2493 2474 2494 spin_lock_irqsave(&iwqp->lock, flags2); ··· 2476 2496 cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); 2477 2497 if (!cmpl) { 2478 2498 spin_unlock_irqrestore(&iwqp->lock, flags2); 2479 - spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); 2499 + spin_unlock_irqrestore(&iwrcq->lock, flags1); 2480 2500 return; 2481 2501 } 2482 2502 ··· 2488 2508 cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ; 2489 2509 /* remove the RQ WR by moving RQ tail */ 2490 2510 IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); 2491 - ibdev_dbg(iwqp->iwrcq->ibcq.device, 2511 + ibdev_dbg(iwrcq->ibcq.device, 2492 2512 "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n", 2493 2513 __func__, cmpl->cpi.wr_id, qp->qp_id, 2494 2514 wqe_idx); 2495 - list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated); 2515 + list_add_tail(&cmpl->list, &iwrcq->cmpl_generated); 2496 2516 2497 2517 compl_generated = true; 2498 2518 } 2499 2519 spin_unlock_irqrestore(&iwqp->lock, flags2); 2500 - spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); 2520 + spin_unlock_irqrestore(&iwrcq->lock, flags1); 2501 2521 if (compl_generated) 2502 - irdma_comp_handler(iwqp->iwrcq); 2522 + irdma_comp_handler(iwrcq); 2503 2523 } else { 2504 - spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); 2524 + spin_unlock_irqrestore(&iwrcq->lock, flags1); 2505 2525 mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, 2506 2526 
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); 2507 2527 }
+30 -19
drivers/infiniband/hw/irdma/verbs.c
··· 27 27 irdma_fw_minor_ver(&rf->sc_dev); 28 28 props->device_cap_flags = IB_DEVICE_MEM_WINDOW | 29 29 IB_DEVICE_MEM_MGT_EXTENSIONS; 30 - props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; 30 + if (hw_attrs->uk_attrs.hw_rev < IRDMA_GEN_3) 31 + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; 31 32 props->vendor_id = pcidev->vendor; 32 33 props->vendor_part_id = pcidev->device; 33 34 ··· 772 771 773 772 cqp_info = &cqp_request->info; 774 773 qp_info = &cqp_request->info.in.u.qp_create.info; 775 - memset(qp_info, 0, sizeof(*qp_info)); 776 774 qp_info->mac_valid = true; 777 775 qp_info->cq_num_valid = true; 778 776 qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE; ··· 2029 2029 struct irdma_pci_f *rf; 2030 2030 struct irdma_cq_buf *cq_buf = NULL; 2031 2031 unsigned long flags; 2032 + u8 cqe_size; 2032 2033 int ret; 2033 2034 2034 2035 iwdev = to_iwdev(ibcq->device); ··· 2046 2045 return -EINVAL; 2047 2046 2048 2047 if (!iwcq->user_mode) { 2049 - entries++; 2048 + entries += 2; 2050 2049 2051 2050 if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct && 2052 2051 dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) ··· 2054 2053 2055 2054 if (entries & 1) 2056 2055 entries += 1; /* cq size must be an even number */ 2056 + 2057 + cqe_size = iwcq->sc_cq.cq_uk.avoid_mem_cflct ? 
64 : 32; 2058 + if (entries * cqe_size == IRDMA_HW_PAGE_SIZE) 2059 + entries += 2; 2057 2060 } 2058 2061 2059 2062 info.cq_size = max(entries, 4); ··· 2311 2306 ukinfo->srq_size = depth >> shift; 2312 2307 ukinfo->shadow_area = mem->va + ring_size; 2313 2308 2314 - info->shadow_area_pa = info->srq_pa + ring_size; 2315 2309 info->srq_pa = mem->pa; 2310 + info->shadow_area_pa = info->srq_pa + ring_size; 2316 2311 2317 2312 return 0; 2318 2313 } ··· 2389 2384 info.vsi = &iwdev->vsi; 2390 2385 info.pd = &iwpd->sc_pd; 2391 2386 2387 + iwsrq->sc_srq.srq_uk.lock = &iwsrq->lock; 2392 2388 err_code = irdma_sc_srq_init(&iwsrq->sc_srq, &info); 2393 2389 if (err_code) 2394 2390 goto free_dmem; ··· 2489 2483 int err_code; 2490 2484 int entries = attr->cqe; 2491 2485 bool cqe_64byte_ena; 2486 + u8 cqe_size; 2492 2487 2493 2488 err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); 2494 2489 if (err_code) ··· 2516 2509 ukinfo->cq_id = cq_num; 2517 2510 cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? 2518 2511 true : false; 2512 + cqe_size = cqe_64byte_ena ? 
64 : 32; 2519 2513 ukinfo->avoid_mem_cflct = cqe_64byte_ena; 2520 2514 iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; 2521 2515 if (attr->comp_vector < rf->ceqs_count) ··· 2589 2581 goto cq_free_rsrc; 2590 2582 } 2591 2583 2592 - entries++; 2584 + entries += 2; 2593 2585 if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) 2594 2586 entries *= 2; 2595 2587 2596 2588 if (entries & 1) 2597 2589 entries += 1; /* cq size must be an even number */ 2590 + 2591 + if (entries * cqe_size == IRDMA_HW_PAGE_SIZE) 2592 + entries += 2; 2598 2593 2599 2594 ukinfo->cq_size = entries; 2600 2595 ··· 3114 3103 3115 3104 cqp_info = &cqp_request->info; 3116 3105 info = &cqp_info->in.u.alloc_stag.info; 3117 - memset(info, 0, sizeof(*info)); 3118 3106 info->page_size = PAGE_SIZE; 3119 3107 info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; 3120 3108 info->pd_id = iwpd->sc_pd.pd_id; 3121 3109 info->total_len = iwmr->len; 3122 - info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; 3123 3110 info->remote_access = true; 3124 3111 cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG; 3125 3112 cqp_info->post_sq = 1; ··· 3128 3119 if (status) 3129 3120 return status; 3130 3121 3131 - iwmr->is_hwreg = 1; 3122 + iwmr->is_hwreg = true; 3132 3123 return 0; 3133 3124 } 3134 3125 ··· 3262 3253 3263 3254 cqp_info = &cqp_request->info; 3264 3255 stag_info = &cqp_info->in.u.mr_reg_non_shared.info; 3265 - memset(stag_info, 0, sizeof(*stag_info)); 3266 3256 stag_info->va = iwpbl->user_base; 3267 3257 stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; 3268 3258 stag_info->stag_key = (u8)iwmr->stag; ··· 3271 3263 if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS) 3272 3264 stag_info->remote_atomics_en = (access & IB_ACCESS_REMOTE_ATOMIC) ? 
1 : 0; 3273 3265 stag_info->pd_id = iwpd->sc_pd.pd_id; 3274 - stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; 3266 + stag_info->all_memory = iwmr->dma_mr; 3275 3267 if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED) 3276 3268 stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED; 3277 3269 else ··· 3298 3290 irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); 3299 3291 3300 3292 if (!ret) 3301 - iwmr->is_hwreg = 1; 3293 + iwmr->is_hwreg = true; 3302 3294 3303 3295 return ret; 3304 3296 } ··· 3655 3647 3656 3648 cqp_info = &cqp_request->info; 3657 3649 info = &cqp_info->in.u.dealloc_stag.info; 3658 - memset(info, 0, sizeof(*info)); 3659 3650 info->pd_id = iwpd->sc_pd.pd_id; 3660 3651 info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S; 3661 3652 info->mr = true; ··· 3670 3663 if (status) 3671 3664 return status; 3672 3665 3673 - iwmr->is_hwreg = 0; 3666 + iwmr->is_hwreg = false; 3674 3667 return 0; 3675 3668 } 3676 3669 ··· 3793 3786 * @size: size of memory to register 3794 3787 * @access: Access rights 3795 3788 * @iova_start: start of virtual address for physical buffers 3789 + * @dma_mr: Flag indicating whether this region is a PD DMA MR 3796 3790 */ 3797 3791 struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access, 3798 - u64 *iova_start) 3792 + u64 *iova_start, bool dma_mr) 3799 3793 { 3800 3794 struct irdma_device *iwdev = to_iwdev(pd->device); 3801 3795 struct irdma_pbl *iwpbl; ··· 3813 3805 iwpbl = &iwmr->iwpbl; 3814 3806 iwpbl->iwmr = iwmr; 3815 3807 iwmr->type = IRDMA_MEMREG_TYPE_MEM; 3808 + iwmr->dma_mr = dma_mr; 3816 3809 iwpbl->user_base = *iova_start; 3817 3810 stag = irdma_create_stag(iwdev); 3818 3811 if (!stag) { ··· 3852 3843 { 3853 3844 u64 kva = 0; 3854 3845 3855 - return irdma_reg_phys_mr(pd, 0, 0, acc, &kva); 3846 + return irdma_reg_phys_mr(pd, 0, 0, acc, &kva, true); 3856 3847 } 3857 3848 3858 3849 /** ··· 4087 4078 break; 4088 4079 case IB_WR_LOCAL_INV: 4089 4080 info.op_type = 
IRDMA_OP_TYPE_INV_STAG; 4090 - info.local_fence = info.read_fence; 4081 + info.local_fence = true; 4091 4082 info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; 4092 4083 err = irdma_uk_stag_local_invalidate(ukqp, &info, true); 4093 4084 break; ··· 4514 4505 } 4515 4506 4516 4507 if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && 4517 - (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated))) 4508 + (!irdma_uk_cq_empty(ukcq) || !list_empty(&iwcq->cmpl_generated))) 4518 4509 ret = 1; 4519 4510 spin_unlock_irqrestore(&iwcq->lock, flags); 4520 4511 ··· 5213 5204 struct irdma_ah *parent_ah; 5214 5205 int err; 5215 5206 5216 - if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) 5207 + if (udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) 5217 5208 return -EINVAL; 5218 5209 5219 5210 err = irdma_setup_ah(ibah, attr); ··· 5509 5500 irdma_rt_deinit_hw(iwdev); 5510 5501 if (!iwdev->is_vport) { 5511 5502 irdma_ctrl_deinit_hw(iwdev->rf); 5512 - if (iwdev->rf->vchnl_wq) 5503 + if (iwdev->rf->vchnl_wq) { 5513 5504 destroy_workqueue(iwdev->rf->vchnl_wq); 5505 + mutex_destroy(&iwdev->rf->sc_dev.vchnl_mutex); 5506 + } 5514 5507 } 5515 5508 }
+2 -1
drivers/infiniband/hw/irdma/verbs.h
··· 111 111 }; 112 112 struct ib_umem *region; 113 113 int access; 114 - u8 is_hwreg; 114 + bool is_hwreg:1; 115 + bool dma_mr:1; 115 116 u16 type; 116 117 u32 page_cnt; 117 118 u64 page_size;
+1 -1
drivers/infiniband/hw/mlx4/cm.c
··· 591 591 592 592 int mlx4_ib_cm_init(void) 593 593 { 594 - cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0); 594 + cm_wq = alloc_workqueue("mlx4_ib_cm", WQ_PERCPU, 0); 595 595 if (!cm_wq) 596 596 return -ENOMEM; 597 597
+14
drivers/infiniband/hw/mlx5/devx.c
··· 1225 1225 MLX5_GET(create_flow_table_in, in, other_vport)); 1226 1226 MLX5_SET(destroy_flow_table_in, din, vport_number, 1227 1227 MLX5_GET(create_flow_table_in, in, vport_number)); 1228 + MLX5_SET(destroy_flow_table_in, din, other_eswitch, 1229 + MLX5_GET(create_flow_table_in, in, other_eswitch)); 1230 + MLX5_SET(destroy_flow_table_in, din, eswitch_owner_vhca_id, 1231 + MLX5_GET(create_flow_table_in, in, 1232 + eswitch_owner_vhca_id)); 1228 1233 MLX5_SET(destroy_flow_table_in, din, table_type, 1229 1234 MLX5_GET(create_flow_table_in, in, table_type)); 1230 1235 MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); ··· 1242 1237 MLX5_GET(create_flow_group_in, in, other_vport)); 1243 1238 MLX5_SET(destroy_flow_group_in, din, vport_number, 1244 1239 MLX5_GET(create_flow_group_in, in, vport_number)); 1240 + MLX5_SET(destroy_flow_group_in, din, other_eswitch, 1241 + MLX5_GET(create_flow_group_in, in, other_eswitch)); 1242 + MLX5_SET(destroy_flow_group_in, din, eswitch_owner_vhca_id, 1243 + MLX5_GET(create_flow_group_in, in, 1244 + eswitch_owner_vhca_id)); 1245 1245 MLX5_SET(destroy_flow_group_in, din, table_type, 1246 1246 MLX5_GET(create_flow_group_in, in, table_type)); 1247 1247 MLX5_SET(destroy_flow_group_in, din, table_id, ··· 1261 1251 MLX5_GET(set_fte_in, in, other_vport)); 1262 1252 MLX5_SET(delete_fte_in, din, vport_number, 1263 1253 MLX5_GET(set_fte_in, in, vport_number)); 1254 + MLX5_SET(delete_fte_in, din, other_eswitch, 1255 + MLX5_GET(set_fte_in, in, other_eswitch)); 1256 + MLX5_SET(delete_fte_in, din, eswitch_owner_vhca_id, 1257 + MLX5_GET(set_fte_in, in, eswitch_owner_vhca_id)); 1264 1258 MLX5_SET(delete_fte_in, din, table_type, 1265 1259 MLX5_GET(set_fte_in, in, table_type)); 1266 1260 MLX5_SET(delete_fte_in, din, table_id,
+40 -25
drivers/infiniband/hw/mlx5/fs.c
··· 691 691 return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed); 692 692 } 693 693 694 - static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, 695 - struct mlx5_flow_namespace *ns, 694 + static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, 696 695 struct mlx5_ib_flow_prio *prio, 697 - int priority, 698 - int num_entries, int num_groups, 699 - u32 flags, u16 vport) 696 + struct mlx5_flow_table_attr *ft_attr) 700 697 { 701 - struct mlx5_flow_table_attr ft_attr = {}; 702 698 struct mlx5_flow_table *ft; 703 699 704 - ft_attr.prio = priority; 705 - ft_attr.max_fte = num_entries; 706 - ft_attr.flags = flags; 707 - ft_attr.vport = vport; 708 - ft_attr.autogroup.max_num_groups = num_groups; 709 - ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 700 + ft = mlx5_create_auto_grouped_flow_table(ns, ft_attr); 710 701 if (IS_ERR(ft)) 711 702 return ERR_CAST(ft); 712 703 ··· 711 720 enum flow_table_type ft_type) 712 721 { 713 722 bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; 723 + struct mlx5_flow_table_attr ft_attr = {}; 714 724 struct mlx5_flow_namespace *ns = NULL; 715 725 enum mlx5_flow_namespace_type fn_type; 716 726 struct mlx5_ib_flow_prio *prio; ··· 789 797 max_table_size = min_t(int, num_entries, max_table_size); 790 798 791 799 ft = prio->flow_table; 792 - if (!ft) 793 - return _get_prio(dev, ns, prio, priority, max_table_size, 794 - num_groups, flags, 0); 800 + if (ft) 801 + return prio; 795 802 796 - return prio; 803 + ft_attr.prio = priority; 804 + ft_attr.max_fte = max_table_size; 805 + ft_attr.flags = flags; 806 + ft_attr.autogroup.max_num_groups = num_groups; 807 + return _get_prio(ns, prio, &ft_attr); 797 808 } 798 809 799 810 enum { ··· 945 950 enum mlx5_ib_optional_counter_type type) 946 951 { 947 952 enum mlx5_ib_optional_counter_type per_qp_type; 953 + struct mlx5_flow_table_attr ft_attr = {}; 948 954 enum mlx5_flow_namespace_type fn_type; 949 955 struct mlx5_flow_namespace *ns; 
950 956 struct mlx5_ib_flow_prio *prio; ··· 999 1003 if (prio->flow_table) 1000 1004 return 0; 1001 1005 1002 - prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0); 1006 + ft_attr.prio = priority; 1007 + ft_attr.max_fte = MLX5_FS_MAX_POOL_SIZE; 1008 + ft_attr.autogroup.max_num_groups = 1; 1009 + prio = _get_prio(ns, prio, &ft_attr); 1003 1010 if (IS_ERR(prio)) 1004 1011 return PTR_ERR(prio); 1005 1012 ··· 1222 1223 struct mlx5_ib_op_fc *opfc, 1223 1224 enum mlx5_ib_optional_counter_type type) 1224 1225 { 1226 + struct mlx5_flow_table_attr ft_attr = {}; 1225 1227 enum mlx5_flow_namespace_type fn_type; 1226 1228 int priority, i, err, spec_num; 1227 1229 struct mlx5_flow_act flow_act = {}; ··· 1304 1304 if (err) 1305 1305 goto free; 1306 1306 1307 - prio = _get_prio(dev, ns, prio, priority, 1308 - dev->num_ports * MAX_OPFC_RULES, 1, 0, 0); 1307 + ft_attr.prio = priority; 1308 + ft_attr.max_fte = dev->num_ports * MAX_OPFC_RULES; 1309 + ft_attr.autogroup.max_num_groups = 1; 1310 + prio = _get_prio(ns, prio, &ft_attr); 1309 1311 if (IS_ERR(prio)) { 1310 1312 err = PTR_ERR(prio); 1311 1313 goto put_prio; ··· 1874 1872 u32 *flags, u16 *vport_idx, 1875 1873 u16 *vport, 1876 1874 struct mlx5_core_dev **ft_mdev, 1877 - u32 ib_port) 1875 + u32 ib_port, u16 *esw_owner_vhca_id) 1878 1876 { 1879 1877 struct mlx5_core_dev *esw_mdev; 1880 1878 ··· 1888 1886 return -EINVAL; 1889 1887 1890 1888 esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw); 1891 - if (esw_mdev != dev->mdev) 1892 - return -EOPNOTSUPP; 1889 + if (esw_mdev != dev->mdev) { 1890 + if (!MLX5_CAP_ADV_RDMA(dev->mdev, 1891 + rdma_transport_manager_other_eswitch)) 1892 + return -EOPNOTSUPP; 1893 + *flags |= MLX5_FLOW_TABLE_OTHER_ESWITCH; 1894 + *esw_owner_vhca_id = MLX5_CAP_GEN(esw_mdev, vhca_id); 1895 + } 1893 1896 1894 1897 *flags |= MLX5_FLOW_TABLE_OTHER_VPORT; 1895 1898 *ft_mdev = esw_mdev; ··· 1910 1903 bool mcast, u32 ib_port) 1911 1904 { 1912 1905 struct mlx5_core_dev 
*ft_mdev = dev->mdev; 1906 + struct mlx5_flow_table_attr ft_attr = {}; 1913 1907 struct mlx5_flow_namespace *ns = NULL; 1914 1908 struct mlx5_ib_flow_prio *prio = NULL; 1909 + u16 esw_owner_vhca_id = 0; 1915 1910 int max_table_size = 0; 1916 1911 u16 vport_idx = 0; 1917 1912 bool esw_encap; ··· 1975 1966 return ERR_PTR(-EINVAL); 1976 1967 ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags, 1977 1968 &vport_idx, &vport, 1978 - &ft_mdev, ib_port); 1969 + &ft_mdev, ib_port, 1970 + &esw_owner_vhca_id); 1979 1971 if (ret) 1980 1972 return ERR_PTR(ret); 1981 1973 ··· 2036 2026 if (prio->flow_table) 2037 2027 return prio; 2038 2028 2039 - return _get_prio(dev, ns, prio, priority, max_table_size, 2040 - MLX5_FS_MAX_TYPES, flags, vport); 2029 + ft_attr.prio = priority; 2030 + ft_attr.max_fte = max_table_size; 2031 + ft_attr.flags = flags; 2032 + ft_attr.vport = vport; 2033 + ft_attr.esw_owner_vhca_id = esw_owner_vhca_id; 2034 + ft_attr.autogroup.max_num_groups = MLX5_FS_MAX_TYPES; 2035 + return _get_prio(ns, prio, &ft_attr); 2041 2036 } 2042 2037 2043 2038 static struct mlx5_ib_flow_handler *
+72 -2
drivers/infiniband/hw/mlx5/ib_rep.c
··· 44 44 } 45 45 } 46 46 47 + static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner, 48 + struct mlx5_core_dev *new_owner) 49 + { 50 + int ret; 51 + 52 + if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) || 53 + !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support)) 54 + return 0; 55 + 56 + if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) || 57 + !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch)) 58 + return 0; 59 + 60 + ret = mlx5_fs_set_root_dev(cur_owner, new_owner, 61 + FS_FT_RDMA_TRANSPORT_TX); 62 + if (ret) 63 + return ret; 64 + 65 + ret = mlx5_fs_set_root_dev(cur_owner, new_owner, 66 + FS_FT_RDMA_TRANSPORT_RX); 67 + if (ret) { 68 + mlx5_fs_set_root_dev(cur_owner, cur_owner, 69 + FS_FT_RDMA_TRANSPORT_TX); 70 + return ret; 71 + } 72 + 73 + return 0; 74 + } 75 + 76 + static void mlx5_ib_release_transport(struct mlx5_core_dev *dev) 77 + { 78 + struct mlx5_core_dev *peer_dev; 79 + int i, ret; 80 + 81 + mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { 82 + ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev); 83 + WARN_ON_ONCE(ret); 84 + } 85 + } 86 + 87 + static int mlx5_ib_take_transport(struct mlx5_core_dev *dev) 88 + { 89 + struct mlx5_core_dev *peer_dev; 90 + int ret; 91 + int i; 92 + 93 + mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { 94 + ret = mlx5_ib_set_owner_transport(peer_dev, dev); 95 + if (ret) { 96 + mlx5_ib_release_transport(dev); 97 + return ret; 98 + } 99 + } 100 + 101 + return 0; 102 + } 103 + 47 104 static int 48 105 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) 49 106 { ··· 145 88 else 146 89 return mlx5_ib_set_vport_rep(lag_master, rep, vport_index); 147 90 91 + if (mlx5_lag_is_shared_fdb(dev)) { 92 + ret = mlx5_ib_take_transport(lag_master); 93 + if (ret) 94 + return ret; 95 + } 96 + 148 97 ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev, 149 98 mlx5_core_net(lag_master)); 150 - if (!ibdev) 151 - return -ENOMEM; 99 + if (!ibdev) { 100 + 
ret = -ENOMEM; 101 + goto release_transport; 102 + } 152 103 153 104 ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port), 154 105 GFP_KERNEL); ··· 192 127 kfree(ibdev->port); 193 128 fail_port: 194 129 ib_dealloc_device(&ibdev->ib_dev); 130 + release_transport: 131 + if (mlx5_lag_is_shared_fdb(lag_master)) 132 + mlx5_ib_release_transport(lag_master); 133 + 195 134 return ret; 196 135 } 197 136 ··· 251 182 esw = peer_mdev->priv.eswitch; 252 183 mlx5_eswitch_unregister_vport_reps(esw, REP_IB); 253 184 } 185 + mlx5_ib_release_transport(mdev); 254 186 } 255 187 __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); 256 188 }
+4
drivers/infiniband/hw/mlx5/main.c
··· 511 511 *active_width = IB_WIDTH_4X; 512 512 *active_speed = IB_SPEED_XDR; 513 513 break; 514 + case MLX5E_PROT_MASK(MLX5E_1600TAUI_8_1600TBASE_CR8_KR8): 515 + *active_width = IB_WIDTH_8X; 516 + *active_speed = IB_SPEED_XDR; 517 + break; 514 518 default: 515 519 return -EINVAL; 516 520 }
+54 -41
drivers/infiniband/hw/mlx5/odp.c
··· 97 97 * a pagefault. */ 98 98 #define MMU_NOTIFIER_TIMEOUT 1000 99 99 100 - #define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT) 101 - #define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT) 102 - #define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS) 103 - #define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT) 104 - #define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1)) 105 - 106 - #define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT 107 - 108 100 static u64 mlx5_imr_ksm_entries; 101 + static u64 mlx5_imr_mtt_entries; 102 + static u64 mlx5_imr_mtt_size; 103 + static u8 mlx5_imr_mtt_shift; 104 + static u8 mlx5_imr_ksm_page_shift; 109 105 110 - static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, 106 + static void populate_ksm(struct mlx5_ksm *pksm, size_t idx, size_t nentries, 111 107 struct mlx5_ib_mr *imr, int flags) 112 108 { 113 109 struct mlx5_core_dev *dev = mr_to_mdev(imr)->mdev; 114 - struct mlx5_klm *end = pklm + nentries; 115 - int step = MLX5_CAP_ODP(dev, mem_page_fault) ? MLX5_IMR_MTT_SIZE : 0; 110 + struct mlx5_ksm *end = pksm + nentries; 111 + u64 step = MLX5_CAP_ODP(dev, mem_page_fault) ? mlx5_imr_mtt_size : 0; 116 112 __be32 key = MLX5_CAP_ODP(dev, mem_page_fault) ? 117 113 cpu_to_be32(imr->null_mmkey.key) : 118 114 mr_to_mdev(imr)->mkeys.null_mkey; 119 115 u64 va = 120 - MLX5_CAP_ODP(dev, mem_page_fault) ? idx * MLX5_IMR_MTT_SIZE : 0; 116 + MLX5_CAP_ODP(dev, mem_page_fault) ? 
idx * mlx5_imr_mtt_size : 0; 121 117 122 118 if (flags & MLX5_IB_UPD_XLT_ZAP) { 123 - for (; pklm != end; pklm++, idx++, va += step) { 124 - pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); 125 - pklm->key = key; 126 - pklm->va = cpu_to_be64(va); 119 + for (; pksm != end; pksm++, idx++, va += step) { 120 + pksm->key = key; 121 + pksm->va = cpu_to_be64(va); 127 122 } 128 123 return; 129 124 } ··· 142 147 */ 143 148 lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); 144 149 145 - for (; pklm != end; pklm++, idx++, va += step) { 150 + for (; pksm != end; pksm++, idx++, va += step) { 146 151 struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); 147 152 148 - pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); 149 153 if (mtt) { 150 - pklm->key = cpu_to_be32(mtt->ibmr.lkey); 151 - pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); 154 + pksm->key = cpu_to_be32(mtt->ibmr.lkey); 155 + pksm->va = cpu_to_be64(idx * mlx5_imr_mtt_size); 152 156 } else { 153 - pklm->key = key; 154 - pklm->va = cpu_to_be64(va); 157 + pksm->key = key; 158 + pksm->va = cpu_to_be64(va); 155 159 } 156 160 } 157 161 } ··· 195 201 struct mlx5_ib_mr *mr, int flags) 196 202 { 197 203 if (flags & MLX5_IB_UPD_XLT_INDIRECT) { 198 - populate_klm(xlt, idx, nentries, mr, flags); 204 + populate_ksm(xlt, idx, nentries, mr, flags); 199 205 return 0; 200 206 } else { 201 207 return populate_mtt(xlt, idx, nentries, mr, flags); ··· 220 226 221 227 mutex_lock(&odp_imr->umem_mutex); 222 228 mlx5r_umr_update_xlt(mr->parent, 223 - ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0, 229 + ib_umem_start(odp) >> mlx5_imr_mtt_shift, 1, 0, 224 230 MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); 225 231 mutex_unlock(&odp_imr->umem_mutex); 226 232 mlx5_ib_dereg_mr(&mr->ibmr, NULL); ··· 231 237 static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) 232 238 { 233 239 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 234 - unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; 240 + unsigned 
long idx = ib_umem_start(odp) >> mlx5_imr_mtt_shift; 235 241 struct mlx5_ib_mr *imr = mr->parent; 236 242 237 243 /* ··· 259 265 260 266 /* Freeing a MR is a sleeping operation, so bounce to a work queue */ 261 267 INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); 262 - queue_work(system_unbound_wq, &mr->odp_destroy.work); 268 + queue_work(system_dfl_wq, &mr->odp_destroy.work); 263 269 } 264 270 265 271 static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, ··· 419 425 if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && 420 426 MLX5_CAP_GEN(dev->mdev, null_mkey) && 421 427 MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && 422 - !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) 428 + !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled) && 429 + mlx5_imr_ksm_entries != 0 && 430 + !(mlx5_imr_ksm_page_shift > 431 + get_max_log_entity_size_cap(dev, MLX5_MKC_ACCESS_MODE_KSM))) 423 432 caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; 424 433 } 425 434 ··· 473 476 int err; 474 477 475 478 odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), 476 - idx * MLX5_IMR_MTT_SIZE, 477 - MLX5_IMR_MTT_SIZE, &mlx5_mn_ops); 479 + idx * mlx5_imr_mtt_size, 480 + mlx5_imr_mtt_size, &mlx5_mn_ops); 478 481 if (IS_ERR(odp)) 479 482 return ERR_CAST(odp); 480 483 481 484 mr = mlx5_mr_cache_alloc(dev, imr->access_flags, 482 485 MLX5_MKC_ACCESS_MODE_MTT, 483 - MLX5_IMR_MTT_ENTRIES); 486 + mlx5_imr_mtt_entries); 484 487 if (IS_ERR(mr)) { 485 488 ib_umem_odp_release(odp); 486 489 return mr; ··· 492 495 mr->umem = &odp->umem; 493 496 mr->ibmr.lkey = mr->mmkey.key; 494 497 mr->ibmr.rkey = mr->mmkey.key; 495 - mr->ibmr.iova = idx * MLX5_IMR_MTT_SIZE; 498 + mr->ibmr.iova = idx * mlx5_imr_mtt_size; 496 499 mr->parent = imr; 497 500 odp->private = mr; 498 501 ··· 503 506 refcount_set(&mr->mmkey.usecount, 2); 504 507 505 508 err = mlx5r_umr_update_xlt(mr, 0, 506 - MLX5_IMR_MTT_ENTRIES, 509 + mlx5_imr_mtt_entries, 507 510 PAGE_SHIFT, 508 511 MLX5_IB_UPD_XLT_ZAP | 
509 512 MLX5_IB_UPD_XLT_ENABLE); ··· 608 611 struct mlx5_ib_mr *imr; 609 612 int err; 610 613 611 - if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) 614 + if (!mlx5r_umr_can_load_pas(dev, mlx5_imr_mtt_entries * PAGE_SIZE)) 612 615 return ERR_PTR(-EOPNOTSUPP); 613 616 614 617 umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); ··· 644 647 645 648 err = mlx5r_umr_update_xlt(imr, 0, 646 649 mlx5_imr_ksm_entries, 647 - MLX5_KSM_PAGE_SHIFT, 650 + mlx5_imr_ksm_page_shift, 648 651 MLX5_IB_UPD_XLT_INDIRECT | 649 652 MLX5_IB_UPD_XLT_ZAP | 650 653 MLX5_IB_UPD_XLT_ENABLE); ··· 747 750 struct ib_umem_odp *odp_imr, u64 user_va, 748 751 size_t bcnt, u32 *bytes_mapped, u32 flags) 749 752 { 750 - unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; 753 + unsigned long end_idx = (user_va + bcnt - 1) >> mlx5_imr_mtt_shift; 751 754 unsigned long upd_start_idx = end_idx + 1; 752 755 unsigned long upd_len = 0; 753 756 unsigned long npages = 0; 754 757 int err; 755 758 int ret; 756 759 757 - if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || 758 - mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt)) 760 + if (unlikely(user_va >= mlx5_imr_ksm_entries * mlx5_imr_mtt_size || 761 + mlx5_imr_ksm_entries * mlx5_imr_mtt_size - user_va < bcnt)) 759 762 return -EFAULT; 760 763 761 764 /* Fault each child mr that intersects with our interval. 
*/ 762 765 while (bcnt) { 763 - unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT; 766 + unsigned long idx = user_va >> mlx5_imr_mtt_shift; 764 767 struct ib_umem_odp *umem_odp; 765 768 struct mlx5_ib_mr *mtt; 766 769 u64 len; ··· 1921 1924 1922 1925 int mlx5_ib_odp_init(void) 1923 1926 { 1924 - mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) - 1925 - MLX5_IMR_MTT_BITS); 1927 + u32 log_va_pages = ilog2(TASK_SIZE) - PAGE_SHIFT; 1928 + u8 mlx5_imr_mtt_bits; 1926 1929 1930 + /* 48 is default ARM64 VA space and covers X86 4-level paging which is 47 */ 1931 + if (log_va_pages <= 48 - PAGE_SHIFT) 1932 + mlx5_imr_mtt_shift = 30; 1933 + /* 56 is x86-64, 5-level paging */ 1934 + else if (log_va_pages <= 56 - PAGE_SHIFT) 1935 + mlx5_imr_mtt_shift = 34; 1936 + else 1937 + return 0; 1938 + 1939 + mlx5_imr_mtt_size = BIT_ULL(mlx5_imr_mtt_shift); 1940 + mlx5_imr_mtt_bits = mlx5_imr_mtt_shift - PAGE_SHIFT; 1941 + mlx5_imr_mtt_entries = BIT_ULL(mlx5_imr_mtt_bits); 1942 + mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) - 1943 + mlx5_imr_mtt_bits); 1944 + 1945 + mlx5_imr_ksm_page_shift = mlx5_imr_mtt_shift; 1927 1946 return 0; 1928 1947 } 1929 1948 ··· 2106 2093 destroy_prefetch_work(work); 2107 2094 return rc; 2108 2095 } 2109 - queue_work(system_unbound_wq, &work->work); 2096 + queue_work(system_dfl_wq, &work->work); 2110 2097 return 0; 2111 2098 }
+3 -2
drivers/infiniband/hw/mlx5/qp.c
··· 3451 3451 { 3452 3452 u32 stat_rate_support; 3453 3453 3454 - if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS) 3454 + if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS || 3455 + rate == IB_RATE_1600_GBPS) 3455 3456 return 0; 3456 3457 3457 - if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS) 3458 + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_1600_GBPS) 3458 3459 return -EINVAL; 3459 3460 3460 3461 stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
+2 -1
drivers/infiniband/sw/rdmavt/cq.c
··· 518 518 */ 519 519 int rvt_driver_cq_init(void) 520 520 { 521 - comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE, 521 + comp_vector_wq = alloc_workqueue("%s", 522 + WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_PERCPU, 522 523 0, "rdmavt_cq"); 523 524 if (!comp_vector_wq) 524 525 return -ENOMEM;
-1
drivers/infiniband/sw/rxe/rxe_mr.c
··· 452 452 453 453 length -= bytes; 454 454 iova += bytes; 455 - page_offset = 0; 456 455 } 457 456 458 457 return 0;
+49
drivers/infiniband/sw/rxe/rxe_net.c
··· 20 20 21 21 static struct rxe_recv_sockets recv_sockets; 22 22 23 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 24 + /* 25 + * lockdep can detect false positive circular dependencies 26 + * when there are user-space socket API users or in kernel 27 + * users switching between a tcp and rdma transport. 28 + * Maybe also switching between siw and rxe may cause 29 + * problems as per default sockets are only classified 30 + * by family and not by ip protocol. And there might 31 + * be different locks used between the application 32 + * and the low level sockets. 33 + * 34 + * Problems were seen with ksmbd.ko and cifs.ko, 35 + * switching transports, use git blame to find 36 + * more details. 37 + */ 38 + static struct lock_class_key rxe_recv_sk_key[2]; 39 + static struct lock_class_key rxe_recv_slock_key[2]; 40 + #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 41 + 42 + static inline void rxe_reclassify_recv_socket(struct socket *sock) 43 + { 44 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 45 + struct sock *sk = sock->sk; 46 + 47 + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) 48 + return; 49 + 50 + switch (sk->sk_family) { 51 + case AF_INET: 52 + sock_lock_init_class_and_name(sk, 53 + "slock-AF_INET-RDMA-RXE-RECV", 54 + &rxe_recv_slock_key[0], 55 + "sk_lock-AF_INET-RDMA-RXE-RECV", 56 + &rxe_recv_sk_key[0]); 57 + break; 58 + case AF_INET6: 59 + sock_lock_init_class_and_name(sk, 60 + "slock-AF_INET6-RDMA-RXE-RECV", 61 + &rxe_recv_slock_key[1], 62 + "sk_lock-AF_INET6-RDMA-RXE-RECV", 63 + &rxe_recv_sk_key[1]); 64 + break; 65 + default: 66 + WARN_ON_ONCE(1); 67 + } 68 + #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 69 + } 70 + 23 71 static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, 24 72 struct net_device *ndev, 25 73 struct in_addr *saddr, ··· 240 192 err = udp_sock_create(net, &udp_cfg, &sock); 241 193 if (err < 0) 242 194 return ERR_PTR(err); 195 + rxe_reclassify_recv_socket(sock); 243 196 244 197 tnl_cfg.encap_type = 1; 245 198 tnl_cfg.encap_rcv = rxe_udp_encap_recv;
-1
drivers/infiniband/sw/rxe/rxe_odp.c
··· 358 358 359 359 length -= bytes; 360 360 iova += bytes; 361 - page_offset = 0; 362 361 } 363 362 364 363 mutex_unlock(&umem_odp->umem_mutex);
+49
drivers/infiniband/sw/rxe/rxe_qp.c
··· 15 15 #include "rxe_queue.h" 16 16 #include "rxe_task.h" 17 17 18 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 19 + /* 20 + * lockdep can detect false positive circular dependencies 21 + * when there are user-space socket API users or in kernel 22 + * users switching between a tcp and rdma transport. 23 + * Maybe also switching between siw and rxe may cause 24 + * problems as per default sockets are only classified 25 + * by family and not by ip protocol. And there might 26 + * be different locks used between the application 27 + * and the low level sockets. 28 + * 29 + * Problems were seen with ksmbd.ko and cifs.ko, 30 + * switching transports, use git blame to find 31 + * more details. 32 + */ 33 + static struct lock_class_key rxe_send_sk_key[2]; 34 + static struct lock_class_key rxe_send_slock_key[2]; 35 + #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 36 + 37 + static inline void rxe_reclassify_send_socket(struct socket *sock) 38 + { 39 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 40 + struct sock *sk = sock->sk; 41 + 42 + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) 43 + return; 44 + 45 + switch (sk->sk_family) { 46 + case AF_INET: 47 + sock_lock_init_class_and_name(sk, 48 + "slock-AF_INET-RDMA-RXE-SEND", 49 + &rxe_send_slock_key[0], 50 + "sk_lock-AF_INET-RDMA-RXE-SEND", 51 + &rxe_send_sk_key[0]); 52 + break; 53 + case AF_INET6: 54 + sock_lock_init_class_and_name(sk, 55 + "slock-AF_INET6-RDMA-RXE-SEND", 56 + &rxe_send_slock_key[1], 57 + "sk_lock-AF_INET6-RDMA-RXE-SEND", 58 + &rxe_send_sk_key[1]); 59 + break; 60 + default: 61 + WARN_ON_ONCE(1); 62 + } 63 + #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 64 + } 65 + 18 66 static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, 19 67 int has_srq) 20 68 { ··· 292 244 err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); 293 245 if (err < 0) 294 246 return err; 247 + rxe_reclassify_send_socket(qp->sk); 295 248 qp->sk->sk->sk_user_data = qp; 296 249 297 250 /* pick a source UDP port number for this QP based on
+1 -6
drivers/infiniband/sw/rxe/rxe_srq.c
··· 171 171 udata, mi, &srq->rq.producer_lock, 172 172 &srq->rq.consumer_lock); 173 173 if (err) 174 - goto err_free; 174 + return err; 175 175 176 176 srq->rq.max_wr = attr->max_wr; 177 177 } ··· 180 180 srq->limit = attr->srq_limit; 181 181 182 182 return 0; 183 - 184 - err_free: 185 - rxe_queue_cleanup(q); 186 - srq->rq.queue = NULL; 187 - return err; 188 183 } 189 184 190 185 void rxe_srq_cleanup(struct rxe_pool_elem *elem)
+51
drivers/infiniband/sw/siw/siw_cm.c
··· 39 39 static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, 40 40 int status); 41 41 42 + 43 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 44 + /* 45 + * lockdep can detect false positive circular dependencies 46 + * when there are user-space socket API users or in kernel 47 + * users switching between a tcp and rdma transport. 48 + * Maybe also switching between siw and rxe may cause 49 + * problems as per default sockets are only classified 50 + * by family and not by ip protocol. And there might 51 + * be different locks used between the application 52 + * and the low level sockets. 53 + * 54 + * Problems were seen with ksmbd.ko and cifs.ko, 55 + * switching transports, use git blame to find 56 + * more details. 57 + */ 58 + static struct lock_class_key siw_sk_key[2]; 59 + static struct lock_class_key siw_slock_key[2]; 60 + #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 61 + 62 + static inline void siw_reclassify_socket(struct socket *sock) 63 + { 64 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 65 + struct sock *sk = sock->sk; 66 + 67 + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) 68 + return; 69 + 70 + switch (sk->sk_family) { 71 + case AF_INET: 72 + sock_lock_init_class_and_name(sk, 73 + "slock-AF_INET-RDMA-SIW", 74 + &siw_slock_key[0], 75 + "sk_lock-AF_INET-RDMA-SIW", 76 + &siw_sk_key[0]); 77 + break; 78 + case AF_INET6: 79 + sock_lock_init_class_and_name(sk, 80 + "slock-AF_INET6-RDMA-SIW", 81 + &siw_slock_key[1], 82 + "sk_lock-AF_INET6-RDMA-SIW", 83 + &siw_sk_key[1]); 84 + break; 85 + default: 86 + WARN_ON_ONCE(1); 87 + } 88 + #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 89 + } 90 + 42 91 static void siw_sk_assign_cm_upcalls(struct sock *sk) 43 92 { 44 93 struct siw_cep *cep = sk_to_cep(sk); ··· 1443 1394 rv = sock_create(v4 ? 
AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s); 1444 1395 if (rv < 0) 1445 1396 goto error; 1397 + siw_reclassify_socket(s); 1446 1398 1447 1399 /* 1448 1400 * NOTE: For simplification, connect() is called in blocking ··· 1820 1770 rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); 1821 1771 if (rv < 0) 1822 1772 return rv; 1773 + siw_reclassify_socket(s); 1823 1774 1824 1775 /* 1825 1776 * Allow binding local port when still in TIME_WAIT from last close.
+1 -1
drivers/infiniband/ulp/iser/iscsi_iser.c
··· 1029 1029 mutex_init(&ig.connlist_mutex); 1030 1030 INIT_LIST_HEAD(&ig.connlist); 1031 1031 1032 - release_wq = alloc_workqueue("release workqueue", 0, 0); 1032 + release_wq = alloc_workqueue("release workqueue", WQ_PERCPU, 0); 1033 1033 if (!release_wq) { 1034 1034 iser_err("failed to allocate release workqueue\n"); 1035 1035 err = -ENOMEM;
+1 -1
drivers/infiniband/ulp/isert/ib_isert.c
··· 2613 2613 2614 2614 static int __init isert_init(void) 2615 2615 { 2616 - isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0); 2616 + isert_login_wq = alloc_workqueue("isert_login_wq", WQ_PERCPU, 0); 2617 2617 if (!isert_login_wq) { 2618 2618 isert_err("Unable to allocate isert_login_wq\n"); 2619 2619 return -ENOMEM;
+1 -1
drivers/infiniband/ulp/rtrs/rtrs-srv.c
··· 1450 1450 kfree(srv->chunks); 1451 1451 1452 1452 err_free_srv: 1453 - kfree(srv); 1453 + put_device(&srv->dev); 1454 1454 return ERR_PTR(-ENOMEM); 1455 1455 } 1456 1456
+2 -1
drivers/net/ethernet/broadcom/bnge/Makefile
··· 9 9 bnge_rmem.o \ 10 10 bnge_resc.o \ 11 11 bnge_netdev.o \ 12 - bnge_ethtool.o 12 + bnge_ethtool.o \ 13 + bnge_auxr.o
+10
drivers/net/ethernet/broadcom/bnge/bnge.h
··· 11 11 #include <linux/bnxt/hsi.h> 12 12 #include "bnge_rmem.h" 13 13 #include "bnge_resc.h" 14 + #include "bnge_auxr.h" 14 15 15 16 #define DRV_VER_MAJ 1 16 17 #define DRV_VER_MIN 15 ··· 21 20 22 21 enum board_idx { 23 22 BCM57708, 23 + }; 24 + 25 + struct bnge_auxr_priv { 26 + struct auxiliary_device aux_dev; 27 + struct bnge_auxr_dev *auxr_dev; 28 + int id; 24 29 }; 25 30 26 31 struct bnge_pf_info { ··· 204 197 205 198 struct bnge_irq *irq_tbl; 206 199 u16 irqs_acquired; 200 + 201 + struct bnge_auxr_priv *aux_priv; 202 + struct bnge_auxr_dev *auxr_dev; 207 203 }; 208 204 209 205 static inline bool bnge_is_roce_en(struct bnge_dev *bd)
+258
drivers/net/ethernet/broadcom/bnge/bnge_auxr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2025 Broadcom. 3 + 4 + #include <linux/module.h> 5 + 6 + #include <linux/kernel.h> 7 + #include <linux/errno.h> 8 + #include <linux/interrupt.h> 9 + #include <linux/pci.h> 10 + #include <linux/netdevice.h> 11 + #include <linux/rtnetlink.h> 12 + #include <linux/bitops.h> 13 + #include <linux/irq.h> 14 + #include <asm/byteorder.h> 15 + #include <linux/bitmap.h> 16 + #include <linux/auxiliary_bus.h> 17 + #include <linux/bnxt/hsi.h> 18 + 19 + #include "bnge.h" 20 + #include "bnge_hwrm.h" 21 + #include "bnge_auxr.h" 22 + 23 + static DEFINE_IDA(bnge_aux_dev_ids); 24 + 25 + static void bnge_fill_msix_vecs(struct bnge_dev *bd, 26 + struct bnge_msix_info *info) 27 + { 28 + struct bnge_auxr_dev *auxr_dev = bd->auxr_dev; 29 + int num_msix, i; 30 + 31 + if (!auxr_dev->auxr_info->msix_requested) { 32 + dev_warn(bd->dev, "Requested MSI-X vectors not allocated\n"); 33 + return; 34 + } 35 + num_msix = auxr_dev->auxr_info->msix_requested; 36 + for (i = 0; i < num_msix; i++) { 37 + info[i].vector = bd->irq_tbl[i].vector; 38 + info[i].db_offset = bd->db_offset; 39 + info[i].ring_idx = i; 40 + } 41 + } 42 + 43 + int bnge_register_dev(struct bnge_auxr_dev *auxr_dev, 44 + void *handle) 45 + { 46 + struct bnge_dev *bd = pci_get_drvdata(auxr_dev->pdev); 47 + struct bnge_auxr_info *auxr_info; 48 + int rc = 0; 49 + 50 + netdev_lock(bd->netdev); 51 + mutex_lock(&auxr_dev->auxr_dev_lock); 52 + if (!bd->irq_tbl) { 53 + rc = -ENODEV; 54 + goto exit; 55 + } 56 + 57 + if (!bnge_aux_has_enough_resources(bd)) { 58 + rc = -ENOMEM; 59 + goto exit; 60 + } 61 + 62 + auxr_info = auxr_dev->auxr_info; 63 + auxr_info->handle = handle; 64 + 65 + auxr_info->msix_requested = bd->aux_num_msix; 66 + 67 + bnge_fill_msix_vecs(bd, bd->auxr_dev->msix_info); 68 + auxr_dev->flags |= BNGE_ARDEV_MSIX_ALLOC; 69 + 70 + exit: 71 + mutex_unlock(&auxr_dev->auxr_dev_lock); 72 + netdev_unlock(bd->netdev); 73 + return rc; 74 + } 75 + 
EXPORT_SYMBOL(bnge_register_dev); 76 + 77 + void bnge_unregister_dev(struct bnge_auxr_dev *auxr_dev) 78 + { 79 + struct bnge_dev *bd = pci_get_drvdata(auxr_dev->pdev); 80 + struct bnge_auxr_info *auxr_info; 81 + 82 + auxr_info = auxr_dev->auxr_info; 83 + netdev_lock(bd->netdev); 84 + mutex_lock(&auxr_dev->auxr_dev_lock); 85 + if (auxr_info->msix_requested) 86 + auxr_dev->flags &= ~BNGE_ARDEV_MSIX_ALLOC; 87 + auxr_info->msix_requested = 0; 88 + 89 + mutex_unlock(&auxr_dev->auxr_dev_lock); 90 + netdev_unlock(bd->netdev); 91 + } 92 + EXPORT_SYMBOL(bnge_unregister_dev); 93 + 94 + int bnge_send_msg(struct bnge_auxr_dev *auxr_dev, struct bnge_fw_msg *fw_msg) 95 + { 96 + struct bnge_dev *bd = pci_get_drvdata(auxr_dev->pdev); 97 + struct output *resp; 98 + struct input *req; 99 + u32 resp_len; 100 + int rc; 101 + 102 + rc = bnge_hwrm_req_init(bd, req, 0 /* don't care */); 103 + if (rc) 104 + return rc; 105 + 106 + rc = bnge_hwrm_req_replace(bd, req, fw_msg->msg, fw_msg->msg_len); 107 + if (rc) 108 + goto drop_req; 109 + 110 + bnge_hwrm_req_timeout(bd, req, fw_msg->timeout); 111 + resp = bnge_hwrm_req_hold(bd, req); 112 + rc = bnge_hwrm_req_send(bd, req); 113 + resp_len = le16_to_cpu(resp->resp_len); 114 + if (resp_len) { 115 + if (fw_msg->resp_max_len < resp_len) 116 + resp_len = fw_msg->resp_max_len; 117 + 118 + memcpy(fw_msg->resp, resp, resp_len); 119 + } 120 + drop_req: 121 + bnge_hwrm_req_drop(bd, req); 122 + return rc; 123 + } 124 + EXPORT_SYMBOL(bnge_send_msg); 125 + 126 + void bnge_rdma_aux_device_uninit(struct bnge_dev *bd) 127 + { 128 + struct bnge_auxr_priv *aux_priv; 129 + struct auxiliary_device *adev; 130 + 131 + /* Skip if no auxiliary device init was done. 
*/ 132 + if (!bd->aux_priv) 133 + return; 134 + 135 + aux_priv = bd->aux_priv; 136 + adev = &aux_priv->aux_dev; 137 + auxiliary_device_uninit(adev); 138 + } 139 + 140 + static void bnge_aux_dev_release(struct device *dev) 141 + { 142 + struct bnge_auxr_priv *aux_priv = 143 + container_of(dev, struct bnge_auxr_priv, aux_dev.dev); 144 + struct bnge_dev *bd = pci_get_drvdata(aux_priv->auxr_dev->pdev); 145 + 146 + ida_free(&bnge_aux_dev_ids, aux_priv->id); 147 + kfree(aux_priv->auxr_dev->auxr_info); 148 + bd->auxr_dev = NULL; 149 + kfree(aux_priv->auxr_dev); 150 + kfree(aux_priv); 151 + bd->aux_priv = NULL; 152 + } 153 + 154 + void bnge_rdma_aux_device_del(struct bnge_dev *bd) 155 + { 156 + if (!bd->auxr_dev) 157 + return; 158 + 159 + auxiliary_device_delete(&bd->aux_priv->aux_dev); 160 + } 161 + 162 + static void bnge_set_auxr_dev_info(struct bnge_auxr_dev *auxr_dev, 163 + struct bnge_dev *bd) 164 + { 165 + auxr_dev->pdev = bd->pdev; 166 + auxr_dev->l2_db_size = bd->db_size; 167 + auxr_dev->l2_db_size_nc = bd->db_size; 168 + auxr_dev->l2_db_offset = bd->db_offset; 169 + mutex_init(&auxr_dev->auxr_dev_lock); 170 + 171 + if (bd->flags & BNGE_EN_ROCE_V1) 172 + auxr_dev->flags |= BNGE_ARDEV_ROCEV1_SUPP; 173 + if (bd->flags & BNGE_EN_ROCE_V2) 174 + auxr_dev->flags |= BNGE_ARDEV_ROCEV2_SUPP; 175 + 176 + auxr_dev->chip_num = bd->chip_num; 177 + auxr_dev->hw_ring_stats_size = bd->hw_ring_stats_size; 178 + auxr_dev->pf_port_id = bd->pf.port_id; 179 + auxr_dev->en_state = bd->state; 180 + auxr_dev->bar0 = bd->bar0; 181 + } 182 + 183 + void bnge_rdma_aux_device_add(struct bnge_dev *bd) 184 + { 185 + struct auxiliary_device *aux_dev; 186 + int rc; 187 + 188 + if (!bd->auxr_dev) 189 + return; 190 + 191 + aux_dev = &bd->aux_priv->aux_dev; 192 + rc = auxiliary_device_add(aux_dev); 193 + if (rc) { 194 + dev_warn(bd->dev, "Failed to add auxiliary device for ROCE\n"); 195 + auxiliary_device_uninit(aux_dev); 196 + bd->flags &= ~BNGE_EN_ROCE; 197 + } 198 + 199 + bd->auxr_dev->net = 
bd->netdev; 200 + } 201 + 202 + void bnge_rdma_aux_device_init(struct bnge_dev *bd) 203 + { 204 + struct auxiliary_device *aux_dev; 205 + struct bnge_auxr_info *auxr_info; 206 + struct bnge_auxr_priv *aux_priv; 207 + struct bnge_auxr_dev *auxr_dev; 208 + int rc; 209 + 210 + if (!bnge_is_roce_en(bd)) 211 + return; 212 + 213 + aux_priv = kzalloc(sizeof(*aux_priv), GFP_KERNEL); 214 + if (!aux_priv) 215 + goto exit; 216 + 217 + aux_priv->id = ida_alloc(&bnge_aux_dev_ids, GFP_KERNEL); 218 + if (aux_priv->id < 0) { 219 + dev_warn(bd->dev, "ida alloc failed for aux device\n"); 220 + kfree(aux_priv); 221 + goto exit; 222 + } 223 + 224 + aux_dev = &aux_priv->aux_dev; 225 + aux_dev->id = aux_priv->id; 226 + aux_dev->name = "rdma"; 227 + aux_dev->dev.parent = &bd->pdev->dev; 228 + aux_dev->dev.release = bnge_aux_dev_release; 229 + 230 + rc = auxiliary_device_init(aux_dev); 231 + if (rc) { 232 + ida_free(&bnge_aux_dev_ids, aux_priv->id); 233 + kfree(aux_priv); 234 + goto exit; 235 + } 236 + bd->aux_priv = aux_priv; 237 + 238 + auxr_dev = kzalloc(sizeof(*auxr_dev), GFP_KERNEL); 239 + if (!auxr_dev) 240 + goto aux_dev_uninit; 241 + 242 + aux_priv->auxr_dev = auxr_dev; 243 + 244 + auxr_info = kzalloc(sizeof(*auxr_info), GFP_KERNEL); 245 + if (!auxr_info) 246 + goto aux_dev_uninit; 247 + 248 + auxr_dev->auxr_info = auxr_info; 249 + bd->auxr_dev = auxr_dev; 250 + bnge_set_auxr_dev_info(auxr_dev, bd); 251 + 252 + return; 253 + 254 + aux_dev_uninit: 255 + auxiliary_device_uninit(aux_dev); 256 + exit: 257 + bd->flags &= ~BNGE_EN_ROCE; 258 + }
+84
drivers/net/ethernet/broadcom/bnge/bnge_auxr.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (c) 2025 Broadcom */ 3 + 4 + #ifndef _BNGE_AUXR_H_ 5 + #define _BNGE_AUXR_H_ 6 + 7 + #include <linux/auxiliary_bus.h> 8 + 9 + #define BNGE_MIN_ROCE_CP_RINGS 2 10 + #define BNGE_MIN_ROCE_STAT_CTXS 1 11 + 12 + #define BNGE_MAX_ROCE_MSIX 64 13 + 14 + struct hwrm_async_event_cmpl; 15 + struct bnge; 16 + 17 + struct bnge_msix_info { 18 + u32 vector; 19 + u32 ring_idx; 20 + u32 db_offset; 21 + }; 22 + 23 + struct bnge_fw_msg { 24 + void *msg; 25 + int msg_len; 26 + void *resp; 27 + int resp_max_len; 28 + int timeout; 29 + }; 30 + 31 + struct bnge_auxr_info { 32 + void *handle; 33 + u16 msix_requested; 34 + }; 35 + 36 + enum { 37 + BNGE_ARDEV_ROCEV1_SUPP = BIT(0), 38 + BNGE_ARDEV_ROCEV2_SUPP = BIT(1), 39 + BNGE_ARDEV_MSIX_ALLOC = BIT(2), 40 + }; 41 + 42 + #define BNGE_ARDEV_ROCE_SUPP (BNGE_ARDEV_ROCEV1_SUPP | \ 43 + BNGE_ARDEV_ROCEV2_SUPP) 44 + 45 + struct bnge_auxr_dev { 46 + struct net_device *net; 47 + struct pci_dev *pdev; 48 + void __iomem *bar0; 49 + 50 + struct bnge_msix_info msix_info[BNGE_MAX_ROCE_MSIX]; 51 + 52 + u32 flags; 53 + 54 + struct bnge_auxr_info *auxr_info; 55 + 56 + /* Doorbell BAR size in bytes mapped by L2 driver. */ 57 + int l2_db_size; 58 + /* Doorbell BAR size in bytes mapped as non-cacheable. */ 59 + int l2_db_size_nc; 60 + /* Doorbell offset in bytes within l2_db_size_nc. 
*/ 61 + int l2_db_offset; 62 + 63 + u16 chip_num; 64 + u16 hw_ring_stats_size; 65 + u16 pf_port_id; 66 + unsigned long en_state; 67 + 68 + u16 auxr_num_msix_vec; 69 + u16 auxr_num_ctxs; 70 + 71 + /* serialize auxr operations */ 72 + struct mutex auxr_dev_lock; 73 + }; 74 + 75 + void bnge_rdma_aux_device_uninit(struct bnge_dev *bdev); 76 + void bnge_rdma_aux_device_del(struct bnge_dev *bdev); 77 + void bnge_rdma_aux_device_add(struct bnge_dev *bdev); 78 + void bnge_rdma_aux_device_init(struct bnge_dev *bdev); 79 + int bnge_register_dev(struct bnge_auxr_dev *adev, 80 + void *handle); 81 + void bnge_unregister_dev(struct bnge_auxr_dev *adev); 82 + int bnge_send_msg(struct bnge_auxr_dev *adev, struct bnge_fw_msg *fw_msg); 83 + 84 + #endif /* _BNGE_AUXR_H_ */
+17 -1
drivers/net/ethernet/broadcom/bnge/bnge_core.c
··· 41 41 42 42 bool bnge_aux_registered(struct bnge_dev *bd) 43 43 { 44 + struct bnge_auxr_dev *ba_dev = bd->auxr_dev; 45 + 46 + if (ba_dev && ba_dev->auxr_info->msix_requested) 47 + return true; 48 + 44 49 return false; 45 50 } 46 51 ··· 317 312 spin_lock_init(&bd->db_lock); 318 313 #endif 319 314 315 + bnge_rdma_aux_device_init(bd); 316 + 320 317 rc = bnge_alloc_irqs(bd); 321 318 if (rc) { 322 319 dev_err(&pdev->dev, "Error IRQ allocation rc = %d\n", rc); 323 - goto err_config_uninit; 320 + goto err_uninit_auxr; 324 321 } 325 322 326 323 rc = bnge_netdev_alloc(bd, max_irqs); 327 324 if (rc) 328 325 goto err_free_irq; 326 + 327 + bnge_rdma_aux_device_add(bd); 329 328 330 329 pci_save_state(pdev); 331 330 ··· 337 328 338 329 err_free_irq: 339 330 bnge_free_irqs(bd); 331 + 332 + err_uninit_auxr: 333 + bnge_rdma_aux_device_uninit(bd); 340 334 341 335 err_config_uninit: 342 336 bnge_net_uninit_dflt_config(bd); ··· 366 354 { 367 355 struct bnge_dev *bd = pci_get_drvdata(pdev); 368 356 357 + bnge_rdma_aux_device_del(bd); 358 + 369 359 bnge_netdev_free(bd); 370 360 371 361 bnge_free_irqs(bd); 362 + 363 + bnge_rdma_aux_device_uninit(bd); 372 364 373 365 bnge_net_uninit_dflt_config(bd); 374 366
+40
drivers/net/ethernet/broadcom/bnge/bnge_hwrm.c
··· 98 98 ctx->gfp = gfp; 99 99 } 100 100 101 + int bnge_hwrm_req_replace(struct bnge_dev *bd, void *req, void *new_req, 102 + u32 len) 103 + { 104 + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); 105 + struct input *internal_req = req; 106 + u16 req_type; 107 + 108 + if (!ctx) 109 + return -EINVAL; 110 + 111 + if (len > BNGE_HWRM_CTX_OFFSET) 112 + return -E2BIG; 113 + 114 + /* free any existing slices */ 115 + ctx->allocated = BNGE_HWRM_DMA_SIZE - BNGE_HWRM_CTX_OFFSET; 116 + if (ctx->slice_addr) { 117 + dma_free_coherent(bd->dev, ctx->slice_size, 118 + ctx->slice_addr, ctx->slice_handle); 119 + ctx->slice_addr = NULL; 120 + } 121 + ctx->gfp = GFP_KERNEL; 122 + 123 + if ((bd->fw_cap & BNGE_FW_CAP_SHORT_CMD) || len > BNGE_HWRM_MAX_REQ_LEN) { 124 + memcpy(internal_req, new_req, len); 125 + } else { 126 + internal_req->req_type = ((struct input *)new_req)->req_type; 127 + ctx->req = new_req; 128 + } 129 + 130 + ctx->req_len = len; 131 + ctx->req->resp_addr = cpu_to_le64(ctx->dma_handle + 132 + BNGE_HWRM_RESP_OFFSET); 133 + 134 + /* update sentinel for potentially new request type */ 135 + req_type = le16_to_cpu(internal_req->req_type); 136 + ctx->sentinel = bnge_cal_sentinel(ctx, req_type); 137 + 138 + return 0; 139 + } 140 + 101 141 void bnge_hwrm_req_flags(struct bnge_dev *bd, void *req, 102 142 enum bnge_hwrm_ctx_flags flags) 103 143 {
+2
drivers/net/ethernet/broadcom/bnge/bnge_hwrm.h
··· 107 107 void bnge_hwrm_req_alloc_flags(struct bnge_dev *bd, void *req, gfp_t flags); 108 108 void *bnge_hwrm_req_dma_slice(struct bnge_dev *bd, void *req, u32 size, 109 109 dma_addr_t *dma); 110 + int bnge_hwrm_req_replace(struct bnge_dev *bd, void *req, void *new_req, 111 + u32 len); 110 112 #endif /* _BNGE_HWRM_H_ */
+12
drivers/net/ethernet/broadcom/bnge/bnge_resc.c
··· 34 34 return bd->hw_resc.max_stat_ctxs; 35 35 } 36 36 37 + bool bnge_aux_has_enough_resources(struct bnge_dev *bd) 38 + { 39 + unsigned int max_stat_ctxs; 40 + 41 + max_stat_ctxs = bnge_get_max_func_stat_ctxs(bd); 42 + if (max_stat_ctxs <= BNGE_MIN_ROCE_STAT_CTXS || 43 + bd->nq_nr_rings == max_stat_ctxs) 44 + return false; 45 + 46 + return true; 47 + } 48 + 37 49 static unsigned int bnge_get_max_func_cp_rings(struct bnge_dev *bd) 38 50 { 39 51 return bd->hw_resc.max_cp_rings;
+1
drivers/net/ethernet/broadcom/bnge/bnge_resc.h
··· 74 74 void bnge_aux_init_dflt_config(struct bnge_dev *bd); 75 75 u32 bnge_get_rxfh_indir_size(struct bnge_dev *bd); 76 76 int bnge_cal_nr_rss_ctxs(u16 rx_rings); 77 + bool bnge_aux_has_enough_resources(struct bnge_dev *bd); 77 78 78 79 static inline u32 79 80 bnge_adjust_pow_two(u32 total_ent, u16 ent_per_blk)
+2 -2
include/rdma/ib_cm.h
··· 271 271 #define CM_APR_ATTR_ID cpu_to_be16(0x001A) 272 272 273 273 /** 274 - * ib_cm_handler - User-defined callback to process communication events. 274 + * typedef ib_cm_handler - User-defined callback to process communication events. 275 275 * @cm_id: Communication identifier associated with the reported event. 276 276 * @event: Information about the communication event. 277 277 * ··· 482 482 483 483 /** 484 484 * ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a 485 - connection message in case duplicates are received. 485 + * connection message in case duplicates are received. 486 486 * @cm_id: Connection identifier associated with the connection message. 487 487 */ 488 488 int ib_prepare_cm_mra(struct ib_cm_id *cm_id);
+51 -49
include/rdma/ib_verbs.h
··· 586 586 }; 587 587 588 588 /** 589 - * struct rdma_stat_desc 590 - * @name - The name of the counter 591 - * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL 592 - * @priv - Driver private information; Core code should not use 589 + * struct rdma_stat_desc - description of one rdma stat/counter 590 + * @name: The name of the counter 591 + * @flags: Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL 592 + * @priv: Driver private information; Core code should not use 593 593 */ 594 594 struct rdma_stat_desc { 595 595 const char *name; ··· 598 598 }; 599 599 600 600 /** 601 - * struct rdma_hw_stats 602 - * @lock - Mutex to protect parallel write access to lifespan and values 601 + * struct rdma_hw_stats - collection of hardware stats and their management 602 + * @lock: Mutex to protect parallel write access to lifespan and values 603 603 * of counters, which are 64bits and not guaranteed to be written 604 604 * atomicaly on 32bits systems. 605 - * @timestamp - Used by the core code to track when the last update was 606 - * @lifespan - Used by the core code to determine how old the counters 605 + * @timestamp: Used by the core code to track when the last update was 606 + * @lifespan: Used by the core code to determine how old the counters 607 607 * should be before being updated again. Stored in jiffies, defaults 608 608 * to 10 milliseconds, drivers can override the default be specifying 609 609 * their own value during their allocation routine. 610 - * @descs - Array of pointers to static descriptors used for the counters 610 + * @descs: Array of pointers to static descriptors used for the counters 611 611 * in directory. 612 - * @is_disabled - A bitmap to indicate each counter is currently disabled 612 + * @is_disabled: A bitmap to indicate each counter is currently disabled 613 613 * or not. 614 - * @num_counters - How many hardware counters there are. If name is 614 + * @num_counters: How many hardware counters there are. 
If name is 615 615 * shorter than this number, a kernel oops will result. Driver authors 616 616 * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) 617 617 * in their code to prevent this. 618 - * @value - Array of u64 counters that are accessed by the sysfs code and 618 + * @value: Array of u64 counters that are accessed by the sysfs code and 619 619 * filled in by the drivers get_stats routine 620 620 */ 621 621 struct rdma_hw_stats { ··· 859 859 IB_RATE_400_GBPS = 21, 860 860 IB_RATE_600_GBPS = 22, 861 861 IB_RATE_800_GBPS = 23, 862 + IB_RATE_1600_GBPS = 25, 862 863 }; 863 864 864 865 /** ··· 2406 2405 int (*modify_port)(struct ib_device *device, u32 port_num, 2407 2406 int port_modify_mask, 2408 2407 struct ib_port_modify *port_modify); 2409 - /** 2408 + /* 2410 2409 * The following mandatory functions are used only at device 2411 2410 * registration. Keep functions such as these at the end of this 2412 2411 * structure to avoid cache line misses when accessing struct ib_device ··· 2416 2415 struct ib_port_immutable *immutable); 2417 2416 enum rdma_link_layer (*get_link_layer)(struct ib_device *device, 2418 2417 u32 port_num); 2419 - /** 2418 + /* 2420 2419 * When calling get_netdev, the HW vendor's driver should return the 2421 2420 * net device of device @device at port @port_num or NULL if such 2422 2421 * a net device doesn't exist. The vendor driver should call dev_hold ··· 2426 2425 */ 2427 2426 struct net_device *(*get_netdev)(struct ib_device *device, 2428 2427 u32 port_num); 2429 - /** 2428 + /* 2430 2429 * rdma netdev operation 2431 2430 * 2432 2431 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params ··· 2440 2439 int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num, 2441 2440 enum rdma_netdev_t type, 2442 2441 struct rdma_netdev_alloc_params *params); 2443 - /** 2442 + /* 2444 2443 * query_gid should be return GID value for @device, when @port_num 2445 2444 * link layer is either IB or iWarp. 
It is no-op if @port_num port 2446 2445 * is RoCE link layer. 2447 2446 */ 2448 2447 int (*query_gid)(struct ib_device *device, u32 port_num, int index, 2449 2448 union ib_gid *gid); 2450 - /** 2449 + /* 2451 2450 * When calling add_gid, the HW vendor's driver should add the gid 2452 2451 * of device of port at gid index available at @attr. Meta-info of 2453 2452 * that gid (for example, the network device related to this gid) is ··· 2461 2460 * roce_gid_table is used. 2462 2461 */ 2463 2462 int (*add_gid)(const struct ib_gid_attr *attr, void **context); 2464 - /** 2463 + /* 2465 2464 * When calling del_gid, the HW vendor's driver should delete the 2466 2465 * gid of device @device at gid index gid_index of port port_num 2467 2466 * available in @attr. ··· 2476 2475 struct ib_udata *udata); 2477 2476 void (*dealloc_ucontext)(struct ib_ucontext *context); 2478 2477 int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); 2479 - /** 2478 + /* 2480 2479 * This will be called once refcount of an entry in mmap_xa reaches 2481 2480 * zero. The type of the memory that was mapped may differ between 2482 2481 * entries and is opaque to the rdma_user_mmap interface. 
··· 2517 2516 int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); 2518 2517 int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); 2519 2518 int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); 2520 - /** 2519 + /* 2521 2520 * pre_destroy_cq - Prevent a cq from generating any new work 2522 2521 * completions, but not free any kernel resources 2523 2522 */ 2524 2523 int (*pre_destroy_cq)(struct ib_cq *cq); 2525 - /** 2524 + /* 2526 2525 * post_destroy_cq - Free all kernel resources 2527 2526 */ 2528 2527 void (*post_destroy_cq)(struct ib_cq *cq); ··· 2616 2615 struct scatterlist *meta_sg, int meta_sg_nents, 2617 2616 unsigned int *meta_sg_offset); 2618 2617 2619 - /** 2618 + /* 2620 2619 * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and 2621 2620 * fill in the driver initialized data. The struct is kfree()'ed by 2622 2621 * the sysfs core when the device is removed. A lifespan of -1 in the ··· 2625 2624 struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device); 2626 2625 struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device, 2627 2626 u32 port_num); 2628 - /** 2627 + /* 2629 2628 * get_hw_stats - Fill in the counter value(s) in the stats struct. 2630 2629 * @index - The index in the value array we wish to have updated, or 2631 2630 * num_counters if we want all stats updated ··· 2640 2639 int (*get_hw_stats)(struct ib_device *device, 2641 2640 struct rdma_hw_stats *stats, u32 port, int index); 2642 2641 2643 - /** 2642 + /* 2644 2643 * modify_hw_stat - Modify the counter configuration 2645 2644 * @enable: true/false when enable/disable a counter 2646 2645 * Return codes - 0 on success or error code otherwise. 2647 2646 */ 2648 2647 int (*modify_hw_stat)(struct ib_device *device, u32 port, 2649 2648 unsigned int counter_index, bool enable); 2650 - /** 2649 + /* 2651 2650 * Allows rdma drivers to add their own restrack attributes. 
2652 2651 */ 2653 2652 int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); ··· 2683 2682 u8 pdata_len); 2684 2683 int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog); 2685 2684 int (*iw_destroy_listen)(struct iw_cm_id *cm_id); 2686 - /** 2685 + /* 2687 2686 * counter_bind_qp - Bind a QP to a counter. 2688 2687 * @counter - The counter to be bound. If counter->id is zero then 2689 2688 * the driver needs to allocate a new counter and set counter->id 2690 2689 */ 2691 2690 int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp, 2692 2691 u32 port); 2693 - /** 2692 + /* 2694 2693 * counter_unbind_qp - Unbind the qp from the dynamically-allocated 2695 2694 * counter and bind it onto the default one 2696 2695 */ 2697 2696 int (*counter_unbind_qp)(struct ib_qp *qp, u32 port); 2698 - /** 2697 + /* 2699 2698 * counter_dealloc -De-allocate the hw counter 2700 2699 */ 2701 2700 int (*counter_dealloc)(struct rdma_counter *counter); 2702 - /** 2701 + /* 2703 2702 * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in 2704 2703 * the driver initialized data. 2705 2704 */ 2706 2705 struct rdma_hw_stats *(*counter_alloc_stats)( 2707 2706 struct rdma_counter *counter); 2708 - /** 2707 + /* 2709 2708 * counter_update_stats - Query the stats value of this counter 2710 2709 */ 2711 2710 int (*counter_update_stats)(struct rdma_counter *counter); 2712 2711 2713 - /** 2712 + /* 2714 2713 * counter_init - Initialize the driver specific rdma counter struct. 2715 2714 */ 2716 2715 void (*counter_init)(struct rdma_counter *counter); 2717 2716 2718 - /** 2717 + /* 2719 2718 * Allows rdma drivers to add their own restrack attributes 2720 2719 * dumped via 'rdma stat' iproute2 command. 
2721 2720 */ ··· 2731 2730 */ 2732 2731 int (*get_numa_node)(struct ib_device *dev); 2733 2732 2734 - /** 2733 + /* 2735 2734 * add_sub_dev - Add a sub IB device 2736 2735 */ 2737 2736 struct ib_device *(*add_sub_dev)(struct ib_device *parent, 2738 2737 enum rdma_nl_dev_type type, 2739 2738 const char *name); 2740 2739 2741 - /** 2740 + /* 2742 2741 * del_sub_dev - Delete a sub IB device 2743 2742 */ 2744 2743 void (*del_sub_dev)(struct ib_device *sub_dev); 2745 2744 2746 - /** 2745 + /* 2747 2746 * ufile_cleanup - Attempt to cleanup ubojects HW resources inside 2748 2747 * the ufile. 2749 2748 */ 2750 2749 void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile); 2751 2750 2752 - /** 2751 + /* 2753 2752 * report_port_event - Drivers need to implement this if they have 2754 2753 * some private stuff to handle when link status changes. 2755 2754 */ ··· 3158 3157 3159 3158 /** 3160 3159 * rdma_for_each_port - Iterate over all valid port numbers of the IB device 3161 - * @device - The struct ib_device * to iterate over 3162 - * @iter - The unsigned int to store the port number 3160 + * @device: The struct ib_device * to iterate over 3161 + * @iter: The unsigned int to store the port number 3163 3162 */ 3164 3163 #define rdma_for_each_port(device, iter) \ 3165 3164 for (iter = rdma_start_port(device + \ ··· 3525 3524 /** 3526 3525 * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value. 3527 3526 * @device: Device 3528 - * @port_num: Port number 3527 + * @port: Port number 3529 3528 * @mtu: enum value of MTU 3530 3529 * 3531 3530 * Return the MTU size supported by the port as an integer value. Will return ··· 3543 3542 /** 3544 3543 * rdma_mtu_from_attr - Return the mtu of the port from the port attribute. 3545 3544 * @device: Device 3546 - * @port_num: Port number 3545 + * @port: Port number 3547 3546 * @attr: port attribute 3548 3547 * 3549 3548 * Return the MTU size supported by the port as an integer value. 
··· 3920 3919 3921 3920 /** 3922 3921 * ib_open_qp - Obtain a reference to an existing sharable QP. 3923 - * @xrcd - XRC domain 3922 + * @xrcd: XRC domain 3924 3923 * @qp_open_attr: Attributes identifying the QP to open. 3925 3924 * 3926 3925 * Returns a reference to a sharable QP. ··· 4274 4273 /** 4275 4274 * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses 4276 4275 * @dev: The device for which the DMA addresses are to be created 4277 - * @sg: The sg_table object describing the buffer 4276 + * @sgt: The sg_table object describing the buffer 4278 4277 * @direction: The direction of the DMA 4279 - * @attrs: Optional DMA attributes for the map operation 4278 + * @dma_attrs: Optional DMA attributes for the map operation 4280 4279 */ 4281 4280 static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, 4282 4281 struct sg_table *sgt, ··· 4420 4419 /** 4421 4420 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR 4422 4421 * R_Key and L_Key. 4423 - * @mr - struct ib_mr pointer to be updated. 4424 - * @newkey - new key to be used. 4422 + * @mr: struct ib_mr pointer to be updated. 4423 + * @newkey: new key to be used. 4425 4424 */ 4426 4425 static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) 4427 4426 { ··· 4432 4431 /** 4433 4432 * ib_inc_rkey - increments the key portion of the given rkey. Can be used 4434 4433 * for calculating a new rkey for type 2 memory windows. 4435 - * @rkey - the rkey to increment. 4434 + * @rkey: the rkey to increment. 4436 4435 */ 4437 4436 static inline u32 ib_inc_rkey(u32 rkey) 4438 4437 { ··· 4526 4525 4527 4526 /** 4528 4527 * ib_device_try_get: Hold a registration lock 4529 - * device: The device to lock 4528 + * @dev: The device to lock 4530 4529 * 4531 4530 * A device under an active registration lock cannot become unregistered. 
It 4532 4531 * is only possible to obtain a registration lock on a device that is fully ··· 4833 4832 * rdma_roce_rescan_device - Rescan all of the network devices in the system 4834 4833 * and add their gids, as needed, to the relevant RoCE devices. 4835 4834 * 4836 - * @device: the rdma device 4835 + * @ibdev: the rdma device 4837 4836 */ 4838 4837 void rdma_roce_rescan_device(struct ib_device *ibdev); 4839 4838 void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port); ··· 4886 4885 4887 4886 /** 4888 4887 * ibdev_to_node - return the NUMA node for a given ib_device 4889 - * @dev: device to get the NUMA node for. 4888 + * @ibdev: device to get the NUMA node for. 4890 4889 */ 4891 4890 static inline int ibdev_to_node(struct ib_device *ibdev) 4892 4891 { ··· 4924 4923 /** 4925 4924 * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based 4926 4925 * on the flow_label 4926 + * @fl: flow_label value 4927 4927 * 4928 4928 * This function will convert the 20 bit flow_label input to a valid RoCE v2 4929 4929 * UDP src port 14 bit value. All RoCE V2 drivers should use this same
+36 -34
include/rdma/rdmavt_qp.h
··· 144 144 #define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1) 145 145 146 146 /** 147 - * rvt_ud_wr - IB UD work plus AH cache 147 + * struct rvt_ud_wr - IB UD work plus AH cache 148 148 * @wr: valid IB work request 149 149 * @attr: pointer to an allocated AH attribute 150 150 * ··· 184 184 * struct rvt_krwq - kernel struct receive work request 185 185 * @p_lock: lock to protect producer of the kernel buffer 186 186 * @head: index of next entry to fill 187 - * @c_lock:lock to protect consumer of the kernel buffer 187 + * @c_lock: lock to protect consumer of the kernel buffer 188 188 * @tail: index of next entry to pull 189 - * @count: count is aproximate of total receive enteries posted 190 - * @rvt_rwqe: struct of receive work request queue entry 189 + * @count: count is approximate of total receive entries posted 190 + * @curr_wq: struct of receive work request queue entry 191 191 * 192 192 * This structure is used to contain the head pointer, 193 193 * tail pointer and receive work queue entries for kernel ··· 309 309 #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) 310 310 311 311 /** 312 - * rvt_operation_params - op table entry 313 - * @length - the length to copy into the swqe entry 314 - * @qpt_support - a bit mask indicating QP type support 315 - * @flags - RVT_OPERATION flags (see above) 312 + * struct rvt_operation_params - op table entry 313 + * @length: the length to copy into the swqe entry 314 + * @qpt_support: a bit mask indicating QP type support 315 + * @flags: RVT_OPERATION flags (see above) 316 316 * 317 317 * This supports table driven post send so that 318 318 * the driver can have differing an potentially ··· 552 552 553 553 /** 554 554 * rvt_is_user_qp - return if this is user mode QP 555 - * @qp - the target QP 555 + * @qp: the target QP 556 556 */ 557 557 static inline bool rvt_is_user_qp(struct rvt_qp *qp) 558 558 { ··· 561 561 562 562 /** 563 563 * rvt_get_qp - get a QP reference 564 - * @qp - the QP to hold 564 + * @qp: the 
QP to hold 565 565 */ 566 566 static inline void rvt_get_qp(struct rvt_qp *qp) 567 567 { ··· 570 570 571 571 /** 572 572 * rvt_put_qp - release a QP reference 573 - * @qp - the QP to release 573 + * @qp: the QP to release 574 574 */ 575 575 static inline void rvt_put_qp(struct rvt_qp *qp) 576 576 { ··· 580 580 581 581 /** 582 582 * rvt_put_swqe - drop mr refs held by swqe 583 - * @wqe - the send wqe 583 + * @wqe: the send wqe 584 584 * 585 585 * This drops any mr references held by the swqe 586 586 */ ··· 597 597 598 598 /** 599 599 * rvt_qp_wqe_reserve - reserve operation 600 - * @qp - the rvt qp 601 - * @wqe - the send wqe 600 + * @qp: the rvt qp 601 + * @wqe: the send wqe 602 602 * 603 603 * This routine used in post send to record 604 604 * a wqe relative reserved operation use. ··· 612 612 613 613 /** 614 614 * rvt_qp_wqe_unreserve - clean reserved operation 615 - * @qp - the rvt qp 616 - * @flags - send wqe flags 615 + * @qp: the rvt qp 616 + * @flags: send wqe flags 617 617 * 618 618 * This decrements the reserve use count. 619 619 * ··· 653 653 654 654 /** 655 655 * rvt_div_round_up_mtu - round up divide 656 - * @qp - the qp pair 657 - * @len - the length 656 + * @qp: the qp pair 657 + * @len: the length 658 658 * 659 659 * Perform a shift based mtu round up divide 660 660 */ ··· 664 664 } 665 665 666 666 /** 667 - * @qp - the qp pair 668 - * @len - the length 667 + * rvt_div_mtu - shift-based divide 668 + * @qp: the qp pair 669 + * @len: the length 669 670 * 670 671 * Perform a shift based mtu divide 671 672 */ ··· 677 676 678 677 /** 679 678 * rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies 680 - * @timeout - timeout input(0 - 31). 679 + * @timeout: timeout input(0 - 31). 681 680 * 682 681 * Return a timeout value in jiffies. 
683 682 */ ··· 691 690 692 691 /** 693 692 * rvt_lookup_qpn - return the QP with the given QPN 694 - * @ibp: the ibport 693 + * @rdi: rvt device info structure 694 + * @rvp: the ibport 695 695 * @qpn: the QP number to look up 696 696 * 697 697 * The caller must hold the rcu_read_lock(), and keep the lock until ··· 718 716 } 719 717 720 718 /** 721 - * rvt_mod_retry_timer - mod a retry timer 722 - * @qp - the QP 723 - * @shift - timeout shift to wait for multiple packets 719 + * rvt_mod_retry_timer_ext - mod a retry timer 720 + * @qp: the QP 721 + * @shift: timeout shift to wait for multiple packets 724 722 * Modify a potentially already running retry timer 725 723 */ 726 724 static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) ··· 755 753 } 756 754 757 755 /** 758 - * rvt_qp_sqwe_incr - increment ring index 756 + * rvt_qp_swqe_incr - increment ring index 759 757 * @qp: the qp 760 758 * @val: the starting value 761 759 * ··· 813 811 814 812 /** 815 813 * rvt_qp_complete_swqe - insert send completion 816 - * @qp - the qp 817 - * @wqe - the send wqe 818 - * @opcode - wc operation (driver dependent) 819 - * @status - completion status 814 + * @qp: the qp 815 + * @wqe: the send wqe 816 + * @opcode: wc operation (driver dependent) 817 + * @status: completion status 820 818 * 821 819 * Update the s_last information, and then insert a send 822 820 * completion into the completion ··· 893 891 894 892 /** 895 893 * struct rvt_qp_iter - the iterator for QPs 896 - * @qp - the current QP 894 + * @qp: the current QP 897 895 * 898 896 * This structure defines the current iterator 899 897 * state for sequenced access to all QPs relative ··· 915 913 916 914 /** 917 915 * ib_cq_tail - Return tail index of cq buffer 918 - * @send_cq - The cq for send 916 + * @send_cq: The cq for send 919 917 * 920 918 * This is called in qp_iter_print to get tail 921 919 * of cq buffer. 
··· 931 929 932 930 /** 933 931 * ib_cq_head - Return head index of cq buffer 934 - * @send_cq - The cq for send 932 + * @send_cq: The cq for send 935 933 * 936 934 * This is called in qp_iter_print to get head 937 935 * of cq buffer. ··· 947 945 948 946 /** 949 947 * rvt_free_rq - free memory allocated for rvt_rq struct 950 - * @rvt_rq: request queue data structure 948 + * @rq: request queue data structure 951 949 * 952 950 * This function should only be called if the rvt_mmap_info() 953 951 * has not succeeded.