Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

RDMA/hns: Implement bonding init/uninit process

Implement hns_roce_slave_init() and hns_roce_slave_uninit() for device
init/uninit in bonding cases. The former is used to initialize a slave
ibdev (when the slave is unlinked from a bond) or a bond ibdev, while
the latter does the opposite. Most of the process is the same as
regular device init/uninit, with the bonding-specific steps described
below added.

In the bond device init flow, choose one slave to re-initialize as the
main_hr_dev of the bond, and it will be the only device presented for
multiple slaves. During registration, set an active netdev for the
ibdev based on the link state of the slaves. When this main_hr_dev
slave is being unlinked while the bond is still valid, choose a new
slave from the rest and initialize it as the new bond device.

In the uninit flow, add a bond cleanup process that restores all the
other slaves and cleans up the bond resources. This is only for the case
where the port of main_hr_dev is directly removed without first
unlinking it from the bond.

Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20251112093510.3696363-6-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Junxian Huang and committed by
Leon Romanovsky
d9023e46 14f0455e

+282 -17
+178
drivers/infiniband/hw/hns/hns_roce_bond.c
··· 3 3 * Copyright (c) 2025 Hisilicon Limited. 4 4 */ 5 5 6 + #include <net/bonding.h> 6 7 #include "hns_roce_device.h" 7 8 #include "hns_roce_hw_v2.h" 8 9 #include "hns_roce_bond.h" ··· 73 72 } 74 73 75 74 return NULL; 75 + } 76 + 77 + static int hns_roce_set_bond_netdev(struct hns_roce_bond_group *bond_grp, 78 + struct hns_roce_dev *hr_dev) 79 + { 80 + struct net_device *active_dev; 81 + struct net_device *old_dev; 82 + int i, ret = 0; 83 + 84 + if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 85 + rcu_read_lock(); 86 + active_dev = 87 + bond_option_active_slave_get_rcu(netdev_priv(bond_grp->upper_dev)); 88 + rcu_read_unlock(); 89 + } else { 90 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 91 + active_dev = bond_grp->bond_func_info[i].net_dev; 92 + if (active_dev && 93 + ib_get_curr_port_state(active_dev) == IB_PORT_ACTIVE) 94 + break; 95 + } 96 + } 97 + 98 + if (!active_dev || i == ROCE_BOND_FUNC_MAX) 99 + active_dev = get_hr_netdev(hr_dev, 0); 100 + 101 + old_dev = ib_device_get_netdev(&hr_dev->ib_dev, 1); 102 + if (old_dev == active_dev) 103 + goto out; 104 + 105 + ret = ib_device_set_netdev(&hr_dev->ib_dev, active_dev, 1); 106 + if (ret) { 107 + dev_err(hr_dev->dev, "failed to set netdev for bond.\n"); 108 + goto out; 109 + } 110 + 111 + if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 112 + if (old_dev) 113 + roce_del_all_netdev_gids(&hr_dev->ib_dev, 1, old_dev); 114 + rdma_roce_rescan_port(&hr_dev->ib_dev, 1); 115 + } 116 + out: 117 + dev_put(old_dev); 118 + return ret; 119 + } 120 + 121 + bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) 122 + { 123 + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); 124 + struct hns_roce_bond_group *bond_grp; 125 + u8 bus_num = get_hr_bus_num(hr_dev); 126 + 127 + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 128 + if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED && 129 + bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED) 130 + return true; 131 + 132 + return false; 
133 + } 134 + 135 + static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp, 136 + u8 func_idx) 137 + { 138 + struct hnae3_handle *handle; 139 + 140 + handle = bond_grp->bond_func_info[func_idx].handle; 141 + if (handle->priv) 142 + hns_roce_bond_uninit_client(bond_grp, func_idx); 143 + } 144 + 145 + static struct hns_roce_dev 146 + *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp, 147 + u8 func_idx, bool need_switch); 148 + 149 + static int switch_main_dev(struct hns_roce_bond_group *bond_grp, 150 + u8 main_func_idx) 151 + { 152 + struct hns_roce_dev *hr_dev; 153 + struct net_device *net_dev; 154 + u8 i; 155 + 156 + bond_grp->main_hr_dev = NULL; 157 + hns_roce_bond_uninit_client(bond_grp, main_func_idx); 158 + 159 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 160 + net_dev = bond_grp->bond_func_info[i].net_dev; 161 + if ((bond_grp->slave_map & (1U << i)) && net_dev) { 162 + /* In case this slave is still being registered as 163 + * a non-bonded PF, uninit it first and then re-init 164 + * it as the main device. 
165 + */ 166 + hns_roce_slave_uninit(bond_grp, i); 167 + hr_dev = hns_roce_slave_init(bond_grp, i, false); 168 + if (hr_dev) { 169 + bond_grp->main_hr_dev = hr_dev; 170 + break; 171 + } 172 + } 173 + } 174 + 175 + if (!bond_grp->main_hr_dev) 176 + return -ENODEV; 177 + 178 + return 0; 179 + } 180 + 181 + static struct hns_roce_dev 182 + *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp, 183 + u8 func_idx, bool need_switch) 184 + { 185 + struct hns_roce_dev *hr_dev = NULL; 186 + struct hnae3_handle *handle; 187 + u8 main_func_idx; 188 + int ret; 189 + 190 + if (need_switch) { 191 + main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); 192 + if (func_idx == main_func_idx) { 193 + ret = switch_main_dev(bond_grp, main_func_idx); 194 + if (ret == -ENODEV) 195 + return NULL; 196 + } 197 + } 198 + 199 + handle = bond_grp->bond_func_info[func_idx].handle; 200 + if (handle) { 201 + if (handle->priv) 202 + return handle->priv; 203 + /* Prevent this device from being initialized as a bond device */ 204 + if (need_switch) 205 + bond_grp->bond_func_info[func_idx].net_dev = NULL; 206 + hr_dev = hns_roce_bond_init_client(bond_grp, func_idx); 207 + if (!hr_dev) 208 + BOND_ERR_LOG("failed to init slave %u.\n", func_idx); 209 + } 210 + 211 + return hr_dev; 76 212 } 77 213 78 214 static struct hns_roce_die_info *alloc_die_info(int bus_num) ··· 340 202 bond_grp->main_hr_dev = hr_dev; 341 203 bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; 342 204 bond_grp->bond_ready = false; 205 + } 206 + 207 + static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp) 208 + { 209 + mutex_lock(&bond_grp->bond_mutex); 210 + 211 + bond_grp->upper_dev = NULL; 212 + bond_grp->main_hr_dev = NULL; 213 + bond_grp->bond_ready = false; 214 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED; 215 + bond_grp->slave_map = 0; 216 + memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info)); 217 + 218 + mutex_unlock(&bond_grp->bond_mutex); 219 + } 220 + 221 + void 
hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) 222 + { 223 + int ret; 224 + 225 + ret = bond_grp->main_hr_dev ? 226 + hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO; 227 + if (ret) 228 + BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret); 229 + else 230 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 231 + "RoCE clear bond finished!\n"); 232 + 233 + hns_roce_detach_bond_grp(bond_grp); 343 234 } 344 235 345 236 static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp, ··· 670 503 kvfree(bond_grp); 671 504 } 672 505 } 506 + } 507 + 508 + int hns_roce_bond_init(struct hns_roce_dev *hr_dev) 509 + { 510 + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); 511 + struct hns_roce_bond_group *bond_grp; 512 + u8 bus_num = get_hr_bus_num(hr_dev); 513 + 514 + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 515 + 516 + return hns_roce_set_bond_netdev(bond_grp, hr_dev); 673 517 }
+6
drivers/infiniband/hw/hns/hns_roce_bond.h
··· 14 14 15 15 #define BOND_ID(id) BIT(id) 16 16 17 + #define BOND_ERR_LOG(fmt, ...) \ 18 + pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__) 19 + 17 20 enum { 18 21 BOND_MODE_1, 19 22 BOND_MODE_2_4, ··· 83 80 u8 bus_num); 84 81 int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev); 85 82 void hns_roce_dealloc_bond_grp(void); 83 + void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); 84 + bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); 85 + int hns_roce_bond_init(struct hns_roce_dev *hr_dev); 86 86 87 87 #endif
+2 -1
drivers/infiniband/hw/hns/hns_roce_device.h
··· 179 179 HNS_ROCE_STATE_INIT, 180 180 HNS_ROCE_STATE_INITED, 181 181 HNS_ROCE_STATE_UNINIT, 182 + HNS_ROCE_STATE_BOND_UNINIT, 182 183 }; 183 184 184 185 enum { ··· 1305 1304 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); 1306 1305 void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); 1307 1306 int hns_roce_init(struct hns_roce_dev *hr_dev); 1308 - void hns_roce_exit(struct hns_roce_dev *hr_dev); 1307 + void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup); 1309 1308 int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); 1310 1309 int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); 1311 1310 int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
+37 -4
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 7141 7141 } 7142 7142 7143 7143 static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, 7144 - bool reset) 7144 + bool reset, bool bond_cleanup) 7145 7145 { 7146 7146 struct hns_roce_dev *hr_dev = handle->priv; 7147 7147 ··· 7153 7153 hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT; 7154 7154 hns_roce_handle_device_err(hr_dev); 7155 7155 7156 - hns_roce_exit(hr_dev); 7156 + hns_roce_exit(hr_dev, bond_cleanup); 7157 7157 kfree(hr_dev->priv); 7158 7158 ib_dealloc_device(&hr_dev->ib_dev); 7159 7159 } ··· 7209 7209 7210 7210 handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; 7211 7211 7212 - __hns_roce_hw_v2_uninit_instance(handle, reset); 7212 + __hns_roce_hw_v2_uninit_instance(handle, reset, true); 7213 + 7214 + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; 7215 + } 7216 + 7217 + struct hns_roce_dev 7218 + *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp, 7219 + int func_idx) 7220 + { 7221 + struct hnae3_handle *handle; 7222 + int ret; 7223 + 7224 + handle = bond_grp->bond_func_info[func_idx].handle; 7225 + if (!handle || !handle->client) 7226 + return NULL; 7227 + 7228 + ret = hns_roce_hw_v2_init_instance(handle); 7229 + if (ret) 7230 + return NULL; 7231 + 7232 + return handle->priv; 7233 + } 7234 + 7235 + void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, 7236 + int func_idx) 7237 + { 7238 + struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle; 7239 + 7240 + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) 7241 + return; 7242 + 7243 + handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT; 7244 + 7245 + __hns_roce_hw_v2_uninit_instance(handle, false, false); 7213 7246 7214 7247 handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; 7215 7248 } ··· 7311 7278 handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT; 7312 7279 dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n"); 7313 7280 msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY); 7314 - 
__hns_roce_hw_v2_uninit_instance(handle, false); 7281 + __hns_roce_hw_v2_uninit_instance(handle, false, false); 7315 7282 7316 7283 return 0; 7317 7284 }
+5
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
··· 1478 1478 __le32 hash_policy; 1479 1479 }; 1480 1480 1481 + struct hns_roce_dev 1482 + *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp, 1483 + int func_idx); 1484 + void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, 1485 + int func_idx); 1481 1486 int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); 1482 1487 int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp, 1483 1488 enum hns_roce_bond_cmd_type bond_type);
+54 -12
drivers/infiniband/hw/hns/hns_roce_main.c
··· 614 614 return num_counters; 615 615 } 616 616 617 - static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) 617 + static void 618 + hns_roce_unregister_bond_cleanup(struct hns_roce_dev *hr_dev, 619 + struct hns_roce_bond_group *bond_grp) 618 620 { 621 + struct net_device *net_dev; 622 + int i; 623 + 624 + /* To avoid the loss of other slave devices when main_hr_dev 625 + * is unregistered, re-initialize the remaining slaves before 626 + * the bond resources cleanup. 627 + */ 628 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; 629 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 630 + net_dev = bond_grp->bond_func_info[i].net_dev; 631 + if (net_dev && net_dev != get_hr_netdev(hr_dev, 0)) 632 + hns_roce_bond_init_client(bond_grp, i); 633 + } 634 + 635 + hns_roce_cleanup_bond(bond_grp); 636 + } 637 + 638 + static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, 639 + bool bond_cleanup) 640 + { 641 + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); 619 642 struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; 643 + struct hns_roce_bond_group *bond_grp; 644 + u8 bus_num = get_hr_bus_num(hr_dev); 645 + 646 + if (bond_cleanup && hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { 647 + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 648 + if (bond_grp) 649 + hns_roce_unregister_bond_cleanup(hr_dev, bond_grp); 650 + } 620 651 621 652 hr_dev->active = false; 622 653 unregister_netdevice_notifier(&iboe->nb); ··· 777 746 ib_set_device_ops(ib_dev, &hns_roce_dev_ops); 778 747 ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops); 779 748 749 + dma_set_max_seg_size(dev, SZ_2G); 750 + 780 751 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { 781 752 ret = hns_roce_alloc_bond_grp(hr_dev); 782 753 if (ret) { ··· 788 755 } 789 756 } 790 757 791 - for (i = 0; i < hr_dev->caps.num_ports; i++) { 792 - net_dev = get_hr_netdev(hr_dev, i); 793 - if (!net_dev) 794 - continue; 795 - 796 - ret = ib_device_set_netdev(ib_dev, net_dev, i + 1); 797 - if (ret) 758 + if 
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND && 759 + hns_roce_bond_is_active(hr_dev)) { 760 + ret = hns_roce_bond_init(hr_dev); 761 + if (ret) { 762 + dev_err(dev, "failed to init bond!\n"); 798 763 return ret; 764 + } 765 + ret = ib_register_device(ib_dev, "hns_bond_%d", dev); 766 + } else { 767 + for (i = 0; i < hr_dev->caps.num_ports; i++) { 768 + net_dev = get_hr_netdev(hr_dev, i); 769 + if (!net_dev) 770 + continue; 771 + 772 + ret = ib_device_set_netdev(ib_dev, net_dev, i + 1); 773 + if (ret) 774 + return ret; 775 + } 776 + ret = ib_register_device(ib_dev, "hns_%d", dev); 799 777 } 800 - dma_set_max_seg_size(dev, SZ_2G); 801 - ret = ib_register_device(ib_dev, "hns_%d", dev); 802 778 if (ret) { 803 779 dev_err(dev, "ib_register_device failed!\n"); 804 780 return ret; ··· 1207 1165 return ret; 1208 1166 } 1209 1167 1210 - void hns_roce_exit(struct hns_roce_dev *hr_dev) 1168 + void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) 1211 1169 { 1212 1170 hns_roce_unregister_debugfs(hr_dev); 1213 - hns_roce_unregister_device(hr_dev); 1171 + hns_roce_unregister_device(hr_dev, bond_cleanup); 1214 1172 1215 1173 if (hr_dev->hw->hw_exit) 1216 1174 hr_dev->hw->hw_exit(hr_dev);