Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

RDMA/hns: Add delayed work for bonding

When the conditions are met, schedule delayed work in the bond event
handler to perform the bonding operation according to the bond state.
When the slave number or link state changes, re-set the netdev for the
bond ibdev after the modification is complete, since these two operations
may not call hns_roce_set_bond_netdev() in hns_roce_init().

The delayed work is paused during a driver reset or exit to avoid
concurrency.

Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20251112093510.3696363-7-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Junxian Huang and committed by
Leon Romanovsky
5d91677b d9023e46

+325 -1
+308
drivers/infiniband/hw/hns/hns_roce_bond.c
··· 3 3 * Copyright (c) 2025 Hisilicon Limited. 4 4 */ 5 5 6 + #include <net/lag.h> 6 7 #include <net/bonding.h> 7 8 #include "hns_roce_device.h" 8 9 #include "hns_roce_hw_v2.h" ··· 134 133 return false; 135 134 } 136 135 136 + static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp) 137 + { 138 + struct net_device *net_dev; 139 + u32 active_slave_map = 0; 140 + u8 active_slave_num = 0; 141 + bool active; 142 + u8 i; 143 + 144 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 145 + net_dev = bond_grp->bond_func_info[i].net_dev; 146 + if (!net_dev || !(bond_grp->slave_map & (1U << i))) 147 + continue; 148 + 149 + active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ? 150 + net_lag_port_dev_txable(net_dev) : 151 + (ib_get_curr_port_state(net_dev) == IB_PORT_ACTIVE); 152 + if (active) { 153 + active_slave_num++; 154 + active_slave_map |= (1U << i); 155 + } 156 + } 157 + 158 + bond_grp->active_slave_num = active_slave_num; 159 + bond_grp->active_slave_map = active_slave_map; 160 + } 161 + 137 162 static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp, 138 163 u8 func_idx) 139 164 { ··· 254 227 return NULL; 255 228 } 256 229 230 + mutex_init(&die_info->die_mutex); 231 + 257 232 return die_info; 258 233 } 259 234 260 235 static void dealloc_die_info(struct hns_roce_die_info *die_info, u8 bus_num) 261 236 { 237 + mutex_destroy(&die_info->die_mutex); 262 238 xa_erase(&roce_bond_xa, bus_num); 263 239 kfree(die_info); 264 240 } ··· 308 278 dealloc_die_info(die_info, bus_num); 309 279 310 280 return 0; 281 + } 282 + 283 + static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) 284 + { 285 + struct hns_roce_dev *hr_dev; 286 + int ret; 287 + int i; 288 + 289 + for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) { 290 + if (bond_grp->slave_map & (1 << i)) 291 + hns_roce_slave_uninit(bond_grp, i); 292 + } 293 + 294 + mutex_lock(&bond_grp->bond_mutex); 295 + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; 296 + 
mutex_unlock(&bond_grp->bond_mutex); 297 + bond_grp->main_hr_dev = NULL; 298 + 299 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 300 + if (bond_grp->slave_map & (1 << i)) { 301 + hr_dev = hns_roce_slave_init(bond_grp, i, false); 302 + if (hr_dev) { 303 + bond_grp->main_hr_dev = hr_dev; 304 + break; 305 + } 306 + } 307 + } 308 + 309 + if (!bond_grp->main_hr_dev) { 310 + ret = -ENODEV; 311 + goto out; 312 + } 313 + 314 + hns_roce_bond_get_active_slave(bond_grp); 315 + 316 + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND); 317 + 318 + out: 319 + if (ret) { 320 + BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); 321 + hns_roce_cleanup_bond(bond_grp); 322 + } else { 323 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 324 + "RoCE set bond finished!\n"); 325 + } 326 + } 327 + 328 + static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) 329 + { 330 + u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); 331 + struct hns_roce_dev *hr_dev; 332 + u8 i; 333 + 334 + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) 335 + goto out; 336 + 337 + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; 338 + bond_grp->main_hr_dev = NULL; 339 + 340 + hns_roce_slave_uninit(bond_grp, main_func_idx); 341 + 342 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 343 + hr_dev = hns_roce_slave_init(bond_grp, i, false); 344 + if (hr_dev) 345 + bond_grp->main_hr_dev = hr_dev; 346 + } 347 + 348 + out: 349 + hns_roce_cleanup_bond(bond_grp); 350 + } 351 + 352 + static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) 353 + { 354 + int ret; 355 + 356 + hns_roce_bond_get_active_slave(bond_grp); 357 + 358 + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); 359 + 360 + mutex_lock(&bond_grp->bond_mutex); 361 + if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE) 362 + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; 363 + mutex_unlock(&bond_grp->bond_mutex); 364 + 365 + if (ret) 366 + ibdev_err(&bond_grp->main_hr_dev->ib_dev, 367 + 
"failed to change RoCE bond slave state, ret = %d.\n", 368 + ret); 369 + else 370 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 371 + "RoCE slave changestate finished!\n"); 372 + } 373 + 374 + static void hns_roce_slave_change_num(struct hns_roce_bond_group *bond_grp) 375 + { 376 + int ret; 377 + u8 i; 378 + 379 + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { 380 + if (bond_grp->slave_map & (1U << i)) { 381 + if (i == PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn)) 382 + continue; 383 + hns_roce_slave_uninit(bond_grp, i); 384 + } else { 385 + hns_roce_slave_init(bond_grp, i, true); 386 + if (!bond_grp->main_hr_dev) { 387 + ret = -ENODEV; 388 + goto out; 389 + } 390 + bond_grp->bond_func_info[i].net_dev = NULL; 391 + bond_grp->bond_func_info[i].handle = NULL; 392 + } 393 + } 394 + 395 + hns_roce_bond_get_active_slave(bond_grp); 396 + 397 + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); 398 + 399 + out: 400 + if (ret) { 401 + BOND_ERR_LOG("failed to change RoCE bond slave num, ret = %d.\n", ret); 402 + hns_roce_cleanup_bond(bond_grp); 403 + } else { 404 + mutex_lock(&bond_grp->bond_mutex); 405 + if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGE_NUM) 406 + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; 407 + mutex_unlock(&bond_grp->bond_mutex); 408 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 409 + "RoCE slave change num finished!\n"); 410 + } 411 + } 412 + 413 + static void hns_roce_bond_info_update_nolock(struct hns_roce_bond_group *bond_grp, 414 + struct net_device *upper_dev) 415 + { 416 + struct hns_roce_v2_priv *priv; 417 + struct hns_roce_dev *hr_dev; 418 + struct net_device *net_dev; 419 + int func_idx; 420 + 421 + bond_grp->slave_map = 0; 422 + rcu_read_lock(); 423 + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { 424 + func_idx = get_netdev_bond_slave_id(net_dev, bond_grp); 425 + if (func_idx < 0) { 426 + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); 427 + if (!hr_dev) 428 + continue; 429 + func_idx = PCI_FUNC(hr_dev->pci_dev->devfn); 430 + 
if (!bond_grp->bond_func_info[func_idx].net_dev) { 431 + priv = hr_dev->priv; 432 + bond_grp->bond_func_info[func_idx].net_dev = 433 + net_dev; 434 + bond_grp->bond_func_info[func_idx].handle = 435 + priv->handle; 436 + } 437 + ib_device_put(&hr_dev->ib_dev); 438 + } 439 + 440 + bond_grp->slave_map |= (1 << func_idx); 441 + } 442 + rcu_read_unlock(); 311 443 } 312 444 313 445 static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp, ··· 524 332 return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX); 525 333 } 526 334 335 + static void hns_roce_bond_work(struct work_struct *work) 336 + { 337 + struct delayed_work *delayed_work = to_delayed_work(work); 338 + struct hns_roce_bond_group *bond_grp = 339 + container_of(delayed_work, struct hns_roce_bond_group, 340 + bond_work); 341 + enum hns_roce_bond_state bond_state; 342 + bool bond_ready; 343 + 344 + mutex_lock(&bond_grp->bond_mutex); 345 + bond_ready = check_slave_support(bond_grp, bond_grp->upper_dev); 346 + hns_roce_bond_info_update_nolock(bond_grp, bond_grp->upper_dev); 347 + bond_state = bond_grp->bond_state; 348 + bond_grp->bond_ready = bond_ready; 349 + mutex_unlock(&bond_grp->bond_mutex); 350 + 351 + ibdev_info(&bond_grp->main_hr_dev->ib_dev, 352 + "bond work: bond_ready - %d, bond_state - %d.\n", 353 + bond_ready, bond_state); 354 + 355 + if (!bond_ready) { 356 + hns_roce_clear_bond(bond_grp); 357 + return; 358 + } 359 + 360 + switch (bond_state) { 361 + case HNS_ROCE_BOND_NOT_BONDED: 362 + hns_roce_set_bond(bond_grp); 363 + /* In set_bond flow, we don't need to set bond netdev here as 364 + * it has been done when bond_grp->main_hr_dev is registered. 
365 + */ 366 + return; 367 + case HNS_ROCE_BOND_SLAVE_CHANGESTATE: 368 + hns_roce_slave_changestate(bond_grp); 369 + break; 370 + case HNS_ROCE_BOND_SLAVE_CHANGE_NUM: 371 + hns_roce_slave_change_num(bond_grp); 372 + break; 373 + default: 374 + return; 375 + } 376 + hns_roce_set_bond_netdev(bond_grp, bond_grp->main_hr_dev); 377 + } 378 + 527 379 static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp, 528 380 struct hns_roce_dev *hr_dev, 529 381 struct net_device *upper_dev) ··· 582 346 { 583 347 mutex_lock(&bond_grp->bond_mutex); 584 348 349 + cancel_delayed_work(&bond_grp->bond_work); 585 350 bond_grp->upper_dev = NULL; 586 351 bond_grp->main_hr_dev = NULL; 587 352 bond_grp->bond_ready = false; ··· 828 591 if (event == NETDEV_CHANGELOWERSTATE) 829 592 changed = hns_roce_bond_lowerstate_event(bond_grp, ptr); 830 593 594 + if (changed) 595 + schedule_delayed_work(&bond_grp->bond_work, HZ); 596 + 831 597 return NOTIFY_DONE; 832 598 } 833 599 ··· 853 613 } 854 614 855 615 mutex_init(&bond_grp->bond_mutex); 616 + INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_bond_work); 856 617 857 618 bond_grp->bond_ready = false; 858 619 bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED; ··· 886 645 mem_err: 887 646 for (i--; i >= 0; i--) { 888 647 unregister_netdevice_notifier(&bgrps[i]->bond_nb); 648 + cancel_delayed_work_sync(&bgrps[i]->bond_work); 889 649 remove_bond_id(bgrps[i]->bus_num, bgrps[i]->bond_id); 890 650 mutex_destroy(&bgrps[i]->bond_mutex); 891 651 kvfree(bgrps[i]); ··· 907 665 if (!bond_grp) 908 666 continue; 909 667 unregister_netdevice_notifier(&bond_grp->bond_nb); 668 + cancel_delayed_work_sync(&bond_grp->bond_work); 910 669 remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); 911 670 mutex_destroy(&bond_grp->bond_mutex); 912 671 kvfree(bond_grp); ··· 924 681 bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); 925 682 926 683 return hns_roce_set_bond_netdev(bond_grp, hr_dev); 684 + } 685 + 686 + void hns_roce_bond_suspend(struct hnae3_handle 
*handle) 687 + { 688 + u8 bus_num = handle->pdev->bus->number; 689 + struct hns_roce_bond_group *bond_grp; 690 + struct hns_roce_die_info *die_info; 691 + int i; 692 + 693 + die_info = xa_load(&roce_bond_xa, bus_num); 694 + if (!die_info) 695 + return; 696 + 697 + mutex_lock(&die_info->die_mutex); 698 + 699 + /* 700 + * Avoid duplicated processing when calling this function 701 + * multiple times. 702 + */ 703 + if (die_info->suspend_cnt) 704 + goto out; 705 + 706 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 707 + bond_grp = die_info->bgrps[i]; 708 + if (!bond_grp) 709 + continue; 710 + unregister_netdevice_notifier(&bond_grp->bond_nb); 711 + cancel_delayed_work_sync(&bond_grp->bond_work); 712 + } 713 + 714 + out: 715 + die_info->suspend_cnt++; 716 + mutex_unlock(&die_info->die_mutex); 717 + } 718 + 719 + void hns_roce_bond_resume(struct hnae3_handle *handle) 720 + { 721 + u8 bus_num = handle->pdev->bus->number; 722 + struct hns_roce_bond_group *bond_grp; 723 + struct hns_roce_die_info *die_info; 724 + int i, ret; 725 + 726 + die_info = xa_load(&roce_bond_xa, bus_num); 727 + if (!die_info) 728 + return; 729 + 730 + mutex_lock(&die_info->die_mutex); 731 + 732 + die_info->suspend_cnt--; 733 + if (die_info->suspend_cnt) 734 + goto out; 735 + 736 + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { 737 + bond_grp = die_info->bgrps[i]; 738 + if (!bond_grp) 739 + continue; 740 + ret = register_netdevice_notifier(&bond_grp->bond_nb); 741 + if (ret) 742 + dev_err(&handle->pdev->dev, 743 + "failed to resume bond notifier(bus_num = %u, id = %u), ret = %d.\n", 744 + bus_num, bond_grp->bond_id, ret); 745 + } 746 + 747 + out: 748 + mutex_unlock(&die_info->die_mutex); 927 749 }
+5
drivers/infiniband/hw/hns/hns_roce_bond.h
··· 72 72 enum netdev_lag_hash hash_type; 73 73 struct mutex bond_mutex; 74 74 struct notifier_block bond_nb; 75 + struct delayed_work bond_work; 75 76 }; 76 77 77 78 struct hns_roce_die_info { 78 79 u8 bond_id_mask; 79 80 struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX]; 81 + struct mutex die_mutex; 82 + u8 suspend_cnt; 80 83 }; 81 84 82 85 struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, ··· 89 86 void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); 90 87 bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); 91 88 int hns_roce_bond_init(struct hns_roce_dev *hr_dev); 89 + void hns_roce_bond_suspend(struct hnae3_handle *handle); 90 + void hns_roce_bond_resume(struct hnae3_handle *handle); 92 91 93 92 #endif
+12 -1
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 7204 7204 static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, 7205 7205 bool reset) 7206 7206 { 7207 + /* Suspend bond to avoid concurrency */ 7208 + hns_roce_bond_suspend(handle); 7209 + 7207 7210 if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) 7208 - return; 7211 + goto out; 7209 7212 7210 7213 handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; 7211 7214 7212 7215 __hns_roce_hw_v2_uninit_instance(handle, reset, true); 7213 7216 7214 7217 handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; 7218 + 7219 + out: 7220 + hns_roce_bond_resume(handle); 7215 7221 } 7216 7222 7217 7223 struct hns_roce_dev ··· 7257 7251 { 7258 7252 struct hns_roce_dev *hr_dev; 7259 7253 7254 + /* Suspend bond to avoid concurrency */ 7255 + hns_roce_bond_suspend(handle); 7256 + 7260 7257 if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) { 7261 7258 set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); 7262 7259 return 0; ··· 7290 7281 if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN, 7291 7282 &handle->rinfo.state)) { 7292 7283 handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; 7284 + hns_roce_bond_resume(handle); 7293 7285 return 0; 7294 7286 } 7295 7287 ··· 7310 7300 dev_info(dev, "reset done, RoCE client reinit finished.\n"); 7311 7301 } 7312 7302 7303 + hns_roce_bond_resume(handle); 7313 7304 return ret; 7314 7305 } 7315 7306