Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-mlx5-fixes-for-socket-direct'

Tariq Toukan says:

====================
net/mlx5: Fixes for Socket-Direct

This series fixes several race conditions and bugs in the mlx5
Socket-Direct (SD) single netdev flow.

Patch 1 serializes mlx5_sd_init()/mlx5_sd_cleanup() with
mlx5_devcom_comp_lock() and tracks the SD group state on the primary
device, preventing concurrent or duplicate bring-up/tear-down.

Patch 2 fixes the debugfs "multi-pf" directory being stored on the
calling device's sd struct instead of the primary's, which caused
memory leaks and recreation errors when cleanup ran from a different PF.

Patch 3 fixes a race where a secondary PF could access the primary's
auxiliary device after it had been unbound. The secondary PF now holds the
primary's device lock while operating on its auxiliary device.

Patch 4 adds the missing mlx5_sd_cleanup() call on ETH probe errors. Closing
the analogous gap on the resume path requires introducing sd_suspend/resume
APIs that only destroy FW resources, so it is left for a follow-up series.
====================

Link: https://patch.msgid.link/20260504180206.268568-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+122 -20
+21 -5
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 6774 6774 return err; 6775 6775 6776 6776 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6777 - if (actual_adev) 6778 - return _mlx5e_resume(actual_adev); 6779 - return 0; 6777 + if (actual_adev) { 6778 + err = _mlx5e_resume(actual_adev); 6779 + mlx5_sd_put_adev(actual_adev, adev); 6780 + } 6781 + return err; 6780 6782 } 6781 6783 6782 6784 static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) ··· 6817 6815 err = _mlx5e_suspend(actual_adev, false); 6818 6816 6819 6817 mlx5_sd_cleanup(mdev); 6818 + if (actual_adev) 6819 + mlx5_sd_put_adev(actual_adev, adev); 6820 6820 return err; 6821 6821 } 6822 6822 ··· 6916 6912 return err; 6917 6913 6918 6914 actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); 6919 - if (actual_adev) 6920 - return _mlx5e_probe(actual_adev); 6915 + if (actual_adev) { 6916 + err = _mlx5e_probe(actual_adev); 6917 + if (err) 6918 + goto sd_cleanup; 6919 + mlx5_sd_put_adev(actual_adev, adev); 6920 + } 6921 6921 return 0; 6922 + 6923 + sd_cleanup: 6924 + mlx5_sd_cleanup(mdev); 6925 + if (actual_adev) 6926 + mlx5_sd_put_adev(actual_adev, adev); 6927 + return err; 6922 6928 } 6923 6929 6924 6930 static void _mlx5e_remove(struct auxiliary_device *adev) ··· 6980 6966 _mlx5e_remove(actual_adev); 6981 6967 6982 6968 mlx5_sd_cleanup(mdev); 6969 + if (actual_adev) 6970 + mlx5_sd_put_adev(actual_adev, adev); 6983 6971 } 6984 6972 6985 6973 static const struct auxiliary_device_id mlx5e_id_table[] = {
+99 -15
drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
··· 18 18 u8 host_buses; 19 19 struct mlx5_devcom_comp_dev *devcom; 20 20 struct dentry *dfs; 21 + u8 state; 21 22 bool primary; 22 23 union { 23 24 struct { /* primary */ ··· 30 29 u32 alias_obj_id; 31 30 }; 32 31 }; 32 + }; 33 + 34 + enum mlx5_sd_state { 35 + MLX5_SD_STATE_DOWN = 0, 36 + MLX5_SD_STATE_UP, 33 37 }; 34 38 35 39 static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) ··· 276 270 { 277 271 struct mlx5_sd *sd = mlx5_get_sd(dev); 278 272 279 - mlx5_devcom_comp_lock(sd->devcom); 280 - mlx5_devcom_comp_set_ready(sd->devcom, false); 281 - mlx5_devcom_comp_unlock(sd->devcom); 282 273 mlx5_devcom_unregister_component(sd->devcom); 283 274 } 284 275 ··· 429 426 struct mlx5_core_dev *primary, *pos, *to; 430 427 struct mlx5_sd *sd = mlx5_get_sd(dev); 431 428 u8 alias_key[ACCESS_KEY_LEN]; 429 + struct mlx5_sd *primary_sd; 432 430 int err, i; 433 431 434 432 err = sd_init(dev); ··· 444 440 if (err) 445 441 goto err_sd_cleanup; 446 442 443 + mlx5_devcom_comp_lock(sd->devcom); 447 444 if (!mlx5_devcom_comp_is_ready(sd->devcom)) 448 - return 0; 445 + goto out; 449 446 450 447 primary = mlx5_sd_get_primary(dev); 448 + if (!primary) 449 + goto out; 450 + 451 + primary_sd = mlx5_get_sd(primary); 452 + if (primary_sd->state != MLX5_SD_STATE_DOWN) 453 + goto out; 451 454 452 455 for (i = 0; i < ACCESS_KEY_LEN; i++) 453 456 alias_key[i] = get_random_u8(); ··· 463 452 if (err) 464 453 goto err_sd_unregister; 465 454 466 - sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary)); 467 - debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id); 468 - debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops); 455 + primary_sd->dfs = 456 + debugfs_create_dir("multi-pf", 457 + mlx5_debugfs_get_dev_root(primary)); 458 + debugfs_create_x32("group_id", 0400, primary_sd->dfs, 459 + &primary_sd->group_id); 460 + debugfs_create_file("primary", 0400, primary_sd->dfs, primary, 461 + &dev_fops); 469 462 470 463 mlx5_sd_for_each_secondary(i, primary, 
pos) { 471 464 char name[32]; ··· 479 464 goto err_unset_secondaries; 480 465 481 466 snprintf(name, sizeof(name), "secondary_%d", i - 1); 482 - debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops); 467 + debugfs_create_file(name, 0400, primary_sd->dfs, pos, 468 + &dev_fops); 483 469 484 470 } 485 471 ··· 488 472 sd->group_id, mlx5_devcom_comp_get_size(sd->devcom)); 489 473 sd_print_group(primary); 490 474 475 + primary_sd->state = MLX5_SD_STATE_UP; 476 + out: 477 + mlx5_devcom_comp_unlock(sd->devcom); 491 478 return 0; 492 479 493 480 err_unset_secondaries: ··· 498 479 mlx5_sd_for_each_secondary_to(i, primary, to, pos) 499 480 sd_cmd_unset_secondary(pos); 500 481 sd_cmd_unset_primary(primary); 501 - debugfs_remove_recursive(sd->dfs); 482 + debugfs_remove_recursive(primary_sd->dfs); 483 + primary_sd->dfs = NULL; 502 484 err_sd_unregister: 485 + mlx5_sd_for_each_secondary(i, primary, pos) { 486 + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); 487 + 488 + primary_sd->secondaries[i - 1] = NULL; 489 + peer_sd->primary_dev = NULL; 490 + } 491 + primary_sd->primary = false; 492 + mlx5_devcom_comp_set_ready(sd->devcom, false); 493 + mlx5_devcom_comp_unlock(sd->devcom); 503 494 sd_unregister(dev); 504 495 err_sd_cleanup: 505 496 sd_cleanup(dev); ··· 520 491 { 521 492 struct mlx5_sd *sd = mlx5_get_sd(dev); 522 493 struct mlx5_core_dev *primary, *pos; 494 + struct mlx5_sd *primary_sd; 523 495 int i; 524 496 525 497 if (!sd) 526 498 return; 527 499 500 + mlx5_devcom_comp_lock(sd->devcom); 528 501 if (!mlx5_devcom_comp_is_ready(sd->devcom)) 529 - goto out; 502 + goto out_unlock; 530 503 531 504 primary = mlx5_sd_get_primary(dev); 505 + if (!primary) 506 + goto out_ready_false; 507 + 508 + primary_sd = mlx5_get_sd(primary); 509 + if (primary_sd->state != MLX5_SD_STATE_UP) 510 + goto out_clear_peers; 511 + 532 512 mlx5_sd_for_each_secondary(i, primary, pos) 533 513 sd_cmd_unset_secondary(pos); 534 514 sd_cmd_unset_primary(primary); 535 - debugfs_remove_recursive(sd->dfs); 515 + 
debugfs_remove_recursive(primary_sd->dfs); 516 + primary_sd->dfs = NULL; 536 517 537 518 sd_info(primary, "group id %#x, uncombined\n", sd->group_id); 538 - out: 519 + primary_sd->state = MLX5_SD_STATE_DOWN; 520 + out_clear_peers: 521 + mlx5_sd_for_each_secondary(i, primary, pos) { 522 + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); 523 + 524 + primary_sd->secondaries[i - 1] = NULL; 525 + peer_sd->primary_dev = NULL; 526 + } 527 + primary_sd->primary = false; 528 + out_ready_false: 529 + mlx5_devcom_comp_set_ready(sd->devcom, false); 530 + out_unlock: 531 + mlx5_devcom_comp_unlock(sd->devcom); 539 532 sd_unregister(dev); 540 533 sd_cleanup(dev); 541 534 } 542 535 536 + /* Lock order: 537 + * primary: actual_adev_lock -> SD devcom comp lock 538 + * secondary: SD devcom comp lock -> (drop) -> actual_adev_lock 539 + * The two locks are never held together, so no ABBA. 540 + */ 543 541 struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, 544 542 struct auxiliary_device *adev, 545 543 int idx) 546 544 { 547 545 struct mlx5_sd *sd = mlx5_get_sd(dev); 548 546 struct mlx5_core_dev *primary; 547 + struct mlx5_adev *primary_adev; 549 548 550 549 if (!sd) 551 550 return adev; 552 551 553 - if (!mlx5_devcom_comp_is_ready(sd->devcom)) 552 + mlx5_devcom_comp_lock(sd->devcom); 553 + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { 554 + mlx5_devcom_comp_unlock(sd->devcom); 554 555 return NULL; 556 + } 555 557 556 558 primary = mlx5_sd_get_primary(dev); 557 - if (dev == primary) 559 + if (!primary || dev == primary) { 560 + mlx5_devcom_comp_unlock(sd->devcom); 558 561 return adev; 562 + } 559 563 560 - return &primary->priv.adev[idx]->adev; 564 + primary_adev = primary->priv.adev[idx]; 565 + get_device(&primary_adev->adev.dev); 566 + mlx5_devcom_comp_unlock(sd->devcom); 567 + 568 + device_lock(&primary_adev->adev.dev); 569 + /* Primary may have completed remove between dropping devcom and 570 + * acquiring device_lock; recheck. 
571 + */ 572 + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { 573 + device_unlock(&primary_adev->adev.dev); 574 + put_device(&primary_adev->adev.dev); 575 + return NULL; 576 + } 577 + return &primary_adev->adev; 578 + } 579 + 580 + void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, 581 + struct auxiliary_device *adev) 582 + { 583 + if (actual_adev != adev) { 584 + device_unlock(&actual_adev->dev); 585 + put_device(&actual_adev->dev); 586 + } 561 587 }
+2
drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
··· 15 15 struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, 16 16 struct auxiliary_device *adev, 17 17 int idx); 18 + void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, 19 + struct auxiliary_device *adev); 18 20 19 21 int mlx5_sd_init(struct mlx5_core_dev *dev); 20 22 void mlx5_sd_cleanup(struct mlx5_core_dev *dev);