Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mlx4: Move the bond work to the core driver

Function mlx4_en_queue_bond_work() is used in mlx4_en to start a bond
reconfiguration. It gathers data about a new port map setting, takes
a reference on the netdev that triggered the change and queues a work
object on mlx4_en_priv.mdev.workqueue to perform the operation. The
scheduled work is mlx4_en_bond_work() which calls
mlx4_bond()/mlx4_unbond() and consequently mlx4_do_bond().

At the same time, function mlx4_change_port_types() in mlx4_core might
be invoked to change the port type configuration. As part of its logic,
it re-registers the whole device by calling mlx4_unregister_device(),
followed by mlx4_register_device().

The two operations can result in concurrent access to the data about
currently active interfaces on the device.

Functions mlx4_register_device() and mlx4_unregister_device() lock the
intf_mutex to gain exclusive access to this data. The current
implementation of mlx4_do_bond() doesn't do that which could result in
an unexpected behavior. An updated version of mlx4_do_bond() for use
with an auxiliary bus goes and locks the intf_mutex when accessing a new
auxiliary device array.

However, doing so can then result in the following deadlock:
* A two-port mlx4 device is configured as an Ethernet bond.
* One of the ports is changed from eth to ib, for instance, by writing
into a mlx4_port<x> sysfs attribute file.
* mlx4_change_port_types() is called to update port types. It invokes
mlx4_unregister_device() to unregister the device which locks the
intf_mutex and starts removing all associated interfaces.
* Function mlx4_en_remove() gets invoked and starts destroying its first
netdev. This triggers mlx4_en_netdev_event() which recognizes that the
configured bond is broken. It runs mlx4_en_queue_bond_work() which
takes a reference on the netdev. Removing the netdev now cannot
proceed until the work is completed.
* Work function mlx4_en_bond_work() gets scheduled. It calls
mlx4_unbond() -> mlx4_do_bond(). The latter function tries to lock the
intf_mutex but that is not possible because it is held already by
mlx4_unregister_device().

This particular case could be possibly solved by unregistering the
mlx4_en_netdev_event() notifier in mlx4_en_remove() earlier, but it
seems better to decouple mlx4_en more and break this reference order.

Avoid then this scenario by recognizing that the bond reconfiguration
operates only on a mlx4_dev. The logic to queue and execute the bond
work can be moved into the mlx4_core driver. Only a reference on the
respective mlx4_dev object is needed to be taken during the work's
lifetime. This removes a call from mlx4_en that can directly result in
needing to lock the intf_mutex, it remains a privilege of the core
driver.

Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Tested-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Acked-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Petr Pavlu and committed by
David S. Miller
e2fb47d4 13f85711

+77 -87
+1 -61
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
··· 2894 2894 .xmo_rx_hash = mlx4_en_xdp_rx_hash, 2895 2895 }; 2896 2896 2897 - struct mlx4_en_bond { 2898 - struct work_struct work; 2899 - struct mlx4_en_priv *priv; 2900 - int is_bonded; 2901 - struct mlx4_port_map port_map; 2902 - }; 2903 - 2904 - static void mlx4_en_bond_work(struct work_struct *work) 2905 - { 2906 - struct mlx4_en_bond *bond = container_of(work, 2907 - struct mlx4_en_bond, 2908 - work); 2909 - int err = 0; 2910 - struct mlx4_dev *dev = bond->priv->mdev->dev; 2911 - 2912 - if (bond->is_bonded) { 2913 - if (!mlx4_is_bonded(dev)) { 2914 - err = mlx4_bond(dev); 2915 - if (err) 2916 - en_err(bond->priv, "Fail to bond device\n"); 2917 - } 2918 - if (!err) { 2919 - err = mlx4_port_map_set(dev, &bond->port_map); 2920 - if (err) 2921 - en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n", 2922 - bond->port_map.port1, 2923 - bond->port_map.port2, 2924 - err); 2925 - } 2926 - } else if (mlx4_is_bonded(dev)) { 2927 - err = mlx4_unbond(dev); 2928 - if (err) 2929 - en_err(bond->priv, "Fail to unbond device\n"); 2930 - } 2931 - dev_put(bond->priv->dev); 2932 - kfree(bond); 2933 - } 2934 - 2935 - static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded, 2936 - u8 v2p_p1, u8 v2p_p2) 2937 - { 2938 - struct mlx4_en_bond *bond; 2939 - 2940 - bond = kzalloc(sizeof(*bond), GFP_ATOMIC); 2941 - if (!bond) 2942 - return -ENOMEM; 2943 - 2944 - INIT_WORK(&bond->work, mlx4_en_bond_work); 2945 - bond->priv = priv; 2946 - bond->is_bonded = is_bonded; 2947 - bond->port_map.port1 = v2p_p1; 2948 - bond->port_map.port2 = v2p_p2; 2949 - dev_hold(priv->dev); 2950 - queue_work(priv->mdev->workqueue, &bond->work); 2951 - return 0; 2952 - } 2953 - 2954 2897 int mlx4_en_netdev_event(struct notifier_block *this, 2955 2898 unsigned long event, void *ptr) 2956 2899 { ··· 2903 2960 struct mlx4_dev *dev; 2904 2961 int i, num_eth_ports = 0; 2905 2962 bool do_bond = true; 2906 - struct mlx4_en_priv *priv; 2907 2963 u8 v2p_port1 = 0; 2908 2964 u8 v2p_port2 = 0; 2909 2965 ··· 2937 2995 if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port) 2938 2996 return NOTIFY_DONE; 2939 2997 2940 - priv = netdev_priv(ndev); 2941 2998 if (do_bond) { 2942 2999 struct netdev_notifier_bonding_info *notifier_info = ptr; 2943 3000 struct netdev_bonding_info *bonding_info = ··· 3003 3062 } 3004 3063 } 3005 3064 3006 - mlx4_en_queue_bond_work(priv, do_bond, 3007 - v2p_port1, v2p_port2); 3065 + mlx4_queue_bond_work(dev, do_bond, v2p_port1, v2p_port2); 3008 3066 3009 3067 return NOTIFY_DONE; 3010 3068 }
+58 -7
drivers/net/ethernet/mellanox/mlx4/main.c
··· 1441 1441 return ret; 1442 1442 } 1443 1443 1444 - int mlx4_bond(struct mlx4_dev *dev) 1444 + static int mlx4_bond(struct mlx4_dev *dev) 1445 1445 { 1446 1446 int ret = 0; 1447 1447 struct mlx4_priv *priv = mlx4_priv(dev); ··· 1467 1467 1468 1468 return ret; 1469 1469 } 1470 - EXPORT_SYMBOL_GPL(mlx4_bond); 1471 1470 1472 - int mlx4_unbond(struct mlx4_dev *dev) 1471 + static int mlx4_unbond(struct mlx4_dev *dev) 1473 1472 { 1474 1473 int ret = 0; 1475 1474 struct mlx4_priv *priv = mlx4_priv(dev); ··· 1495 1496 1496 1497 return ret; 1497 1498 } 1498 - EXPORT_SYMBOL_GPL(mlx4_unbond); 1499 1499 1500 - 1501 - int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) 1500 + static int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) 1502 1501 { 1503 1502 u8 port1 = v2p->port1; 1504 1503 u8 port2 = v2p->port2; ··· 1538 1541 mutex_unlock(&priv->bond_mutex); 1539 1542 return err; 1540 1543 } 1541 - EXPORT_SYMBOL_GPL(mlx4_port_map_set); 1544 + 1545 + struct mlx4_bond { 1546 + struct work_struct work; 1547 + struct mlx4_dev *dev; 1548 + int is_bonded; 1549 + struct mlx4_port_map port_map; 1550 + }; 1551 + 1552 + static void mlx4_bond_work(struct work_struct *work) 1553 + { 1554 + struct mlx4_bond *bond = container_of(work, struct mlx4_bond, work); 1555 + int err = 0; 1556 + 1557 + if (bond->is_bonded) { 1558 + if (!mlx4_is_bonded(bond->dev)) { 1559 + err = mlx4_bond(bond->dev); 1560 + if (err) 1561 + mlx4_err(bond->dev, "Fail to bond device\n"); 1562 + } 1563 + if (!err) { 1564 + err = mlx4_port_map_set(bond->dev, &bond->port_map); 1565 + if (err) 1566 + mlx4_err(bond->dev, 1567 + "Fail to set port map [%d][%d]: %d\n", 1568 + bond->port_map.port1, 1569 + bond->port_map.port2, err); 1570 + } 1571 + } else if (mlx4_is_bonded(bond->dev)) { 1572 + err = mlx4_unbond(bond->dev); 1573 + if (err) 1574 + mlx4_err(bond->dev, "Fail to unbond device\n"); 1575 + } 1576 + put_device(&bond->dev->persist->pdev->dev); 1577 + kfree(bond); 1578 + } 1579 + 1580 + int mlx4_queue_bond_work(struct mlx4_dev *dev, int is_bonded, u8 v2p_p1, 1581 + u8 v2p_p2) 1582 + { 1583 + struct mlx4_bond *bond; 1584 + 1585 + bond = kzalloc(sizeof(*bond), GFP_ATOMIC); 1586 + if (!bond) 1587 + return -ENOMEM; 1588 + 1589 + INIT_WORK(&bond->work, mlx4_bond_work); 1590 + get_device(&dev->persist->pdev->dev); 1591 + bond->dev = dev; 1592 + bond->is_bonded = is_bonded; 1593 + bond->port_map.port1 = v2p_p1; 1594 + bond->port_map.port2 = v2p_p2; 1595 + queue_work(mlx4_wq, &bond->work); 1596 + return 0; 1597 + } 1598 + EXPORT_SYMBOL(mlx4_queue_bond_work); 1542 1599 1543 1600 static int mlx4_load_fw(struct mlx4_dev *dev) 1544 1601 {
+5
drivers/net/ethernet/mellanox/mlx4/mlx4.h
··· 863 863 struct list_head steer_entries[MLX4_NUM_STEERS]; 864 864 }; 865 865 866 + struct mlx4_port_map { 867 + u8 port1; 868 + u8 port2; 869 + }; 870 + 866 871 enum { 867 872 MLX4_PCI_DEV_IS_VF = 1 << 0, 868 873 MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
+13
include/linux/mlx4/device.h
··· 1087 1087 (offset & (PAGE_SIZE - 1)); 1088 1088 } 1089 1089 1090 + static inline int mlx4_is_bonded(struct mlx4_dev *dev) 1091 + { 1092 + return !!(dev->flags & MLX4_FLAG_BONDED); 1093 + } 1094 + 1095 + static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) 1096 + { 1097 + return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); 1098 + } 1099 + 1100 + int mlx4_queue_bond_work(struct mlx4_dev *dev, int is_bonded, u8 v2p_p1, 1101 + u8 v2p_p2); 1102 + 1090 1103 int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); 1091 1104 void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); 1092 1105 int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
-19
include/linux/mlx4/driver.h
··· 66 66 int mlx4_register_interface(struct mlx4_interface *intf); 67 67 void mlx4_unregister_interface(struct mlx4_interface *intf); 68 68 69 - int mlx4_bond(struct mlx4_dev *dev); 70 - int mlx4_unbond(struct mlx4_dev *dev); 71 - static inline int mlx4_is_bonded(struct mlx4_dev *dev) 72 - { 73 - return !!(dev->flags & MLX4_FLAG_BONDED); 74 - } 75 - 76 - static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) 77 - { 78 - return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); 79 - } 80 - 81 - struct mlx4_port_map { 82 - u8 port1; 83 - u8 port2; 84 - }; 85 - 86 - int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); 87 - 88 69 int mlx4_register_event_notifier(struct mlx4_dev *dev, 89 70 struct notifier_block *nb); 90 71 int mlx4_unregister_event_notifier(struct mlx4_dev *dev,