Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net/mlx5: E-Switch, support eswitch inactive mode

Add support for eswitch switchdev inactive mode

Inactive mode: Drop all traffic going to FDB, remove
MPFS L2 rules and disconnect adjacent vports.

Active mode: Traffic flows through FDB, mpfs table populated, and
adjacent vports are connected.

Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Adithya Jayachandran <ajayachandra@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://patch.msgid.link/20251108070404.1551708-4-saeed@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Saeed Mahameed and committed by
Paolo Abeni
9da611df 9902b638

+214 -22
+4 -11
drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c
··· 4 4 #include "fs_core.h" 5 5 #include "eswitch.h" 6 6 7 - enum { 8 - MLX5_ADJ_VPORT_DISCONNECT = 0x0, 9 - MLX5_ADJ_VPORT_CONNECT = 0x1, 10 - }; 11 - 12 - static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, 13 - u16 vport, bool connect) 7 + int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, u16 vport, 8 + bool connect) 14 9 { 15 10 u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {}; 16 11 ··· 19 24 MLX5_SET(modify_vport_state_in, in, egress_connect_valid, 1); 20 25 MLX5_SET(modify_vport_state_in, in, ingress_connect, connect); 21 26 MLX5_SET(modify_vport_state_in, in, egress_connect, connect); 22 - 27 + MLX5_SET(modify_vport_state_in, in, admin_state, connect); 23 28 return mlx5_cmd_exec_in(dev, modify_vport_state, in); 24 29 } 25 30 ··· 91 96 if (err) 92 97 goto acl_ns_remove; 93 98 94 - mlx5_esw_adj_vport_modify(esw->dev, vport_num, MLX5_ADJ_VPORT_CONNECT); 95 99 return 0; 96 100 97 101 acl_ns_remove: ··· 111 117 112 118 esw_debug(esw->dev, "Destroying adjacent vport %d for vhca_id 0x%x\n", 113 119 vport_num, vport->vhca_id); 114 - mlx5_esw_adj_vport_modify(esw->dev, vport_num, 115 - MLX5_ADJ_VPORT_DISCONNECT); 120 + 116 121 mlx5_esw_offloads_rep_remove(esw, vport); 117 122 mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering, 118 123 vport->index);
+6
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
··· 264 264 265 265 struct offloads_fdb { 266 266 struct mlx5_flow_namespace *ns; 267 + struct mlx5_flow_table *drop_root; 268 + struct mlx5_flow_handle *drop_root_rule; 269 + struct mlx5_fc *drop_root_fc; 267 270 struct mlx5_flow_table *tc_miss_table; 268 271 struct mlx5_flow_table *slow_fdb; 269 272 struct mlx5_flow_group *send_to_vport_grp; ··· 395 392 struct mlx5_esw_offload offloads; 396 393 u32 last_vport_idx; 397 394 int mode; 395 + bool offloads_inactive; 398 396 u16 manager_vport; 399 397 u16 first_host_vport; 400 398 u8 num_peers; ··· 638 634 639 635 void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw); 640 636 void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw); 637 + int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, u16 vport, 638 + bool connect); 641 639 642 640 #define MLX5_DEBUG_ESWITCH_MASK BIT(3) 643 641
+197 -10
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
··· 1577 1577 attr.max_grp_num = esw->params.large_group_num; 1578 1578 attr.default_ft = miss_fdb; 1579 1579 attr.mapping = esw->offloads.reg_c0_obj_pool; 1580 + attr.fs_base_prio = FDB_BYPASS_PATH; 1580 1581 1581 1582 chains = mlx5_chains_create(dev, &attr); 1582 1583 if (IS_ERR(chains)) { ··· 2354 2353 esw->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 2355 2354 mlx5_rescan_drivers_locked(esw->dev); 2356 2355 mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); 2356 + } 2357 + 2358 + static void mlx5_esw_fdb_drop_destroy(struct mlx5_eswitch *esw) 2359 + { 2360 + if (!esw->fdb_table.offloads.drop_root) 2361 + return; 2362 + 2363 + esw_debug(esw->dev, "Destroying FDB drop root table %#x fc %#x\n", 2364 + esw->fdb_table.offloads.drop_root->id, 2365 + esw->fdb_table.offloads.drop_root_fc->id); 2366 + mlx5_del_flow_rules(esw->fdb_table.offloads.drop_root_rule); 2367 + /* Don't free flow counter here, can be reused on a later activation */ 2368 + mlx5_destroy_flow_table(esw->fdb_table.offloads.drop_root); 2369 + esw->fdb_table.offloads.drop_root_rule = NULL; 2370 + esw->fdb_table.offloads.drop_root = NULL; 2371 + } 2372 + 2373 + static int mlx5_esw_fdb_drop_create(struct mlx5_eswitch *esw) 2374 + { 2375 + struct mlx5_flow_destination drop_fc_dst = {}; 2376 + struct mlx5_flow_table_attr ft_attr = {}; 2377 + struct mlx5_flow_destination *dst = NULL; 2378 + struct mlx5_core_dev *dev = esw->dev; 2379 + struct mlx5_flow_namespace *root_ns; 2380 + struct mlx5_flow_act flow_act = {}; 2381 + struct mlx5_flow_handle *flow_rule; 2382 + struct mlx5_flow_table *table; 2383 + int err = 0, dst_num = 0; 2384 + 2385 + if (esw->fdb_table.offloads.drop_root) 2386 + return 0; 2387 + 2388 + root_ns = esw->fdb_table.offloads.ns; 2389 + 2390 + ft_attr.prio = FDB_DROP_ROOT; 2391 + ft_attr.max_fte = 1; 2392 + ft_attr.autogroup.max_num_groups = 1; 2393 + table = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); 2394 + if (IS_ERR(table)) { 2395 + esw_warn(dev, "Failed to 
create fdb drop root table, err %pe\n", 2396 + table); 2397 + return PTR_ERR(table); 2398 + } 2399 + 2400 + /* Drop FC reusable, create once on first deactivation of FDB */ 2401 + if (!esw->fdb_table.offloads.drop_root_fc) { 2402 + struct mlx5_fc *counter = mlx5_fc_create(dev, 0); 2403 + 2404 + err = PTR_ERR_OR_ZERO(counter); 2405 + if (err) 2406 + esw_warn(esw->dev, "create fdb drop fc err %d\n", err); 2407 + else 2408 + esw->fdb_table.offloads.drop_root_fc = counter; 2409 + } 2410 + 2411 + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; 2412 + 2413 + if (esw->fdb_table.offloads.drop_root_fc) { 2414 + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 2415 + drop_fc_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 2416 + drop_fc_dst.counter = esw->fdb_table.offloads.drop_root_fc; 2417 + dst = &drop_fc_dst; 2418 + dst_num++; 2419 + } 2420 + 2421 + flow_rule = mlx5_add_flow_rules(table, NULL, &flow_act, dst, dst_num); 2422 + err = PTR_ERR_OR_ZERO(flow_rule); 2423 + if (err) { 2424 + esw_warn(esw->dev, 2425 + "fs offloads: Failed to add vport rx drop rule err %d\n", 2426 + err); 2427 + goto err_flow_rule; 2428 + } 2429 + 2430 + esw->fdb_table.offloads.drop_root = table; 2431 + esw->fdb_table.offloads.drop_root_rule = flow_rule; 2432 + esw_debug(esw->dev, "Created FDB drop root table %#x fc %#x\n", 2433 + table->id, dst ? 
dst->counter->id : 0); 2434 + return 0; 2435 + 2436 + err_flow_rule: 2437 + /* no need to free drop fc, esw_offloads_steering_cleanup will do it */ 2438 + mlx5_destroy_flow_table(table); 2439 + return err; 2440 + } 2441 + 2442 + static void mlx5_esw_fdb_active(struct mlx5_eswitch *esw) 2443 + { 2444 + struct mlx5_vport *vport; 2445 + unsigned long i; 2446 + 2447 + mlx5_esw_fdb_drop_destroy(esw); 2448 + mlx5_mpfs_enable(esw->dev); 2449 + 2450 + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { 2451 + if (!vport->adjacent) 2452 + continue; 2453 + esw_debug(esw->dev, "Connecting vport %d to eswitch\n", 2454 + vport->vport); 2455 + mlx5_esw_adj_vport_modify(esw->dev, vport->vport, true); 2456 + } 2457 + 2458 + esw->offloads_inactive = false; 2459 + esw_warn(esw->dev, "MPFS/FDB active\n"); 2460 + } 2461 + 2462 + static void mlx5_esw_fdb_inactive(struct mlx5_eswitch *esw) 2463 + { 2464 + struct mlx5_vport *vport; 2465 + unsigned long i; 2466 + 2467 + mlx5_mpfs_disable(esw->dev); 2468 + mlx5_esw_fdb_drop_create(esw); 2469 + 2470 + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { 2471 + if (!vport->adjacent) 2472 + continue; 2473 + esw_debug(esw->dev, "Disconnecting vport %u from eswitch\n", 2474 + vport->vport); 2475 + 2476 + mlx5_esw_adj_vport_modify(esw->dev, vport->vport, false); 2477 + } 2478 + 2479 + esw->offloads_inactive = true; 2480 + esw_warn(esw->dev, "MPFS/FDB inactive\n"); 2357 2481 } 2358 2482 2359 2483 static int esw_offloads_start(struct mlx5_eswitch *esw, ··· 3564 3438 3565 3439 static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) 3566 3440 { 3441 + mlx5_esw_fdb_drop_destroy(esw); 3442 + if (esw->fdb_table.offloads.drop_root_fc) 3443 + mlx5_fc_destroy(esw->dev, esw->fdb_table.offloads.drop_root_fc); 3444 + esw->fdb_table.offloads.drop_root_fc = NULL; 3567 3445 esw_destroy_vport_rx_drop_rule(esw); 3568 3446 esw_destroy_vport_rx_drop_group(esw); 3569 3447 esw_destroy_vport_rx_group(esw); ··· 3730 3600 if (err) 3731 3601 goto 
err_steering_init; 3732 3602 3603 + if (esw->offloads_inactive) 3604 + mlx5_esw_fdb_inactive(esw); 3605 + else 3606 + mlx5_esw_fdb_active(esw); 3607 + 3733 3608 /* Representor will control the vport link state */ 3734 3609 mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) 3735 3610 vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; ··· 3801 3666 esw_offloads_metadata_uninit(esw); 3802 3667 mlx5_rdma_disable_roce(esw->dev); 3803 3668 mlx5_esw_adjacent_vhcas_cleanup(esw); 3669 + /* must be done after vhcas cleanup to avoid adjacent vports connect */ 3670 + if (esw->offloads_inactive) 3671 + mlx5_esw_fdb_active(esw); /* legacy mode always active */ 3804 3672 mutex_destroy(&esw->offloads.termtbl_mutex); 3805 3673 } 3806 3674 ··· 3814 3676 *mlx5_mode = MLX5_ESWITCH_LEGACY; 3815 3677 break; 3816 3678 case DEVLINK_ESWITCH_MODE_SWITCHDEV: 3679 + case DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE: 3817 3680 *mlx5_mode = MLX5_ESWITCH_OFFLOADS; 3818 3681 break; 3819 3682 default: ··· 3824 3685 return 0; 3825 3686 } 3826 3687 3827 - static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) 3688 + static int esw_mode_to_devlink(struct mlx5_eswitch *esw, u16 *mode) 3828 3689 { 3829 - switch (mlx5_mode) { 3690 + switch (esw->mode) { 3830 3691 case MLX5_ESWITCH_LEGACY: 3831 3692 *mode = DEVLINK_ESWITCH_MODE_LEGACY; 3832 3693 break; 3833 3694 case MLX5_ESWITCH_OFFLOADS: 3834 - *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; 3695 + if (esw->offloads_inactive) 3696 + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE; 3697 + else 3698 + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; 3835 3699 break; 3836 3700 default: 3837 3701 return -EINVAL; ··· 3940 3798 return ret; 3941 3799 } 3942 3800 3801 + /* Returns true when only changing between active and inactive switchdev mode */ 3802 + static bool mlx5_devlink_switchdev_active_mode_change(struct mlx5_eswitch *esw, 3803 + u16 devlink_mode) 3804 + { 3805 + /* current mode is not switchdev */ 3806 + if (esw->mode != MLX5_ESWITCH_OFFLOADS) 3807 
+ return false; 3808 + 3809 + /* new mode is not switchdev */ 3810 + if (devlink_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV && 3811 + devlink_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE) 3812 + return false; 3813 + 3814 + /* already inactive: no change in current state */ 3815 + if (devlink_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE && 3816 + esw->offloads_inactive) 3817 + return false; 3818 + 3819 + /* already active: no change in current state */ 3820 + if (devlink_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && 3821 + !esw->offloads_inactive) 3822 + return false; 3823 + 3824 + down_write(&esw->mode_lock); 3825 + esw->offloads_inactive = !esw->offloads_inactive; 3826 + esw->eswitch_operation_in_progress = true; 3827 + up_write(&esw->mode_lock); 3828 + 3829 + if (esw->offloads_inactive) 3830 + mlx5_esw_fdb_inactive(esw); 3831 + else 3832 + mlx5_esw_fdb_active(esw); 3833 + 3834 + down_write(&esw->mode_lock); 3835 + esw->eswitch_operation_in_progress = false; 3836 + up_write(&esw->mode_lock); 3837 + return true; 3838 + } 3839 + 3943 3840 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, 3944 3841 struct netlink_ext_ack *extack) 3945 3842 { ··· 3993 3812 if (esw_mode_from_devlink(mode, &mlx5_mode)) 3994 3813 return -EINVAL; 3995 3814 3996 - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) { 3815 + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS && mlx5_get_sd(esw->dev)) { 3997 3816 NL_SET_ERR_MSG_MOD(extack, 3998 3817 "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured."); 3999 3818 return -EPERM; 4000 3819 } 3820 + 3821 + /* Avoid try_lock, active/inactive mode change is not restricted */ 3822 + if (mlx5_devlink_switchdev_active_mode_change(esw, mode)) 3823 + return 0; 4001 3824 4002 3825 mlx5_lag_disable_change(esw->dev); 4003 3826 err = mlx5_esw_try_lock(esw); ··· 4025 3840 esw->eswitch_operation_in_progress = true; 4026 3841 up_write(&esw->mode_lock); 4027 3842 4028 - if (mode == 
DEVLINK_ESWITCH_MODE_SWITCHDEV && 3843 + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS && 4029 3844 !mlx5_devlink_netdev_netns_immutable_set(devlink, true)) { 4030 3845 NL_SET_ERR_MSG_MOD(extack, 4031 3846 "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); ··· 4033 3848 goto skip; 4034 3849 } 4035 3850 4036 - if (mode == DEVLINK_ESWITCH_MODE_LEGACY) 3851 + if (mlx5_mode == MLX5_ESWITCH_LEGACY) 4037 3852 esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; 4038 3853 mlx5_eswitch_disable_locked(esw); 4039 - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { 3854 + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) { 4040 3855 if (mlx5_devlink_trap_get_num_active(esw->dev)) { 4041 3856 NL_SET_ERR_MSG_MOD(extack, 4042 3857 "Can't change mode while devlink traps are active"); 4043 3858 err = -EOPNOTSUPP; 4044 3859 goto skip; 4045 3860 } 3861 + esw->offloads_inactive = 3862 + (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE); 4046 3863 err = esw_offloads_start(esw, extack); 4047 - } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { 3864 + } else if (mlx5_mode == MLX5_ESWITCH_LEGACY) { 4048 3865 err = esw_offloads_stop(esw, extack); 4049 3866 } else { 4050 3867 err = -EINVAL; 4051 3868 } 4052 3869 4053 3870 skip: 4054 - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && err) 3871 + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS && err) 4055 3872 mlx5_devlink_netdev_netns_immutable_set(devlink, false); 4056 3873 down_write(&esw->mode_lock); 4057 3874 esw->eswitch_operation_in_progress = false; ··· 4072 3885 if (IS_ERR(esw)) 4073 3886 return PTR_ERR(esw); 4074 3887 4075 - return esw_mode_to_devlink(esw->mode, mode); 3888 + return esw_mode_to_devlink(esw, mode); 4076 3889 } 4077 3890 4078 3891 static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
+5
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
··· 3520 3520 if (!steering->fdb_root_ns) 3521 3521 return -ENOMEM; 3522 3522 3523 + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_DROP_ROOT, 1); 3524 + err = PTR_ERR_OR_ZERO(maj_prio); 3525 + if (err) 3526 + goto out_err; 3527 + 3523 3528 err = create_fdb_bypass(steering); 3524 3529 if (err) 3525 3530 goto out_err;
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
··· 167 167 if (err) 168 168 goto free_l2table_index; 169 169 mlx5_core_dbg(dev, "MPFS entry %pM, set @index (%d)\n", 170 - l2addr->node.addr, l2addr->index); 170 + l2addr->node.addr, index); 171 171 } 172 172 173 173 l2addr->index = index;
+1
include/linux/mlx5/fs.h
··· 116 116 }; 117 117 118 118 enum { 119 + FDB_DROP_ROOT, 119 120 FDB_BYPASS_PATH, 120 121 FDB_CRYPTO_INGRESS, 121 122 FDB_TC_OFFLOAD,