Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'symmetric-or-xor-rss-hash'

Gal Pressman says:

====================
Symmetric OR-XOR RSS hash

Add support for a new type of input_xfrm: Symmetric OR-XOR.
Symmetric OR-XOR transforms the input fields fed to the RSS hash as follows:
(SRC_IP | DST_IP, SRC_IP ^ DST_IP, SRC_PORT | DST_PORT, SRC_PORT ^ DST_PORT)

Configuration is done through the ethtool -x/-X commands.
For mlx5, the default is already a symmetric hash; this patch now exposes
it to userspace and allows the feature to be enabled or disabled.

v5: https://lore.kernel.org/20250220113435.417487-1-gal@nvidia.com
v4: https://lore.kernel.org/20250216182453.226325-1-gal@nvidia.com
v3: https://lore.kernel.org/20250205135341.542720-1-gal@nvidia.com
v2: https://lore.kernel.org/20250203150039.519301-1-gal@nvidia.com
====================

Link: https://patch.msgid.link/20250224174416.499070-1-gal@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+155 -41
+1 -1
Documentation/networking/ethtool-netlink.rst
··· 1934 1934 indicates queue number. 1935 1935 ETHTOOL_A_RSS_INPUT_XFRM attribute is a bitmap indicating the type of 1936 1936 transformation applied to the input protocol fields before given to the RSS 1937 - hfunc. Current supported option is symmetric-xor. 1937 + hfunc. Current supported options are symmetric-xor and symmetric-or-xor. 1938 1938 1939 1939 PLCA_GET_CFG 1940 1940 ============
+11 -4
Documentation/networking/scaling.rst
··· 49 49 are swapped, the computed hash is the same. This is beneficial in some 50 50 applications that monitor TCP/IP flows (IDS, firewalls, ...etc) and need 51 51 both directions of the flow to land on the same Rx queue (and CPU). The 52 - "Symmetric-XOR" is a type of RSS algorithms that achieves this hash 53 - symmetry by XORing the input source and destination fields of the IP 54 - and/or L4 protocols. This, however, results in reduced input entropy and 55 - could potentially be exploited. Specifically, the algorithm XORs the input 52 + "Symmetric-XOR" and "Symmetric-OR-XOR" are types of RSS algorithms that 53 + achieve this hash symmetry by XOR/ORing the input source and destination 54 + fields of the IP and/or L4 protocols. This, however, results in reduced 55 + input entropy and could potentially be exploited. 56 + 57 + Specifically, the "Symmetric-XOR" algorithm XORs the input 56 58 as follows:: 57 59 58 60 # (SRC_IP ^ DST_IP, SRC_IP ^ DST_IP, SRC_PORT ^ DST_PORT, SRC_PORT ^ DST_PORT) 61 + 62 + The "Symmetric-OR-XOR" algorithm, on the other hand, transforms the input as 63 + follows:: 64 + 65 + # (SRC_IP | DST_IP, SRC_IP ^ DST_IP, SRC_PORT | DST_PORT, SRC_PORT ^ DST_PORT) 59 66 60 67 The result is then fed to the underlying RSS algorithm. 61 68
+1 -1
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
··· 1808 1808 static const struct ethtool_ops iavf_ethtool_ops = { 1809 1809 .supported_coalesce_params = ETHTOOL_COALESCE_USECS | 1810 1810 ETHTOOL_COALESCE_USE_ADAPTIVE, 1811 - .cap_rss_sym_xor_supported = true, 1811 + .supported_input_xfrm = RXH_XFRM_SYM_XOR, 1812 1812 .get_drvinfo = iavf_get_drvinfo, 1813 1813 .get_link = ethtool_op_get_link, 1814 1814 .get_ringparam = iavf_get_ringparam,
+1 -1
drivers/net/ethernet/intel/ice/ice_ethtool.c
··· 4770 4770 .supported_coalesce_params = ETHTOOL_COALESCE_USECS | 4771 4771 ETHTOOL_COALESCE_USE_ADAPTIVE | 4772 4772 ETHTOOL_COALESCE_RX_USECS_HIGH, 4773 - .cap_rss_sym_xor_supported = true, 4773 + .supported_input_xfrm = RXH_XFRM_SYM_XOR, 4774 4774 .rxfh_per_ctx_key = true, 4775 4775 .get_link_ksettings = ice_get_link_ksettings, 4776 4776 .set_link_ksettings = ice_set_link_ksettings,
+11 -2
drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
··· 161 161 { 162 162 enum mlx5_traffic_types tt; 163 163 164 + rss->hash.symmetric = true; 164 165 rss->hash.hfunc = ETH_RSS_HASH_TOP; 165 166 netdev_rss_key_fill(rss->hash.toeplitz_hash_key, 166 167 sizeof(rss->hash.toeplitz_hash_key)); ··· 567 566 return final_err; 568 567 } 569 568 570 - int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc) 569 + int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) 571 570 { 572 571 if (indir) 573 572 memcpy(indir, rss->indir.table, ··· 580 579 if (hfunc) 581 580 *hfunc = rss->hash.hfunc; 582 581 582 + if (symmetric) 583 + *symmetric = rss->hash.symmetric; 584 + 583 585 return 0; 584 586 } 585 587 586 588 int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, 587 - const u8 *key, const u8 *hfunc, 589 + const u8 *key, const u8 *hfunc, const bool *symmetric, 588 590 u32 *rqns, u32 *vhca_ids, unsigned int num_rqns) 589 591 { 590 592 bool changed_indir = false; ··· 625 621 626 622 memcpy(rss->indir.table, indir, 627 623 rss->indir.actual_table_size * sizeof(*rss->indir.table)); 624 + } 625 + 626 + if (symmetric) { 627 + rss->hash.symmetric = *symmetric; 628 + changed_hash = true; 628 629 } 629 630 630 631 if (changed_indir && rss->enabled) {
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
··· 47 47 48 48 int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, 49 49 struct mlx5e_packet_merge_param *pkt_merge_param); 50 - int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc); 50 + int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bool *symmetric); 51 51 int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, 52 - const u8 *key, const u8 *hfunc, 52 + const u8 *key, const u8 *hfunc, const bool *symmetric, 53 53 u32 *rqns, u32 *vhca_ids, unsigned int num_rqns); 54 54 struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss); 55 55 u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
+6 -5
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
··· 194 194 } 195 195 196 196 int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, 197 - u32 *indir, u8 *key, u8 *hfunc) 197 + u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) 198 198 { 199 199 struct mlx5e_rss *rss; 200 200 ··· 205 205 if (!rss) 206 206 return -ENOENT; 207 207 208 - return mlx5e_rss_get_rxfh(rss, indir, key, hfunc); 208 + return mlx5e_rss_get_rxfh(rss, indir, key, hfunc, symmetric); 209 209 } 210 210 211 211 int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, 212 - const u32 *indir, const u8 *key, const u8 *hfunc) 212 + const u32 *indir, const u8 *key, const u8 *hfunc, 213 + const bool *symmetric) 213 214 { 214 215 u32 *vhca_ids = get_vhca_ids(res, 0); 215 216 struct mlx5e_rss *rss; ··· 222 221 if (!rss) 223 222 return -ENOENT; 224 223 225 - return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, vhca_ids, 226 - res->rss_nch); 224 + return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, symmetric, 225 + res->rss_rqns, vhca_ids, res->rss_nch); 227 226 } 228 227 229 228 int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx,
+3 -2
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
··· 49 49 /* Configuration API */ 50 50 void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); 51 51 int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, 52 - u32 *indir, u8 *key, u8 *hfunc); 52 + u32 *indir, u8 *key, u8 *hfunc, bool *symmetric); 53 53 int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, 54 - const u32 *indir, const u8 *key, const u8 *hfunc); 54 + const u32 *indir, const u8 *key, const u8 *hfunc, 55 + const bool *symmetric); 55 56 56 57 int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, 57 58 enum mlx5_traffic_types tt);
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
··· 124 124 const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); 125 125 void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 126 126 127 - MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 127 + MLX5_SET(tirc, tirc, rx_hash_symmetric, rss_hash->symmetric); 128 128 memcpy(rss_key, rss_hash->toeplitz_hash_key, len); 129 129 } 130 130
+1
drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
··· 9 9 struct mlx5e_rss_params_hash { 10 10 u8 hfunc; 11 11 u8 toeplitz_hash_key[40]; 12 + bool symmetric; 12 13 }; 13 14 14 15 struct mlx5e_rss_params_traffic_type {
+14 -3
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
··· 1456 1456 { 1457 1457 struct mlx5e_priv *priv = netdev_priv(netdev); 1458 1458 u32 rss_context = rxfh->rss_context; 1459 + bool symmetric; 1459 1460 int err; 1460 1461 1461 1462 mutex_lock(&priv->state_lock); 1462 1463 err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, 1463 - rxfh->indir, rxfh->key, &rxfh->hfunc); 1464 + rxfh->indir, rxfh->key, &rxfh->hfunc, &symmetric); 1464 1465 mutex_unlock(&priv->state_lock); 1465 - return err; 1466 + 1467 + if (err) 1468 + return err; 1469 + 1470 + if (symmetric) 1471 + rxfh->input_xfrm = RXH_XFRM_SYM_OR_XOR; 1472 + 1473 + return 0; 1466 1474 } 1467 1475 1468 1476 static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, 1469 1477 struct netlink_ext_ack *extack) 1470 1478 { 1479 + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; 1471 1480 struct mlx5e_priv *priv = netdev_priv(dev); 1472 1481 u32 *rss_context = &rxfh->rss_context; 1473 1482 u8 hfunc = rxfh->hfunc; ··· 1511 1502 1512 1503 err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, 1513 1504 rxfh->indir, rxfh->key, 1514 - hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); 1505 + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, 1506 + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); 1515 1507 1516 1508 unlock: 1517 1509 mutex_unlock(&priv->state_lock); ··· 2621 2611 ETHTOOL_COALESCE_MAX_FRAMES | 2622 2612 ETHTOOL_COALESCE_USE_ADAPTIVE | 2623 2613 ETHTOOL_COALESCE_USE_CQE, 2614 + .supported_input_xfrm = RXH_XFRM_SYM_OR_XOR, 2624 2615 .get_drvinfo = mlx5e_get_drvinfo, 2625 2616 .get_link = ethtool_op_get_link, 2626 2617 .get_link_ext_state = mlx5e_get_link_ext_state,
+2 -3
include/linux/ethtool.h
··· 763 763 764 764 /** 765 765 * struct ethtool_ops - optional netdev operations 766 + * @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*. 766 767 * @cap_link_lanes_supported: indicates if the driver supports lanes 767 768 * parameter. 768 769 * @cap_rss_ctx_supported: indicates if the driver supports RSS 769 770 * contexts via legacy API, drivers implementing @create_rxfh_context 770 771 * do not have to set this bit. 771 - * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor 772 - * RSS. 773 772 * @rxfh_per_ctx_key: device supports setting different RSS key for each 774 773 * additional context. Netlink API should report hfunc, key, and input_xfrm 775 774 * for every context, not just context 0. ··· 994 995 * of the generic netdev features interface. 995 996 */ 996 997 struct ethtool_ops { 998 + u32 supported_input_xfrm:8; 997 999 u32 cap_link_lanes_supported:1; 998 1000 u32 cap_rss_ctx_supported:1; 999 - u32 cap_rss_sym_xor_supported:1; 1000 1001 u32 rxfh_per_ctx_key:1; 1001 1002 u32 cap_rss_rxnfc_adds:1; 1002 1003 u32 rxfh_indir_space;
+4
include/uapi/linux/ethtool.h
··· 2289 2289 * be exploited to reduce the RSS queue spread. 2290 2290 */ 2291 2291 #define RXH_XFRM_SYM_XOR (1 << 0) 2292 + /* Similar to SYM_XOR, except that one copy of the XOR'ed fields is replaced by 2293 + * an OR of the same fields 2294 + */ 2295 + #define RXH_XFRM_SYM_OR_XOR (1 << 1) 2292 2296 #define RXH_XFRM_NO_CHANGE 0xff 2293 2297 2294 2298 /* L2-L4 network traffic flow types */
+4 -4
net/ethtool/ioctl.c
··· 1011 1011 if (rc) 1012 1012 return rc; 1013 1013 1014 - /* Sanity check: if symmetric-xor is set, then: 1014 + /* Sanity check: if symmetric-xor/symmetric-or-xor is set, then: 1015 1015 * 1 - no other fields besides IP src/dst and/or L4 src/dst 1016 1016 * 2 - If src is set, dst must also be set 1017 1017 */ 1018 - if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && 1018 + if ((rxfh.input_xfrm & (RXH_XFRM_SYM_XOR | RXH_XFRM_SYM_OR_XOR)) && 1019 1019 ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | 1020 1020 RXH_L4_B_0_1 | RXH_L4_B_2_3)) || 1021 1021 (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || ··· 1388 1388 return -EOPNOTSUPP; 1389 1389 /* Check input data transformation capabilities */ 1390 1390 if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR && 1391 + rxfh.input_xfrm != RXH_XFRM_SYM_OR_XOR && 1391 1392 rxfh.input_xfrm != RXH_XFRM_NO_CHANGE) 1392 1393 return -EINVAL; 1393 1394 if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE && 1394 - (rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && 1395 - !ops->cap_rss_sym_xor_supported) 1395 + rxfh.input_xfrm & ~ops->supported_input_xfrm) 1396 1396 return -EOPNOTSUPP; 1397 1397 create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; 1398 1398
+1
tools/testing/selftests/drivers/net/hw/Makefile
··· 15 15 nic_performance.py \ 16 16 pp_alloc_fail.py \ 17 17 rss_ctx.py \ 18 + rss_input_xfrm.py \ 18 19 tso.py \ 19 20 # 20 21
+87
tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
··· 1 + #!/usr/bin/env python3 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + import multiprocessing 5 + import socket 6 + from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout 7 + from lib.py import NetDrvEpEnv 8 + from lib.py import EthtoolFamily, NetdevFamily 9 + from lib.py import KsftSkipEx, KsftFailEx 10 + from lib.py import rand_port 11 + 12 + 13 + def traffic(cfg, local_port, remote_port, ipver): 14 + af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6 15 + sock = socket.socket(af_inet, socket.SOCK_DGRAM) 16 + sock.bind(("", local_port)) 17 + sock.connect((cfg.remote_addr_v[ipver], remote_port)) 18 + tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}" 19 + cmd("echo a | socat - UDP" + tgt, host=cfg.remote) 20 + fd_read_timeout(sock.fileno(), 5) 21 + return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) 22 + 23 + 24 + def test_rss_input_xfrm(cfg, ipver): 25 + """ 26 + Test symmetric input_xfrm. 27 + If symmetric RSS hash is configured, send traffic twice, swapping the 28 + src/dst UDP ports, and verify that the same queue is receiving the traffic 29 + in both cases (IPs are constant). 
30 + """ 31 + 32 + if multiprocessing.cpu_count() < 2: 33 + raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") 34 + 35 + input_xfrm = cfg.ethnl.rss_get( 36 + {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') 37 + 38 + # Check for symmetric xor/or-xor 39 + if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): 40 + raise KsftSkipEx("Symmetric RSS hash not requested") 41 + 42 + cpus = set() 43 + successful = 0 44 + for _ in range(100): 45 + try: 46 + port1 = rand_port(socket.SOCK_DGRAM) 47 + port2 = rand_port(socket.SOCK_DGRAM) 48 + cpu1 = traffic(cfg, port1, port2, ipver) 49 + cpu2 = traffic(cfg, port2, port1, ipver) 50 + cpus.update([cpu1, cpu2]) 51 + ksft_eq( 52 + cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured") 53 + 54 + successful += 1 55 + if successful == 10: 56 + break 57 + except: 58 + continue 59 + else: 60 + raise KsftFailEx("Failed to run traffic") 61 + 62 + ksft_ge(len(cpus), 2, 63 + comment=f"Received traffic on less than two cpus {cpus = }") 64 + 65 + 66 + def test_rss_input_xfrm_ipv4(cfg): 67 + cfg.require_ipver("4") 68 + test_rss_input_xfrm(cfg, "4") 69 + 70 + 71 + def test_rss_input_xfrm_ipv6(cfg): 72 + cfg.require_ipver("6") 73 + test_rss_input_xfrm(cfg, "6") 74 + 75 + 76 + def main() -> None: 77 + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: 78 + cfg.ethnl = EthtoolFamily() 79 + cfg.netdevnl = NetdevFamily() 80 + 81 + ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6], 82 + args=(cfg, )) 83 + ksft_exit() 84 + 85 + 86 + if __name__ == "__main__": 87 + main()
+5 -12
tools/testing/selftests/net/lib/py/utils.py
··· 185 185 return tool('ethtool', args, json=json, ns=ns, host=host) 186 186 187 187 188 - def rand_port(): 188 + def rand_port(type=socket.SOCK_STREAM): 189 189 """ 190 - Get a random unprivileged port, try to make sure it's not already used. 190 + Get a random unprivileged port. 191 191 """ 192 - for _ in range(1000): 193 - port = random.randint(10000, 65535) 194 - try: 195 - with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: 196 - s.bind(("", port)) 197 - return port 198 - except OSError as e: 199 - if e.errno != errno.EADDRINUSE: 200 - raise 201 - raise Exception("Can't find any free unprivileged port") 192 + with socket.socket(socket.AF_INET6, type) as s: 193 + s.bind(("", 0)) 194 + return s.getsockname()[1] 202 195 203 196 204 197 def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):