Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: fib: restore ECMP balance from loopback

Preferring the nexthop whose source address matches broke ECMP for packets
whose source addresses are not in the broadcast domain, but are instead
assigned to loopback/dummy interfaces. The original behaviour was to balance
over the nexthops, while now the last nexthop in the group is always used.
To fix the issue, introduce a nexthop scoring system in which nexthops whose
source address equals the requested one always have higher priority.

For example, with 198.51.100.1/32 assigned to dummy0 and routed via the
192.0.2.0/24 and 203.0.113.0/24 networks:

2: dummy0: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
link/ether d6:54:8a:ff:78:f5 brd ff:ff:ff:ff:ff:ff
inet 198.51.100.1/32 scope global dummy0
valid_lft forever preferred_lft forever
7: veth1@if6: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
link/ether 06:ed:98:87:6d:8a brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet 192.0.2.2/24 scope global veth1
valid_lft forever preferred_lft forever
inet6 fe80::4ed:98ff:fe87:6d8a/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
9: veth3@if8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
link/ether ae:75:23:38:a0:d2 brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet 203.0.113.2/24 scope global veth3
valid_lft forever preferred_lft forever
inet6 fe80::ac75:23ff:fe38:a0d2/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever

~ ip ro list:
default
nexthop via 192.0.2.1 dev veth1 weight 1
nexthop via 203.0.113.1 dev veth3 weight 1
192.0.2.0/24 dev veth1 proto kernel scope link src 192.0.2.2
203.0.113.0/24 dev veth3 proto kernel scope link src 203.0.113.2

before:
for i in {1..255} ; do ip ro get 10.0.0.$i; done | grep veth | awk ' {print $(NF-2)}' | sort | uniq -c:
255 veth3

after:
for i in {1..255} ; do ip ro get 10.0.0.$i; done | grep veth | awk ' {print $(NF-2)}' | sort | uniq -c:
122 veth1
133 veth3

Fixes: 32607a332cfe ("ipv4: prefer multipath nexthop that matches source address")
Signed-off-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20251221192639.3911901-1-vadim.fedorenko@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Vadim Fedorenko and committed by
Paolo Abeni
6e17474a 44741e9d

+10 -16
+10 -16
net/ipv4/fib_semantics.c
··· 2167 2167 { 2168 2168 struct fib_info *fi = res->fi; 2169 2169 struct net *net = fi->fib_net; 2170 - bool found = false; 2171 2170 bool use_neigh; 2171 + int score = -1; 2172 2172 __be32 saddr; 2173 2173 2174 2174 if (unlikely(res->fi->nh)) { ··· 2180 2180 saddr = fl4 ? fl4->saddr : 0; 2181 2181 2182 2182 change_nexthops(fi) { 2183 - int nh_upper_bound; 2183 + int nh_upper_bound, nh_score = 0; 2184 2184 2185 2185 /* Nexthops without a carrier are assigned an upper bound of 2186 2186 * minus one when "ignore_routes_with_linkdown" is set. ··· 2190 2190 (use_neigh && !fib_good_nh(nexthop_nh))) 2191 2191 continue; 2192 2192 2193 - if (!found) { 2193 + if (saddr && nexthop_nh->nh_saddr == saddr) 2194 + nh_score += 2; 2195 + if (hash <= nh_upper_bound) 2196 + nh_score++; 2197 + if (score < nh_score) { 2194 2198 res->nh_sel = nhsel; 2195 2199 res->nhc = &nexthop_nh->nh_common; 2196 - found = !saddr || nexthop_nh->nh_saddr == saddr; 2200 + if (nh_score == 3 || (!saddr && nh_score == 1)) 2201 + return; 2202 + score = nh_score; 2197 2203 } 2198 - 2199 - if (hash > nh_upper_bound) 2200 - continue; 2201 - 2202 - if (!saddr || nexthop_nh->nh_saddr == saddr) { 2203 - res->nh_sel = nhsel; 2204 - res->nhc = &nexthop_nh->nh_common; 2205 - return; 2206 - } 2207 - 2208 - if (found) 2209 - return; 2210 2204 2211 2205 } endfor_nexthops(fi); 2212 2206 }