Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mitigate-double-allocations-in-ioam6_iptunnel'

Justin Iurman says:

====================
Mitigate double allocations in ioam6_iptunnel

Commit dce525185bc9 ("net: ipv6: ioam6_iptunnel: mitigate 2-realloc
issue") fixed the double allocation issue in ioam6_iptunnel. However,
since commit 92191dd10730 ("net: ipv6: fix dst ref loops in rpl, seg6
and ioam6 lwtunnels"), the fix was left incomplete. Because the cache is
now empty when the dst_entry is the same post transformation in order to
avoid a reference loop, the double reallocation is back for such cases
(e.g., inline mode) which are valid for IOAM. This patch provides a way
to detect such cases without having a reference loop in the cache, and
so to avoid the double reallocation issue for all cases again.

v1: https://lore.kernel.org/netdev/20250410152432.30246-1-justin.iurman@uliege.be/T/#t
====================

Link: https://patch.msgid.link/20250415112554.23823-1-justin.iurman@uliege.be
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+53 -21
+53 -21
net/ipv6/ioam6_iptunnel.c
··· 38 38 }; 39 39 40 40 struct ioam6_lwt { 41 + struct dst_entry null_dst; 41 42 struct dst_cache cache; 42 43 struct ioam6_lwt_freq freq; 43 44 atomic_t pkt_cnt; ··· 177 176 err = dst_cache_init(&ilwt->cache, GFP_ATOMIC); 178 177 if (err) 179 178 goto free_lwt; 179 + 180 + /* This "fake" dst_entry will be stored in a dst_cache, which will call 181 + * dst_hold() and dst_release() on it. We must ensure that dst_destroy() 182 + * will never be called. For that, its initial refcount is 1 and +1 when 183 + * it is stored in the cache. Then, +1/-1 each time we read the cache 184 + * and release it. Long story short, we're fine. 185 + */ 186 + dst_init(&ilwt->null_dst, NULL, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); 180 187 181 188 atomic_set(&ilwt->pkt_cnt, 0); 182 189 ilwt->freq.k = freq_k; ··· 345 336 346 337 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 347 338 { 348 - struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; 339 + struct dst_entry *orig_dst = skb_dst(skb); 340 + struct dst_entry *dst = NULL; 349 341 struct ioam6_lwt *ilwt; 350 342 int err = -EINVAL; 351 343 u32 pkt_cnt; ··· 354 344 if (skb->protocol != htons(ETH_P_IPV6)) 355 345 goto drop; 356 346 357 - ilwt = ioam6_lwt_state(dst->lwtstate); 347 + ilwt = ioam6_lwt_state(orig_dst->lwtstate); 358 348 359 349 /* Check for insertion frequency (i.e., "k over n" insertions) */ 360 350 pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt); ··· 362 352 goto out; 363 353 364 354 local_bh_disable(); 365 - cache_dst = dst_cache_get(&ilwt->cache); 355 + dst = dst_cache_get(&ilwt->cache); 366 356 local_bh_enable(); 357 + 358 + /* This is how we notify that the destination does not change after 359 + * transformation and that we need to use orig_dst instead of the cache 360 + */ 361 + if (dst == &ilwt->null_dst) { 362 + dst_release(dst); 363 + 364 + dst = orig_dst; 365 + /* keep refcount balance: dst_release() is called at the end */ 366 + dst_hold(dst); 367 + } 367 368 368 369 switch (ilwt->mode) { 369 370 case IOAM6_IPTUNNEL_MODE_INLINE: ··· 383 362 if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) 384 363 goto out; 385 364 386 - err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst); 365 + err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst); 387 366 if (unlikely(err)) 388 367 goto drop; 389 368 ··· 393 372 /* Encapsulation (ip6ip6) */ 394 373 err = ioam6_do_encap(net, skb, &ilwt->tuninfo, 395 374 ilwt->has_tunsrc, &ilwt->tunsrc, 396 - &ilwt->tundst, cache_dst); 375 + &ilwt->tundst, dst); 397 376 if (unlikely(err)) 398 377 goto drop; 399 378 ··· 411 390 goto drop; 412 391 } 413 392 414 - if (unlikely(!cache_dst)) { 393 + if (unlikely(!dst)) { 415 394 struct ipv6hdr *hdr = ipv6_hdr(skb); 416 395 struct flowi6 fl6; 417 396 ··· 422 401 fl6.flowi6_mark = skb->mark; 423 402 fl6.flowi6_proto = hdr->nexthdr; 424 403 425 - cache_dst = ip6_route_output(net, NULL, &fl6); 426 - if (cache_dst->error) { 427 - err = cache_dst->error; 404 + dst = ip6_route_output(net, NULL, &fl6); 405 + if (dst->error) { 406 + err = dst->error; 428 407 goto drop; 429 408 } 430 409 431 - /* cache only if we don't create a dst reference loop */ 432 - if (dst->lwtstate != cache_dst->lwtstate) { 433 - local_bh_disable(); 434 - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); 435 - local_bh_enable(); 436 - } 410 + /* If the destination is the same after transformation (which is 411 + * a valid use case for IOAM), then we don't want to add it to 412 + * the cache in order to avoid a reference loop. Instead, we add 413 + * our fake dst_entry to the cache as a way to detect this case. 414 + * Otherwise, we add the resolved destination to the cache. 415 + */ 416 + local_bh_disable(); 417 + if (orig_dst->lwtstate == dst->lwtstate) 418 + dst_cache_set_ip6(&ilwt->cache, 419 + &ilwt->null_dst, &fl6.saddr); 420 + else 421 + dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); 422 + local_bh_enable(); 437 423 438 - err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); 424 + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 439 425 if (unlikely(err)) 440 426 goto drop; 441 427 } ··· 450 422 /* avoid lwtunnel_output() reentry loop when destination is the same 451 423 * after transformation (e.g., with the inline mode) 452 424 */ 453 - if (dst->lwtstate != cache_dst->lwtstate) { 425 + if (orig_dst->lwtstate != dst->lwtstate) { 454 426 skb_dst_drop(skb); 455 - skb_dst_set(skb, cache_dst); 427 + skb_dst_set(skb, dst); 456 428 return dst_output(net, sk, skb); 457 429 } 458 430 out: 459 - dst_release(cache_dst); 460 - return dst->lwtstate->orig_output(net, sk, skb); 431 + dst_release(dst); 432 + return orig_dst->lwtstate->orig_output(net, sk, skb); 461 433 drop: 462 - dst_release(cache_dst); 434 + dst_release(dst); 463 435 kfree_skb(skb); 464 436 return err; 465 437 } 466 438 467 439 static void ioam6_destroy_state(struct lwtunnel_state *lwt) 468 440 { 441 + /* Since the refcount of per-cpu dst_entry caches will never be 0 (see 442 + * why above) when our "fake" dst_entry is used, it is not necessary to 443 + * remove them before calling dst_cache_destroy() 444 + */ 469 445 dst_cache_destroy(&ioam6_lwt_state(lwt)->cache); 470 446 } 471 447