Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

neighbour: Create netdev->neighbour association

Create a mapping between a netdev and its neighbours,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Gilad Naaman and committed by
Jakub Kicinski
f7f52738 a01a67ab

+80 -45
+1
Documentation/networking/net_cachelines/net_device.rst
··· 188 188 struct_napi_config* napi_config 189 189 unsigned_long gro_flush_timeout 190 190 u32 napi_defer_hard_irqs 191 + struct hlist_head neighbours[2] 191 192 =================================== =========================== =================== =================== ===================================================================================
+7
include/linux/netdevice.h
··· 52 52 #include <net/net_trackers.h> 53 53 #include <net/net_debug.h> 54 54 #include <net/dropreason-core.h> 55 + #include <net/neighbour_tables.h> 55 56 56 57 struct netpoll_info; 57 58 struct device; ··· 2033 2032 * @napi_defer_hard_irqs: If not zero, provides a counter that would 2034 2033 * allow to avoid NIC hard IRQ, on busy queues. 2035 2034 * 2035 + * @neighbours: List heads pointing to this device's neighbours' 2036 + * dev_list, one per address-family. 2037 + * 2036 2038 * FIXME: cleanup struct net_device such that network protocol info 2037 2039 * moves out. 2038 2040 */ ··· 2444 2440 */ 2445 2441 struct net_shaper_hierarchy *net_shaper_hierarchy; 2446 2442 #endif 2443 + 2444 + struct hlist_head neighbours[NEIGH_NR_TABLES]; 2445 + 2447 2446 u8 priv[] ____cacheline_aligned 2448 2447 __counted_by(priv_len); 2449 2448 } ____cacheline_aligned;
+2 -7
include/net/neighbour.h
··· 29 29 #include <linux/sysctl.h> 30 30 #include <linux/workqueue.h> 31 31 #include <net/rtnetlink.h> 32 + #include <net/neighbour_tables.h> 32 33 33 34 /* 34 35 * NUD stands for "neighbor unreachability detection" ··· 137 136 138 137 struct neighbour { 139 138 struct hlist_node hash; 139 + struct hlist_node dev_list; 140 140 struct neigh_table *tbl; 141 141 struct neigh_parms *parms; 142 142 unsigned long confirmed; ··· 236 234 struct neigh_statistics __percpu *stats; 237 235 struct neigh_hash_table __rcu *nht; 238 236 struct pneigh_entry **phash_buckets; 239 - }; 240 - 241 - enum { 242 - NEIGH_ARP_TABLE = 0, 243 - NEIGH_ND_TABLE = 1, 244 - NEIGH_NR_TABLES, 245 - NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ 246 237 }; 247 238 248 239 static inline int neigh_parms_family(struct neigh_parms *p)
+12
include/net/neighbour_tables.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _NET_NEIGHBOUR_TABLES_H 3 + #define _NET_NEIGHBOUR_TABLES_H 4 + 5 + enum { 6 + NEIGH_ARP_TABLE = 0, 7 + NEIGH_ND_TABLE = 1, 8 + NEIGH_NR_TABLES, 9 + NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ 10 + }; 11 + 12 + #endif
+58 -38
net/core/neighbour.c
··· 60 60 static const struct seq_operations neigh_stat_seq_ops; 61 61 #endif 62 62 63 + static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family) 64 + { 65 + int i; 66 + 67 + switch (family) { 68 + default: 69 + DEBUG_NET_WARN_ON_ONCE(1); 70 + fallthrough; /* to avoid panic by null-ptr-deref */ 71 + case AF_INET: 72 + i = NEIGH_ARP_TABLE; 73 + break; 74 + case AF_INET6: 75 + i = NEIGH_ND_TABLE; 76 + break; 77 + } 78 + 79 + return &dev->neighbours[i]; 80 + } 81 + 63 82 /* 64 83 Neighbour hash table buckets are protected with rwlock tbl->lock. 65 84 ··· 230 211 write_lock(&n->lock); 231 212 if (refcount_read(&n->refcnt) == 1) { 232 213 hlist_del_rcu(&n->hash); 214 + hlist_del_rcu(&n->dev_list); 233 215 neigh_mark_dead(n); 234 216 retval = true; 235 217 } ··· 371 351 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, 372 352 bool skip_perm) 373 353 { 374 - int i; 375 - struct neigh_hash_table *nht; 354 + struct hlist_head *dev_head; 355 + struct hlist_node *tmp; 356 + struct neighbour *n; 376 357 377 - nht = rcu_dereference_protected(tbl->nht, 378 - lockdep_is_held(&tbl->lock)); 358 + dev_head = neigh_get_dev_table(dev, tbl->family); 379 359 380 - for (i = 0; i < (1 << nht->hash_shift); i++) { 381 - struct hlist_node *tmp; 382 - struct neighbour *n; 360 + hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) { 361 + if (skip_perm && n->nud_state & NUD_PERMANENT) 362 + continue; 383 363 384 - neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { 385 - if (dev && n->dev != dev) 386 - continue; 387 - if (skip_perm && n->nud_state & NUD_PERMANENT) 388 - continue; 389 - 390 - hlist_del_rcu(&n->hash); 391 - write_lock(&n->lock); 392 - neigh_del_timer(n); 393 - neigh_mark_dead(n); 394 - if (refcount_read(&n->refcnt) != 1) { 395 - /* The most unpleasant situation. 396 - We must destroy neighbour entry, 397 - but someone still uses it. 
398 - 399 - The destroy will be delayed until 400 - the last user releases us, but 401 - we must kill timers etc. and move 402 - it to safe state. 403 - */ 404 - __skb_queue_purge(&n->arp_queue); 405 - n->arp_queue_len_bytes = 0; 406 - WRITE_ONCE(n->output, neigh_blackhole); 407 - if (n->nud_state & NUD_VALID) 408 - n->nud_state = NUD_NOARP; 409 - else 410 - n->nud_state = NUD_NONE; 411 - neigh_dbg(2, "neigh %p is stray\n", n); 412 - } 413 - write_unlock(&n->lock); 414 - neigh_cleanup_and_release(n); 364 + hlist_del_rcu(&n->hash); 365 + hlist_del_rcu(&n->dev_list); 366 + write_lock(&n->lock); 367 + neigh_del_timer(n); 368 + neigh_mark_dead(n); 369 + if (refcount_read(&n->refcnt) != 1) { 370 + /* The most unpleasant situation. 371 + * We must destroy neighbour entry, 372 + * but someone still uses it. 373 + * 374 + * The destroy will be delayed until 375 + * the last user releases us, but 376 + * we must kill timers etc. and move 377 + * it to safe state. 378 + */ 379 + __skb_queue_purge(&n->arp_queue); 380 + n->arp_queue_len_bytes = 0; 381 + WRITE_ONCE(n->output, neigh_blackhole); 382 + if (n->nud_state & NUD_VALID) 383 + n->nud_state = NUD_NOARP; 384 + else 385 + n->nud_state = NUD_NONE; 386 + neigh_dbg(2, "neigh %p is stray\n", n); 415 387 } 388 + write_unlock(&n->lock); 389 + neigh_cleanup_and_release(n); 416 390 } 417 391 } 418 392 ··· 669 655 if (want_ref) 670 656 neigh_hold(n); 671 657 hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]); 658 + 659 + hlist_add_head_rcu(&n->dev_list, 660 + neigh_get_dev_table(dev, tbl->family)); 661 + 672 662 write_unlock_bh(&tbl->lock); 673 663 neigh_dbg(2, "neigh %p is created\n", n); 674 664 rc = n; ··· 953 935 !time_in_range_open(jiffies, n->used, 954 936 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { 955 937 hlist_del_rcu(&n->hash); 938 + hlist_del_rcu(&n->dev_list); 956 939 neigh_mark_dead(n); 957 940 write_unlock(&n->lock); 958 941 neigh_cleanup_and_release(n); ··· 3073 3054 release = cb(n); 3074 3055 if (release) 
{ 3075 3056 hlist_del_rcu(&n->hash); 3057 + hlist_del_rcu(&n->dev_list); 3076 3058 neigh_mark_dead(n); 3077 3059 } 3078 3060 write_unlock(&n->lock);