Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: gro: decouple GRO from the NAPI layer

In fact, these two are not tied closely to each other. The only
requirements for GRO are that it is used in the BH context and that
the packet batches have some sane limit, e.g. NAPI is limited by its
budget (64/8/etc.).
Move purely GRO fields into a new structure, &gro_node. Embed it
into &napi_struct and adjust all the references.
gro_node::cached_napi_id is effectively the same as
napi_struct::napi_id, but is meant to be used on the GRO hotpath to
mark skbs. napi_struct::napi_id is now purely a control-path field.

Three Ethernet drivers use napi_gro_flush(), which was not really meant
to be exported, so move it to <net/gro.h> and include that header in them.
napi_gro_receive() is used in more than 100 drivers, keep it
in <linux/netdevice.h>.
This does not make GRO ready to use outside of the NAPI context
yet.

Tested-by: Daniel Xu <dxu@dxuuu.xyz>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Alexander Lobakin and committed by
Paolo Abeni
291515c7 01358e8f

+130 -92
+1
drivers/net/ethernet/brocade/bna/bnad.c
··· 19 19 #include <linux/ip.h> 20 20 #include <linux/prefetch.h> 21 21 #include <linux/module.h> 22 + #include <net/gro.h> 22 23 23 24 #include "bnad.h" 24 25 #include "bna.h"
+1
drivers/net/ethernet/cortina/gemini.c
··· 40 40 #include <linux/in.h> 41 41 #include <linux/ip.h> 42 42 #include <linux/ipv6.h> 43 + #include <net/gro.h> 43 44 44 45 #include "gemini.h" 45 46
+1
drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c
··· 41 41 #include <linux/types.h> 42 42 #include <linux/wait.h> 43 43 #include <linux/workqueue.h> 44 + #include <net/gro.h> 44 45 45 46 #include "t7xx_dpmaif.h" 46 47 #include "t7xx_hif_dpmaif.h"
+28 -9
include/linux/netdevice.h
··· 340 340 }; 341 341 342 342 /* 343 - * size of gro hash buckets, must less than bit number of 344 - * napi_struct::gro_bitmask 343 + * size of gro hash buckets, must be <= the number of bits in 344 + * gro_node::bitmask 345 345 */ 346 346 #define GRO_HASH_BUCKETS 8 347 + 348 + /** 349 + * struct gro_node - structure to support Generic Receive Offload 350 + * @bitmask: bitmask to indicate used buckets in @hash 351 + * @hash: hashtable of pending aggregated skbs, separated by flows 352 + * @rx_list: list of pending ``GRO_NORMAL`` skbs 353 + * @rx_count: cached current length of @rx_list 354 + * @cached_napi_id: napi_struct::napi_id cached for hotpath, 0 for standalone 355 + */ 356 + struct gro_node { 357 + unsigned long bitmask; 358 + struct gro_list hash[GRO_HASH_BUCKETS]; 359 + struct list_head rx_list; 360 + u32 rx_count; 361 + u32 cached_napi_id; 362 + }; 347 363 348 364 /* 349 365 * Structure for per-NAPI config ··· 387 371 unsigned long state; 388 372 int weight; 389 373 u32 defer_hard_irqs_count; 390 - unsigned long gro_bitmask; 391 374 int (*poll)(struct napi_struct *, int); 392 375 #ifdef CONFIG_NETPOLL 393 376 /* CPU actively polling if netpoll is configured */ ··· 395 380 /* CPU on which NAPI has been scheduled for processing */ 396 381 int list_owner; 397 382 struct net_device *dev; 398 - struct gro_list gro_hash[GRO_HASH_BUCKETS]; 399 383 struct sk_buff *skb; 400 - struct list_head rx_list; /* Pending GRO_NORMAL skbs */ 401 - int rx_count; /* length of rx_list */ 402 - unsigned int napi_id; /* protected by netdev_lock */ 384 + struct gro_node gro; 403 385 struct hrtimer timer; 404 386 /* all fields past this point are write-protected by netdev_lock */ 405 387 struct task_struct *thread; ··· 404 392 unsigned long irq_suspend_timeout; 405 393 u32 defer_hard_irqs; 406 394 /* control-path-only fields follow */ 395 + u32 napi_id; 407 396 struct list_head dev_list; 408 397 struct hlist_node napi_hash_node; 409 398 int irq; ··· 4144 4131 int 
netif_receive_skb_core(struct sk_buff *skb); 4145 4132 void netif_receive_skb_list_internal(struct list_head *head); 4146 4133 void netif_receive_skb_list(struct list_head *head); 4147 - gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); 4148 - void napi_gro_flush(struct napi_struct *napi, bool flush_old); 4134 + gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb); 4135 + 4136 + static inline gro_result_t napi_gro_receive(struct napi_struct *napi, 4137 + struct sk_buff *skb) 4138 + { 4139 + return gro_receive_skb(&napi->gro, skb); 4140 + } 4141 + 4149 4142 struct sk_buff *napi_get_frags(struct napi_struct *napi); 4150 4143 gro_result_t napi_gro_frags(struct napi_struct *napi); 4151 4144
+9 -3
include/net/busy_poll.h
··· 127 127 } 128 128 129 129 /* used in the NIC receive handler to mark the skb */ 130 - static inline void skb_mark_napi_id(struct sk_buff *skb, 131 - struct napi_struct *napi) 130 + static inline void __skb_mark_napi_id(struct sk_buff *skb, 131 + const struct gro_node *gro) 132 132 { 133 133 #ifdef CONFIG_NET_RX_BUSY_POLL 134 134 /* If the skb was already marked with a valid NAPI ID, avoid overwriting 135 135 * it. 136 136 */ 137 137 if (!napi_id_valid(skb->napi_id)) 138 - skb->napi_id = napi->napi_id; 138 + skb->napi_id = gro->cached_napi_id; 139 139 #endif 140 + } 141 + 142 + static inline void skb_mark_napi_id(struct sk_buff *skb, 143 + const struct napi_struct *napi) 144 + { 145 + __skb_mark_napi_id(skb, &napi->gro); 140 146 } 141 147 142 148 /* used in the protocol handler to propagate the napi_id to the socket */
+25 -10
include/net/gro.h
··· 509 509 510 510 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); 511 511 int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); 512 + void __gro_flush(struct gro_node *gro, bool flush_old); 513 + 514 + static inline void gro_flush(struct gro_node *gro, bool flush_old) 515 + { 516 + if (!gro->bitmask) 517 + return; 518 + 519 + __gro_flush(gro, flush_old); 520 + } 521 + 522 + static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old) 523 + { 524 + gro_flush(&napi->gro, flush_old); 525 + } 512 526 513 527 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ 514 - static inline void gro_normal_list(struct napi_struct *napi) 528 + static inline void gro_normal_list(struct gro_node *gro) 515 529 { 516 - if (!napi->rx_count) 530 + if (!gro->rx_count) 517 531 return; 518 - netif_receive_skb_list_internal(&napi->rx_list); 519 - INIT_LIST_HEAD(&napi->rx_list); 520 - napi->rx_count = 0; 532 + netif_receive_skb_list_internal(&gro->rx_list); 533 + INIT_LIST_HEAD(&gro->rx_list); 534 + gro->rx_count = 0; 521 535 } 522 536 523 537 /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, 524 538 * pass the whole batch up to the stack. 525 539 */ 526 - static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) 540 + static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb, 541 + int segs) 527 542 { 528 - list_add_tail(&skb->list, &napi->rx_list); 529 - napi->rx_count += segs; 530 - if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) 531 - gro_normal_list(napi); 543 + list_add_tail(&skb->list, &gro->rx_list); 544 + gro->rx_count += segs; 545 + if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) 546 + gro_normal_list(gro); 532 547 } 533 548 534 549 /* This function is the alternative of 'inet_iif' and 'inet_sdif'
+31 -35
net/core/dev.c
··· 6484 6484 return false; 6485 6485 6486 6486 if (work_done) { 6487 - if (n->gro_bitmask) 6487 + if (n->gro.bitmask) 6488 6488 timeout = napi_get_gro_flush_timeout(n); 6489 6489 n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n); 6490 6490 } ··· 6494 6494 if (timeout) 6495 6495 ret = false; 6496 6496 } 6497 - if (n->gro_bitmask) { 6498 - /* When the NAPI instance uses a timeout and keeps postponing 6499 - * it, we need to bound somehow the time packets are kept in 6500 - * the GRO layer 6501 - */ 6502 - napi_gro_flush(n, !!timeout); 6503 - } 6504 6497 6505 - gro_normal_list(n); 6498 + /* 6499 + * When the NAPI instance uses a timeout and keeps postponing 6500 + * it, we need to bound somehow the time packets are kept in 6501 + * the GRO layer. 6502 + */ 6503 + gro_flush(&n->gro, !!timeout); 6504 + gro_normal_list(&n->gro); 6506 6505 6507 6506 if (unlikely(!list_empty(&n->poll_list))) { 6508 6507 /* If n->poll_list is not empty, we need to mask irqs */ ··· 6565 6566 static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) 6566 6567 { 6567 6568 if (!skip_schedule) { 6568 - gro_normal_list(napi); 6569 + gro_normal_list(&napi->gro); 6569 6570 __napi_schedule(napi); 6570 6571 return; 6571 6572 } 6572 6573 6573 - if (napi->gro_bitmask) { 6574 - /* flush too old packets 6575 - * If HZ < 1000, flush all packets. 6576 - */ 6577 - napi_gro_flush(napi, HZ >= 1000); 6578 - } 6574 + /* Flush too old packets. 
If HZ < 1000, flush all packets */ 6575 + gro_flush(&napi->gro, HZ >= 1000); 6576 + gro_normal_list(&napi->gro); 6579 6577 6580 - gro_normal_list(napi); 6581 6578 clear_bit(NAPI_STATE_SCHED, &napi->state); 6582 6579 } 6583 6580 ··· 6680 6685 } 6681 6686 work = napi_poll(napi, budget); 6682 6687 trace_napi_poll(napi, work, budget); 6683 - gro_normal_list(napi); 6688 + gro_normal_list(&napi->gro); 6684 6689 count: 6685 6690 if (work > 0) 6686 6691 __NET_ADD_STATS(dev_net(napi->dev), ··· 6780 6785 static void __napi_hash_add_with_id(struct napi_struct *napi, 6781 6786 unsigned int napi_id) 6782 6787 { 6788 + napi->gro.cached_napi_id = napi_id; 6789 + 6783 6790 WRITE_ONCE(napi->napi_id, napi_id); 6784 6791 hlist_add_head_rcu(&napi->napi_hash_node, 6785 6792 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); ··· 6855 6858 int i; 6856 6859 6857 6860 for (i = 0; i < GRO_HASH_BUCKETS; i++) { 6858 - INIT_LIST_HEAD(&napi->gro_hash[i].list); 6859 - napi->gro_hash[i].count = 0; 6861 + INIT_LIST_HEAD(&napi->gro.hash[i].list); 6862 + napi->gro.hash[i].count = 0; 6860 6863 } 6861 - napi->gro_bitmask = 0; 6864 + 6865 + napi->gro.bitmask = 0; 6866 + napi->gro.cached_napi_id = 0; 6862 6867 } 6863 6868 6864 6869 int dev_set_threaded(struct net_device *dev, bool threaded) ··· 7192 7193 napi->timer.function = napi_watchdog; 7193 7194 init_gro_hash(napi); 7194 7195 napi->skb = NULL; 7195 - INIT_LIST_HEAD(&napi->rx_list); 7196 - napi->rx_count = 0; 7196 + INIT_LIST_HEAD(&napi->gro.rx_list); 7197 + napi->gro.rx_count = 0; 7197 7198 napi->poll = poll; 7198 7199 if (weight > NAPI_POLL_WEIGHT) 7199 7200 netdev_err_once(dev, "%s() called with weight %d\n", __func__, ··· 7314 7315 for (i = 0; i < GRO_HASH_BUCKETS; i++) { 7315 7316 struct sk_buff *skb, *n; 7316 7317 7317 - list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list) 7318 + list_for_each_entry_safe(skb, n, &napi->gro.hash[i].list, list) 7318 7319 kfree_skb(skb); 7319 - napi->gro_hash[i].count = 0; 7320 + 
napi->gro.hash[i].count = 0; 7320 7321 } 7322 + 7323 + napi->gro.bitmask = 0; 7324 + napi->gro.cached_napi_id = 0; 7321 7325 } 7322 7326 7323 7327 /* Must be called in process context */ ··· 7346 7344 napi_free_frags(napi); 7347 7345 7348 7346 flush_gro_hash(napi); 7349 - napi->gro_bitmask = 0; 7350 7347 7351 7348 if (napi->thread) { 7352 7349 kthread_stop(napi->thread); ··· 7404 7403 return work; 7405 7404 } 7406 7405 7407 - if (n->gro_bitmask) { 7408 - /* flush too old packets 7409 - * If HZ < 1000, flush all packets. 7410 - */ 7411 - napi_gro_flush(n, HZ >= 1000); 7412 - } 7413 - 7414 - gro_normal_list(n); 7406 + /* Flush too old packets. If HZ < 1000, flush all packets */ 7407 + gro_flush(&n->gro, HZ >= 1000); 7408 + gro_normal_list(&n->gro); 7415 7409 7416 7410 /* Some drivers may have called napi_schedule 7417 7411 * prior to exhausting their budget. ··· 12435 12439 static int __net_init netdev_init(struct net *net) 12436 12440 { 12437 12441 BUILD_BUG_ON(GRO_HASH_BUCKETS > 12438 - 8 * sizeof_field(struct napi_struct, gro_bitmask)); 12442 + BITS_PER_BYTE * sizeof_field(struct gro_node, bitmask)); 12439 12443 12440 12444 INIT_LIST_HEAD(&net->dev_base_head); 12441 12445
+34 -35
net/core/gro.c
··· 250 250 return 0; 251 251 } 252 252 253 - 254 - static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) 253 + static void gro_complete(struct gro_node *gro, struct sk_buff *skb) 255 254 { 256 255 struct list_head *head = &net_hotdata.offload_base; 257 256 struct packet_offload *ptype; ··· 283 284 } 284 285 285 286 out: 286 - gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); 287 + gro_normal_one(gro, skb, NAPI_GRO_CB(skb)->count); 287 288 } 288 289 289 - static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, 290 - bool flush_old) 290 + static void __gro_flush_chain(struct gro_node *gro, u32 index, bool flush_old) 291 291 { 292 - struct list_head *head = &napi->gro_hash[index].list; 292 + struct list_head *head = &gro->hash[index].list; 293 293 struct sk_buff *skb, *p; 294 294 295 295 list_for_each_entry_safe_reverse(skb, p, head, list) { 296 296 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) 297 297 return; 298 298 skb_list_del_init(skb); 299 - napi_gro_complete(napi, skb); 300 - napi->gro_hash[index].count--; 299 + gro_complete(gro, skb); 300 + gro->hash[index].count--; 301 301 } 302 302 303 - if (!napi->gro_hash[index].count) 304 - __clear_bit(index, &napi->gro_bitmask); 303 + if (!gro->hash[index].count) 304 + __clear_bit(index, &gro->bitmask); 305 305 } 306 306 307 - /* napi->gro_hash[].list contains packets ordered by age. 307 + /* 308 + * gro->hash[].list contains packets ordered by age. 308 309 * youngest packets at the head of it. 309 310 * Complete skbs in reverse order to reduce latencies. 
310 311 */ 311 - void napi_gro_flush(struct napi_struct *napi, bool flush_old) 312 + void __gro_flush(struct gro_node *gro, bool flush_old) 312 313 { 313 - unsigned long bitmask = napi->gro_bitmask; 314 + unsigned long bitmask = gro->bitmask; 314 315 unsigned int i, base = ~0U; 315 316 316 317 while ((i = ffs(bitmask)) != 0) { 317 318 bitmask >>= i; 318 319 base += i; 319 - __napi_gro_flush_chain(napi, base, flush_old); 320 + __gro_flush_chain(gro, base, flush_old); 320 321 } 321 322 } 322 - EXPORT_SYMBOL(napi_gro_flush); 323 + EXPORT_SYMBOL(__gro_flush); 323 324 324 325 static unsigned long gro_list_prepare_tc_ext(const struct sk_buff *skb, 325 326 const struct sk_buff *p, ··· 438 439 gro_pull_from_frag0(skb, grow); 439 440 } 440 441 441 - static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) 442 + static void gro_flush_oldest(struct gro_node *gro, struct list_head *head) 442 443 { 443 444 struct sk_buff *oldest; 444 445 ··· 454 455 * SKB to the chain. 455 456 */ 456 457 skb_list_del_init(oldest); 457 - napi_gro_complete(napi, oldest); 458 + gro_complete(gro, oldest); 458 459 } 459 460 460 - static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 461 + static enum gro_result dev_gro_receive(struct gro_node *gro, 462 + struct sk_buff *skb) 461 463 { 462 464 u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); 463 - struct gro_list *gro_list = &napi->gro_hash[bucket]; 464 465 struct list_head *head = &net_hotdata.offload_base; 466 + struct gro_list *gro_list = &gro->hash[bucket]; 465 467 struct packet_offload *ptype; 466 468 __be16 type = skb->protocol; 467 469 struct sk_buff *pp = NULL; ··· 526 526 527 527 if (pp) { 528 528 skb_list_del_init(pp); 529 - napi_gro_complete(napi, pp); 529 + gro_complete(gro, pp); 530 530 gro_list->count--; 531 531 } 532 532 ··· 537 537 goto normal; 538 538 539 539 if (unlikely(gro_list->count >= MAX_GRO_SKBS)) 540 - gro_flush_oldest(napi, &gro_list->list); 540 + 
gro_flush_oldest(gro, &gro_list->list); 541 541 else 542 542 gro_list->count++; 543 543 ··· 551 551 ret = GRO_HELD; 552 552 ok: 553 553 if (gro_list->count) { 554 - if (!test_bit(bucket, &napi->gro_bitmask)) 555 - __set_bit(bucket, &napi->gro_bitmask); 556 - } else if (test_bit(bucket, &napi->gro_bitmask)) { 557 - __clear_bit(bucket, &napi->gro_bitmask); 554 + if (!test_bit(bucket, &gro->bitmask)) 555 + __set_bit(bucket, &gro->bitmask); 556 + } else if (test_bit(bucket, &gro->bitmask)) { 557 + __clear_bit(bucket, &gro->bitmask); 558 558 } 559 559 560 560 return ret; ··· 593 593 } 594 594 EXPORT_SYMBOL(gro_find_complete_by_type); 595 595 596 - static gro_result_t napi_skb_finish(struct napi_struct *napi, 597 - struct sk_buff *skb, 598 - gro_result_t ret) 596 + static gro_result_t gro_skb_finish(struct gro_node *gro, struct sk_buff *skb, 597 + gro_result_t ret) 599 598 { 600 599 switch (ret) { 601 600 case GRO_NORMAL: 602 - gro_normal_one(napi, skb, 1); 601 + gro_normal_one(gro, skb, 1); 603 602 break; 604 603 605 604 case GRO_MERGED_FREE: ··· 619 620 return ret; 620 621 } 621 622 622 - gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 623 + gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb) 623 624 { 624 625 gro_result_t ret; 625 626 626 - skb_mark_napi_id(skb, napi); 627 + __skb_mark_napi_id(skb, gro); 627 628 trace_napi_gro_receive_entry(skb); 628 629 629 630 skb_gro_reset_offset(skb, 0); 630 631 631 - ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); 632 + ret = gro_skb_finish(gro, skb, dev_gro_receive(gro, skb)); 632 633 trace_napi_gro_receive_exit(ret); 633 634 634 635 return ret; 635 636 } 636 - EXPORT_SYMBOL(napi_gro_receive); 637 + EXPORT_SYMBOL(gro_receive_skb); 637 638 638 639 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 639 640 { ··· 689 690 __skb_push(skb, ETH_HLEN); 690 691 skb->protocol = eth_type_trans(skb, skb->dev); 691 692 if (ret == GRO_NORMAL) 692 - 
gro_normal_one(napi, skb, 1); 693 + gro_normal_one(&napi->gro, skb, 1); 693 694 break; 694 695 695 696 case GRO_MERGED_FREE: ··· 758 759 759 760 trace_napi_gro_frags_entry(skb); 760 761 761 - ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); 762 + ret = napi_frags_finish(napi, skb, dev_gro_receive(&napi->gro, skb)); 762 763 trace_napi_gro_frags_exit(ret); 763 764 764 765 return ret;