Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-napi-add-cpu-affinity-to-napi-config'

Ahmed Zaki says:

====================
net: napi: add CPU affinity to napi->config

Drivers usually need to re-apply the user-set IRQ affinity to their IRQs
after reset. However, since there can be only one IRQ affinity notifier
for each IRQ, registering IRQ notifiers conflicts with the ARFS rmap
management in the core (which also registers separate IRQ affinity
notifiers).

Move the IRQ affinity management to the napi struct. This way we can have
a unified IRQ notifier to re-apply the user-set affinity and also manage
the ARFS rmaps.

The first patch moves the aRFS rmap management to the core. It also adds the
IRQ affinity mask to napi_config and re-applies the mask after reset.
Patches 2, 4 and 5 use the new API in the ena, ice and idpf drivers.

ICE does not always delete the NAPIs before releasing the IRQs. The third
patch makes sure the driver removes the IRQ number along with the queue
when the NAPIs are disabled. Without this, the next patches in this series
would free the IRQ before releasing the IRQ notifier (which generates
warnings).

Tested on ice and idpf.

v8: https://lore.kernel.org/20250211210657.428439-1-ahmed.zaki@intel.com
v7: https://lore.kernel.org/20250204220622.156061-1-ahmed.zaki@intel.com
v6: https://lore.kernel.org/20250118003335.155379-1-ahmed.zaki@intel.com
v5: https://lore.kernel.org/20250113171042.158123-1-ahmed.zaki@intel.com
v4: https://lore.kernel.org/20250109233107.17519-1-ahmed.zaki@intel.com
v3: https://lore.kernel.org/20250104004314.208259-1-ahmed.zaki@intel.com
v2: https://lore.kernel.org/202412190454.nwvp3hU2-lkp@intel.com
v1: https://lore.kernel.org/20241210002626.366878-1-ahmed.zaki@intel.com
====================

Link: https://patch.msgid.link/20250224232228.990783-1-ahmed.zaki@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+343 -163
+4 -2
Documentation/networking/scaling.rst
··· 434 434 is maintained by the NIC driver. This is an auto-generated reverse map of 435 435 the IRQ affinity table shown by /proc/interrupts. Drivers can use 436 436 functions in the cpu_rmap (“CPU affinity reverse map”) kernel library 437 - to populate the map. For each CPU, the corresponding queue in the map is 438 - set to be one whose processing CPU is closest in cache locality. 437 + to populate the map. Alternatively, drivers can delegate the cpu_rmap 438 + management to the Kernel by calling netif_enable_cpu_rmap(). For each CPU, 439 + the corresponding queue in the map is set to be one whose processing CPU is 440 + closest in cache locality. 439 441 440 442 441 443 Accelerated RFS Configuration
+1 -42
drivers/net/ethernet/amazon/ena/ena_netdev.c
··· 5 5 6 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 7 8 - #ifdef CONFIG_RFS_ACCEL 9 - #include <linux/cpu_rmap.h> 10 - #endif /* CONFIG_RFS_ACCEL */ 11 8 #include <linux/ethtool.h> 12 9 #include <linux/kernel.h> 13 10 #include <linux/module.h> ··· 156 159 157 160 ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, 158 161 ring->ring_size); 159 - return 0; 160 - } 161 - 162 - static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) 163 - { 164 - #ifdef CONFIG_RFS_ACCEL 165 - u32 i; 166 - int rc; 167 - 168 - adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues); 169 - if (!adapter->netdev->rx_cpu_rmap) 170 - return -ENOMEM; 171 - for (i = 0; i < adapter->num_io_queues; i++) { 172 - int irq_idx = ENA_IO_IRQ_IDX(i); 173 - 174 - rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, 175 - pci_irq_vector(adapter->pdev, irq_idx)); 176 - if (rc) { 177 - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); 178 - adapter->netdev->rx_cpu_rmap = NULL; 179 - return rc; 180 - } 181 - } 182 - #endif /* CONFIG_RFS_ACCEL */ 183 162 return 0; 184 163 } 185 164 ··· 1569 1596 adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; 1570 1597 } 1571 1598 1572 - if (ena_init_rx_cpu_rmap(adapter)) 1599 + if (netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues)) 1573 1600 netif_warn(adapter, probe, adapter->netdev, 1574 1601 "Failed to map IRQs to CPUs\n"); 1575 1602 ··· 1714 1741 u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 1715 1742 struct ena_irq *irq; 1716 1743 int i; 1717 - 1718 - #ifdef CONFIG_RFS_ACCEL 1719 - if (adapter->msix_vecs >= 1) { 1720 - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); 1721 - adapter->netdev->rx_cpu_rmap = NULL; 1722 - } 1723 - #endif /* CONFIG_RFS_ACCEL */ 1724 1744 1725 1745 for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { 1726 1746 irq = &adapter->irq_tbl[i]; ··· 4097 4131 ena_dev = adapter->ena_dev; 4098 4132 netdev = adapter->netdev; 4099 4133 4100 - #ifdef 
CONFIG_RFS_ACCEL 4101 - if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { 4102 - free_irq_cpu_rmap(netdev->rx_cpu_rmap); 4103 - netdev->rx_cpu_rmap = NULL; 4104 - } 4105 - 4106 - #endif /* CONFIG_RFS_ACCEL */ 4107 4134 /* Make sure timer and reset routine won't be called after 4108 4135 * freeing device resources. 4109 4136 */
-3
drivers/net/ethernet/intel/ice/ice.h
··· 475 475 struct ice_ring_container rx; 476 476 struct ice_ring_container tx; 477 477 478 - cpumask_t affinity_mask; 479 - struct irq_affinity_notify affinity_notify; 480 - 481 478 struct ice_channel *ch; 482 479 483 480 char name[ICE_INT_NAME_STR_LEN];
+1 -32
drivers/net/ethernet/intel/ice/ice_arfs.c
··· 571 571 } 572 572 573 573 /** 574 - * ice_free_cpu_rx_rmap - free setup CPU reverse map 575 - * @vsi: the VSI to be forwarded to 576 - */ 577 - void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) 578 - { 579 - struct net_device *netdev; 580 - 581 - if (!vsi || vsi->type != ICE_VSI_PF) 582 - return; 583 - 584 - netdev = vsi->netdev; 585 - if (!netdev || !netdev->rx_cpu_rmap) 586 - return; 587 - 588 - free_irq_cpu_rmap(netdev->rx_cpu_rmap); 589 - netdev->rx_cpu_rmap = NULL; 590 - } 591 - 592 - /** 593 574 * ice_set_cpu_rx_rmap - setup CPU reverse map for each queue 594 575 * @vsi: the VSI to be forwarded to 595 576 */ ··· 578 597 { 579 598 struct net_device *netdev; 580 599 struct ice_pf *pf; 581 - int i; 582 600 583 601 if (!vsi || vsi->type != ICE_VSI_PF) 584 602 return 0; ··· 590 610 netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n", 591 611 vsi->type, netdev->name, vsi->num_q_vectors); 592 612 593 - netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors); 594 - if (unlikely(!netdev->rx_cpu_rmap)) 595 - return -EINVAL; 596 - 597 - ice_for_each_q_vector(vsi, i) 598 - if (irq_cpu_rmap_add(netdev->rx_cpu_rmap, 599 - vsi->q_vectors[i]->irq.virq)) { 600 - ice_free_cpu_rx_rmap(vsi); 601 - return -EINVAL; 602 - } 603 - 604 - return 0; 613 + return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors); 605 614 } 606 615 607 616 /**
-2
drivers/net/ethernet/intel/ice/ice_arfs.h
··· 45 45 ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb, 46 46 u16 rxq_idx, u32 flow_id); 47 47 void ice_clear_arfs(struct ice_vsi *vsi); 48 - void ice_free_cpu_rx_rmap(struct ice_vsi *vsi); 49 48 void ice_init_arfs(struct ice_vsi *vsi); 50 49 void ice_sync_arfs_fltrs(struct ice_pf *pf); 51 50 int ice_set_cpu_rx_rmap(struct ice_vsi *vsi); ··· 55 56 enum ice_fltr_ptype flow_type); 56 57 #else 57 58 static inline void ice_clear_arfs(struct ice_vsi *vsi) { } 58 - static inline void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { } 59 59 static inline void ice_init_arfs(struct ice_vsi *vsi) { } 60 60 static inline void ice_sync_arfs_fltrs(struct ice_pf *pf) { } 61 61 static inline void ice_remove_arfs(struct ice_pf *pf) { }
+2 -5
drivers/net/ethernet/intel/ice/ice_base.c
··· 147 147 q_vector->reg_idx = q_vector->irq.index; 148 148 q_vector->vf_reg_idx = q_vector->irq.index; 149 149 150 - /* only set affinity_mask if the CPU is online */ 151 - if (cpu_online(v_idx)) 152 - cpumask_set_cpu(v_idx, &q_vector->affinity_mask); 153 - 154 150 /* This will not be called in the driver load path because the netdev 155 151 * will not be created yet. All other cases will register the NAPI 156 152 * handler here (i.e. resume, reset/rebuild, etc.) ··· 272 276 if (test_and_set_bit(ICE_TX_XPS_INIT_DONE, ring->xps_state)) 273 277 return; 274 278 275 - netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask, 279 + netif_set_xps_queue(ring->netdev, 280 + &ring->q_vector->napi.config->affinity_mask, 281 ring->q_index); 282 } 283
+8 -8
drivers/net/ethernet/intel/ice/ice_lib.c
··· 2592 2592 return; 2593 2593 2594 2594 vsi->irqs_ready = false; 2595 - ice_free_cpu_rx_rmap(vsi); 2596 2595 2597 2596 ice_for_each_q_vector(vsi, i) { 2598 2597 int irq_num; ··· 2604 2605 vsi->q_vectors[i]->num_ring_rx)) 2605 2606 continue; 2606 2607 2607 - /* clear the affinity notifier in the IRQ descriptor */ 2608 - if (!IS_ENABLED(CONFIG_RFS_ACCEL)) 2609 - irq_set_affinity_notifier(irq_num, NULL); 2610 - 2611 - /* clear the affinity_hint in the IRQ descriptor */ 2612 - irq_update_affinity_hint(irq_num, NULL); 2613 2608 synchronize_irq(irq_num); 2614 2609 devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]); 2615 2610 } ··· 2758 2765 void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) 2759 2766 { 2760 2767 struct net_device *netdev = vsi->netdev; 2761 - int q_idx; 2768 + int q_idx, v_idx; 2762 2769 2763 2770 if (!netdev) 2764 2771 return; 2772 + 2773 + /* Clear the NAPI's interrupt number */ 2774 + ice_for_each_q_vector(vsi, v_idx) { 2775 + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; 2776 + 2777 + netif_napi_set_irq(&q_vector->napi, -1); 2778 + } 2765 2779 2766 2780 ice_for_each_txq(vsi, q_idx) 2767 2781 netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_TX, NULL);
+3 -44
drivers/net/ethernet/intel/ice/ice_main.c
··· 2528 2528 } 2529 2529 2530 2530 /** 2531 - * ice_irq_affinity_notify - Callback for affinity changes 2532 - * @notify: context as to what irq was changed 2533 - * @mask: the new affinity mask 2534 - * 2535 - * This is a callback function used by the irq_set_affinity_notifier function 2536 - * so that we may register to receive changes to the irq affinity masks. 2537 - */ 2538 - static void 2539 - ice_irq_affinity_notify(struct irq_affinity_notify *notify, 2540 - const cpumask_t *mask) 2541 - { 2542 - struct ice_q_vector *q_vector = 2543 - container_of(notify, struct ice_q_vector, affinity_notify); 2544 - 2545 - cpumask_copy(&q_vector->affinity_mask, mask); 2546 - } 2547 - 2548 - /** 2549 - * ice_irq_affinity_release - Callback for affinity notifier release 2550 - * @ref: internal core kernel usage 2551 - * 2552 - * This is a callback function used by the irq_set_affinity_notifier function 2553 - * to inform the current notification subscriber that they will no longer 2554 - * receive notifications. 
2555 - */ 2556 - static void ice_irq_affinity_release(struct kref __always_unused *ref) {} 2557 - 2558 - /** 2559 2531 * ice_vsi_ena_irq - Enable IRQ for the given VSI 2560 2532 * @vsi: the VSI being configured 2561 2533 */ ··· 2590 2618 err); 2591 2619 goto free_q_irqs; 2592 2620 } 2593 - 2594 - /* register for affinity change notifications */ 2595 - if (!IS_ENABLED(CONFIG_RFS_ACCEL)) { 2596 - struct irq_affinity_notify *affinity_notify; 2597 - 2598 - affinity_notify = &q_vector->affinity_notify; 2599 - affinity_notify->notify = ice_irq_affinity_notify; 2600 - affinity_notify->release = ice_irq_affinity_release; 2601 - irq_set_affinity_notifier(irq_num, affinity_notify); 2602 - } 2603 - 2604 - /* assign the mask for this irq */ 2605 - irq_update_affinity_hint(irq_num, &q_vector->affinity_mask); 2606 2621 } 2607 2622 2608 2623 err = ice_set_cpu_rx_rmap(vsi); ··· 2605 2646 free_q_irqs: 2606 2647 while (vector--) { 2607 2648 irq_num = vsi->q_vectors[vector]->irq.virq; 2608 - if (!IS_ENABLED(CONFIG_RFS_ACCEL)) 2609 - irq_set_affinity_notifier(irq_num, NULL); 2610 - irq_update_affinity_hint(irq_num, NULL); 2611 2649 devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]); 2612 2650 } 2613 2651 return err; ··· 3630 3674 * be changed at runtime 3631 3675 */ 3632 3676 netdev->hw_features |= NETIF_F_RXFCS; 3677 + 3678 + /* Allow core to manage IRQs affinity */ 3679 + netif_set_affinity_auto(netdev); 3633 3680 3634 3681 netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE); 3635 3682 }
+1
drivers/net/ethernet/intel/idpf/idpf_lib.c
··· 814 814 netdev->hw_features |= dflt_features | offloads; 815 815 netdev->hw_enc_features |= dflt_features | offloads; 816 816 idpf_set_ethtool_ops(netdev); 817 + netif_set_affinity_auto(netdev); 817 818 SET_NETDEV_DEV(netdev, &adapter->pdev->dev); 818 819 819 820 /* carrier off on init to avoid Tx hangs */
+7 -15
drivers/net/ethernet/intel/idpf/idpf_txrx.c
··· 3551 3551 q_vector->tx = NULL; 3552 3552 kfree(q_vector->rx); 3553 3553 q_vector->rx = NULL; 3554 - 3555 - free_cpumask_var(q_vector->affinity_mask); 3556 3554 } 3557 3555 3558 3556 kfree(vport->q_vectors); ··· 3577 3579 vidx = vport->q_vector_idxs[vector]; 3578 3580 irq_num = adapter->msix_entries[vidx].vector; 3579 3581 3580 - /* clear the affinity_mask in the IRQ descriptor */ 3581 - irq_set_affinity_hint(irq_num, NULL); 3582 3582 kfree(free_irq(irq_num, q_vector)); 3583 3583 } 3584 3584 } ··· 3764 3768 "Request_irq failed, error: %d\n", err); 3765 3769 goto free_q_irqs; 3766 3770 } 3767 - /* assign the mask for this irq */ 3768 - irq_set_affinity_hint(irq_num, q_vector->affinity_mask); 3769 3771 } 3770 3772 3771 3773 return 0; ··· 4175 4181 static void idpf_vport_intr_napi_add_all(struct idpf_vport *vport) 4176 4182 { 4177 4183 int (*napi_poll)(struct napi_struct *napi, int budget); 4178 - u16 v_idx; 4184 + u16 v_idx, qv_idx; 4185 + int irq_num; 4179 4186 4180 4187 if (idpf_is_queue_model_split(vport->txq_model)) 4181 4188 napi_poll = idpf_vport_splitq_napi_poll; ··· 4185 4190 4186 4191 for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { 4187 4192 struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; 4193 + qv_idx = vport->q_vector_idxs[v_idx]; 4194 + irq_num = vport->adapter->msix_entries[qv_idx].vector; 4188 4195 4189 - netif_napi_add(vport->netdev, &q_vector->napi, napi_poll); 4190 - 4191 - /* only set affinity_mask if the CPU is online */ 4192 - if (cpu_online(v_idx)) 4193 - cpumask_set_cpu(v_idx, q_vector->affinity_mask); 4196 + netif_napi_add_config(vport->netdev, &q_vector->napi, 4197 + napi_poll, v_idx); 4198 + netif_napi_set_irq(&q_vector->napi, irq_num); 4194 4199 } 4195 4200 } 4196 4201 ··· 4233 4238 q_vector->rx_itr_value = IDPF_ITR_RX_DEF; 4234 4239 q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC; 4235 4240 q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0; 4236 - 4237 - if (!zalloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL)) 4238 - goto 
error; 4239 4241 4240 4242 q_vector->tx = kcalloc(txqs_per_vector, sizeof(*q_vector->tx), 4241 4243 GFP_KERNEL);
+2 -4
drivers/net/ethernet/intel/idpf/idpf_txrx.h
··· 382 382 * @rx_intr_mode: Dynamic ITR or not 383 383 * @rx_itr_idx: RX ITR index 384 384 * @v_idx: Vector index 385 - * @affinity_mask: CPU affinity mask 386 385 */ 387 386 struct idpf_q_vector { 388 387 __cacheline_group_begin_aligned(read_mostly); ··· 418 419 __cacheline_group_begin_aligned(cold); 419 420 u16 v_idx; 420 421 421 - cpumask_var_t affinity_mask; 422 422 __cacheline_group_end_aligned(cold); 423 423 }; 424 424 libeth_cacheline_set_assert(struct idpf_q_vector, 120, 425 425 24 + sizeof(struct napi_struct) + 426 426 2 * sizeof(struct dim), 427 - 8 + sizeof(cpumask_var_t)); 427 + 8); 428 428 429 429 struct idpf_rx_queue_stats { 430 430 u64_stats_t packets; ··· 919 921 if (!q_vector) 920 922 return NUMA_NO_NODE; 921 923 922 - cpu = cpumask_first(q_vector->affinity_mask); 924 + cpu = cpumask_first(&q_vector->napi.config->affinity_mask); 923 925 924 926 return cpu < nr_cpu_ids ? cpu_to_mem(cpu) : NUMA_NO_NODE; 925 927 }
+1
include/linux/cpu_rmap.h
··· 32 32 #define CPU_RMAP_DIST_INF 0xffff 33 33 34 34 extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags); 35 + extern void cpu_rmap_get(struct cpu_rmap *rmap); 35 36 extern int cpu_rmap_put(struct cpu_rmap *rmap); 36 37 37 38 extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
+20 -4
include/linux/netdevice.h
··· 352 352 u64 gro_flush_timeout; 353 353 u64 irq_suspend_timeout; 354 354 u32 defer_hard_irqs; 355 + cpumask_t affinity_mask; 355 356 unsigned int napi_id; 356 357 }; 357 358 ··· 395 394 struct list_head dev_list; 396 395 struct hlist_node napi_hash_node; 397 396 int irq; 397 + struct irq_affinity_notify notify; 398 + int napi_rmap_idx; 398 399 int index; 399 400 struct napi_config *config; 400 401 }; ··· 412 409 NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ 413 410 NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ 414 411 NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ 412 + NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */ 415 413 }; 416 414 417 415 enum { ··· 426 422 NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), 427 423 NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), 428 424 NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), 425 + NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER), 429 426 }; 430 427 431 428 enum gro_result { ··· 1994 1989 * 1995 1990 * @threaded: napi threaded mode is enabled 1996 1991 * 1992 + * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ 1993 + * affinity. Set by netif_set_affinity_auto(), then 1994 + * the driver must create a persistent napi by 1995 + * netif_napi_add_config() and finally bind the napi to 1996 + * IRQ (via netif_napi_set_irq()). 1997 + * 1998 + * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap. 1999 + * Set by calling netif_enable_cpu_rmap(). 
2000 + * 1997 2001 * @see_all_hwtstamp_requests: device wants to see calls to 1998 2002 * ndo_hwtstamp_set() for all timestamp requests 1999 2003 * regardless of source, even if those aren't ··· 2410 2396 struct lock_class_key *qdisc_tx_busylock; 2411 2397 bool proto_down; 2412 2398 bool threaded; 2399 + bool irq_affinity_auto; 2400 + bool rx_cpu_rmap_auto; 2413 2401 2414 2402 /* priv_flags_slow, ungrouped to save space */ 2415 2403 unsigned long see_all_hwtstamp_requests:1; ··· 2740 2724 netdev_assert_locked(dev); 2741 2725 } 2742 2726 2743 - static inline void netif_napi_set_irq_locked(struct napi_struct *napi, int irq) 2744 - { 2745 - napi->irq = irq; 2746 - } 2727 + void netif_napi_set_irq_locked(struct napi_struct *napi, int irq); 2747 2728 2748 2729 static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) 2749 2730 { ··· 2877 2864 __netif_napi_del(napi); 2878 2865 synchronize_net(); 2879 2866 } 2867 + 2868 + int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs); 2869 + void netif_set_affinity_auto(struct net_device *dev); 2880 2870 2881 2871 struct packet_type { 2882 2872 __be16 type; /* This is really htons(ether_type). */
+1 -1
lib/cpu_rmap.c
··· 73 73 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap 74 74 * @rmap: reverse-map allocated with alloc_cpu_rmap() 75 75 */ 76 - static inline void cpu_rmap_get(struct cpu_rmap *rmap) 76 + void cpu_rmap_get(struct cpu_rmap *rmap) 77 77 { 78 78 kref_get(&rmap->refcount); 79 79 }
+169
net/core/dev.c
··· 6943 6943 } 6944 6944 EXPORT_SYMBOL(netif_queue_set_napi); 6945 6945 6946 + static void 6947 + netif_napi_irq_notify(struct irq_affinity_notify *notify, 6948 + const cpumask_t *mask) 6949 + { 6950 + struct napi_struct *napi = 6951 + container_of(notify, struct napi_struct, notify); 6952 + #ifdef CONFIG_RFS_ACCEL 6953 + struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap; 6954 + int err; 6955 + #endif 6956 + 6957 + if (napi->config && napi->dev->irq_affinity_auto) 6958 + cpumask_copy(&napi->config->affinity_mask, mask); 6959 + 6960 + #ifdef CONFIG_RFS_ACCEL 6961 + if (napi->dev->rx_cpu_rmap_auto) { 6962 + err = cpu_rmap_update(rmap, napi->napi_rmap_idx, mask); 6963 + if (err) 6964 + netdev_warn(napi->dev, "RMAP update failed (%d)\n", 6965 + err); 6966 + } 6967 + #endif 6968 + } 6969 + 6970 + #ifdef CONFIG_RFS_ACCEL 6971 + static void netif_napi_affinity_release(struct kref *ref) 6972 + { 6973 + struct napi_struct *napi = 6974 + container_of(ref, struct napi_struct, notify.kref); 6975 + struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap; 6976 + 6977 + netdev_assert_locked(napi->dev); 6978 + WARN_ON(test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, 6979 + &napi->state)); 6980 + 6981 + if (!napi->dev->rx_cpu_rmap_auto) 6982 + return; 6983 + rmap->obj[napi->napi_rmap_idx] = NULL; 6984 + napi->napi_rmap_idx = -1; 6985 + cpu_rmap_put(rmap); 6986 + } 6987 + 6988 + int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs) 6989 + { 6990 + if (dev->rx_cpu_rmap_auto) 6991 + return 0; 6992 + 6993 + dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs); 6994 + if (!dev->rx_cpu_rmap) 6995 + return -ENOMEM; 6996 + 6997 + dev->rx_cpu_rmap_auto = true; 6998 + return 0; 6999 + } 7000 + EXPORT_SYMBOL(netif_enable_cpu_rmap); 7001 + 7002 + static void netif_del_cpu_rmap(struct net_device *dev) 7003 + { 7004 + struct cpu_rmap *rmap = dev->rx_cpu_rmap; 7005 + 7006 + if (!dev->rx_cpu_rmap_auto) 7007 + return; 7008 + 7009 + /* Free the rmap */ 7010 + cpu_rmap_put(rmap); 7011 + 
dev->rx_cpu_rmap = NULL; 7012 + dev->rx_cpu_rmap_auto = false; 7013 + } 7014 + 7015 + #else 7016 + static void netif_napi_affinity_release(struct kref *ref) 7017 + { 7018 + } 7019 + 7020 + int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs) 7021 + { 7022 + return 0; 7023 + } 7024 + EXPORT_SYMBOL(netif_enable_cpu_rmap); 7025 + 7026 + static void netif_del_cpu_rmap(struct net_device *dev) 7027 + { 7028 + } 7029 + #endif 7030 + 7031 + void netif_set_affinity_auto(struct net_device *dev) 7032 + { 7033 + unsigned int i, maxqs, numa; 7034 + 7035 + maxqs = max(dev->num_tx_queues, dev->num_rx_queues); 7036 + numa = dev_to_node(&dev->dev); 7037 + 7038 + for (i = 0; i < maxqs; i++) 7039 + cpumask_set_cpu(cpumask_local_spread(i, numa), 7040 + &dev->napi_config[i].affinity_mask); 7041 + 7042 + dev->irq_affinity_auto = true; 7043 + } 7044 + EXPORT_SYMBOL(netif_set_affinity_auto); 7045 + 7046 + void netif_napi_set_irq_locked(struct napi_struct *napi, int irq) 7047 + { 7048 + int rc; 7049 + 7050 + netdev_assert_locked_or_invisible(napi->dev); 7051 + 7052 + if (napi->irq == irq) 7053 + return; 7054 + 7055 + /* Remove existing resources */ 7056 + if (test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state)) 7057 + irq_set_affinity_notifier(napi->irq, NULL); 7058 + 7059 + napi->irq = irq; 7060 + if (irq < 0 || 7061 + (!napi->dev->rx_cpu_rmap_auto && !napi->dev->irq_affinity_auto)) 7062 + return; 7063 + 7064 + /* Abort for buggy drivers */ 7065 + if (napi->dev->irq_affinity_auto && WARN_ON_ONCE(!napi->config)) 7066 + return; 7067 + 7068 + #ifdef CONFIG_RFS_ACCEL 7069 + if (napi->dev->rx_cpu_rmap_auto) { 7070 + rc = cpu_rmap_add(napi->dev->rx_cpu_rmap, napi); 7071 + if (rc < 0) 7072 + return; 7073 + 7074 + cpu_rmap_get(napi->dev->rx_cpu_rmap); 7075 + napi->napi_rmap_idx = rc; 7076 + } 7077 + #endif 7078 + 7079 + /* Use core IRQ notifier */ 7080 + napi->notify.notify = netif_napi_irq_notify; 7081 + napi->notify.release = netif_napi_affinity_release; 7082 + rc = 
irq_set_affinity_notifier(irq, &napi->notify); 7083 + if (rc) { 7084 + netdev_warn(napi->dev, "Unable to set IRQ notifier (%d)\n", 7085 + rc); 7086 + goto put_rmap; 7087 + } 7088 + 7089 + set_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state); 7090 + return; 7091 + 7092 + put_rmap: 7093 + #ifdef CONFIG_RFS_ACCEL 7094 + if (napi->dev->rx_cpu_rmap_auto) { 7095 + cpu_rmap_put(napi->dev->rx_cpu_rmap); 7096 + napi->dev->rx_cpu_rmap->obj[napi->napi_rmap_idx] = NULL; 7097 + napi->napi_rmap_idx = -1; 7098 + } 7099 + #endif 7100 + napi->notify.notify = NULL; 7101 + napi->notify.release = NULL; 7102 + } 7103 + EXPORT_SYMBOL(netif_napi_set_irq_locked); 7104 + 6946 7105 static void napi_restore_config(struct napi_struct *n) 6947 7106 { 6948 7107 n->defer_hard_irqs = n->config->defer_hard_irqs; 6949 7108 n->gro_flush_timeout = n->config->gro_flush_timeout; 6950 7109 n->irq_suspend_timeout = n->config->irq_suspend_timeout; 7110 + 7111 + if (n->dev->irq_affinity_auto && 7112 + test_bit(NAPI_STATE_HAS_NOTIFIER, &n->state)) 7113 + irq_set_affinity(n->irq, &n->config->affinity_mask); 7114 + 6951 7115 /* a NAPI ID might be stored in the config, if so use it. if not, use 6952 7116 * napi_hash_add to generate one for us. 6953 7117 */ ··· 7331 7167 7332 7168 /* Make sure NAPI is disabled (or was never enabled). */ 7333 7169 WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); 7170 + 7171 + if (test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state)) 7172 + irq_set_affinity_notifier(napi->irq, NULL); 7334 7173 7335 7174 if (napi->config) { 7336 7175 napi->index = -1; ··· 11886 11719 dev_addr_flush(dev); 11887 11720 11888 11721 netdev_napi_exit(dev); 11722 + 11723 + netif_del_cpu_rmap(dev); 11889 11724 11890 11725 ref_tracker_dir_exit(&dev->refcnt_tracker); 11891 11726 #ifdef CONFIG_PCPU_DEV_REFCNT
+4
tools/testing/selftests/drivers/net/hw/Makefile
··· 10 10 ethtool_rmon.sh \ 11 11 hw_stats_l3.sh \ 12 12 hw_stats_l3_gre.sh \ 13 + irq.py \ 13 14 loopback.sh \ 14 15 nic_link_layer.py \ 15 16 nic_performance.py \ ··· 35 34 # YNL files, must be before "include ..lib.mk" 36 35 YNL_GEN_FILES := ncdevmem 37 36 TEST_GEN_FILES += $(YNL_GEN_FILES) 37 + TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) 38 38 39 39 include ../../../lib.mk 40 40 41 41 # YNL build 42 42 YNL_GENS := ethtool netdev 43 43 include ../../../net/ynl.mk 44 + 45 + include ../../../net/bpf.mk
+99
tools/testing/selftests/drivers/net/hw/irq.py
··· 1 + #!/usr/bin/env python3 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + from lib.py import ksft_run, ksft_exit 5 + from lib.py import ksft_ge, ksft_eq 6 + from lib.py import KsftSkipEx 7 + from lib.py import ksft_disruptive 8 + from lib.py import EthtoolFamily, NetdevFamily 9 + from lib.py import NetDrvEnv 10 + from lib.py import cmd, ip, defer 11 + 12 + 13 + def read_affinity(irq) -> str: 14 + with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp: 15 + return fp.read().lstrip("0,").strip() 16 + 17 + 18 + def write_affinity(irq, what) -> str: 19 + if what != read_affinity(irq): 20 + with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp: 21 + fp.write(what) 22 + 23 + 24 + def check_irqs_reported(cfg) -> None: 25 + """ Check that device reports IRQs for NAPI instances """ 26 + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) 27 + irqs = sum(['irq' in x for x in napis]) 28 + 29 + ksft_ge(irqs, 1) 30 + ksft_eq(irqs, len(napis)) 31 + 32 + 33 + def _check_reconfig(cfg, reconfig_cb) -> None: 34 + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) 35 + for n in reversed(napis): 36 + if 'irq' in n: 37 + break 38 + else: 39 + raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}") 40 + 41 + old = read_affinity(n['irq']) 42 + # pick an affinity that's not the current one 43 + new = "3" if old != "3" else "5" 44 + write_affinity(n['irq'], new) 45 + defer(write_affinity, n['irq'], old) 46 + 47 + reconfig_cb(cfg) 48 + 49 + ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig") 50 + 51 + 52 + def check_reconfig_queues(cfg) -> None: 53 + def reconfig(cfg) -> None: 54 + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) 55 + if channels['combined-count'] == 0: 56 + rx_type = 'rx' 57 + else: 58 + rx_type = 'combined' 59 + cur_queue_cnt = channels[f'{rx_type}-count'] 60 + max_queue_cnt = channels[f'{rx_type}-max'] 61 + 62 + cmd(f"ethtool -L {cfg.ifname} {rx_type} 1") 63 + 
cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") 64 + cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") 65 + 66 + _check_reconfig(cfg, reconfig) 67 + 68 + 69 + def check_reconfig_xdp(cfg) -> None: 70 + def reconfig(cfg) -> None: 71 + ip(f"link set dev %s xdp obj %s sec xdp" % 72 + (cfg.ifname, cfg.rpath("xdp_dummy.bpf.o"))) 73 + ip(f"link set dev %s xdp off" % cfg.ifname) 74 + 75 + _check_reconfig(cfg, reconfig) 76 + 77 + 78 + @ksft_disruptive 79 + def check_down(cfg) -> None: 80 + def reconfig(cfg) -> None: 81 + ip("link set dev %s down" % cfg.ifname) 82 + ip("link set dev %s up" % cfg.ifname) 83 + 84 + _check_reconfig(cfg, reconfig) 85 + 86 + 87 + def main() -> None: 88 + with NetDrvEnv(__file__, nsim_test=False) as cfg: 89 + cfg.ethnl = EthtoolFamily() 90 + cfg.netnl = NetdevFamily() 91 + 92 + ksft_run([check_irqs_reported, check_reconfig_queues, 93 + check_reconfig_xdp, check_down], 94 + args=(cfg, )) 95 + ksft_exit() 96 + 97 + 98 + if __name__ == "__main__": 99 + main()
+13
tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #define KBUILD_MODNAME "xdp_dummy" 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + SEC("xdp") 8 + int xdp_dummy_prog(struct xdp_md *ctx) 9 + { 10 + return XDP_PASS; 11 + } 12 + 13 + char _license[] SEC("license") = "GPL";
+7 -1
tools/testing/selftests/drivers/net/lib/py/env.py
··· 58 58 """ 59 59 Class for a single NIC / host env, with no remote end 60 60 """ 61 - def __init__(self, src_path, **kwargs): 61 + def __init__(self, src_path, nsim_test=None, **kwargs): 62 62 super().__init__(src_path) 63 63 64 64 self._ns = None 65 65 66 66 if 'NETIF' in self.env: 67 + if nsim_test is True: 68 + raise KsftXfailEx("Test only works on netdevsim") 69 + 67 70 self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] 68 71 else: 72 + if nsim_test is False: 73 + raise KsftXfailEx("Test does not work on netdevsim") 74 + 69 75 self._ns = NetdevSimDev(**kwargs) 70 76 self.dev = self._ns.nsims[0].dev 71 77 self.ifname = self.dev['ifname']