Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2026-01-26 (ice, idpf)

For ice:
Jake converts ring stats to utilize u64_stats APIs and performs some
cleanups along the way.

Alexander reorganizes layout of Tx and Rx rings for cacheline
locality and utilizes __cacheline_group* macros on the new layouts.

For idpf:
YiFei Zhu adds support for BPF kfunc reporting of hardware Rx timestamps.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
idpf: export RX hardware timestamping information to XDP
ice: reshuffle and group Rx and Tx queue fields by cachelines
ice: convert all ring stats to u64_stats_t
ice: shorten ring stat names and add accessors
ice: use u64_stats API to access pkts/bytes in dim sample
ice: remove ice_q_stats struct and use struct_group
ice: pass pointer to ice_fetch_u64_stats_per_ring
====================

Link: https://patch.msgid.link/20260126224313.3847849-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+398 -209
-3
drivers/net/ethernet/intel/ice/ice.h
··· 957 957 int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked); 958 958 void ice_update_vsi_stats(struct ice_vsi *vsi); 959 959 void ice_update_pf_stats(struct ice_pf *pf); 960 - void 961 - ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, 962 - struct ice_q_stats stats, u64 *pkts, u64 *bytes); 963 960 int ice_up(struct ice_vsi *vsi); 964 961 int ice_down(struct ice_vsi *vsi); 965 962 int ice_down_up(struct ice_vsi *vsi);
+2 -2
drivers/net/ethernet/intel/ice/ice_base.c
··· 1414 1414 if (!vsi_stat) 1415 1415 return; 1416 1416 1417 - memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0, 1418 - sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); 1417 + memset(&vsi_stat->rx_ring_stats[q_idx]->stats, 0, 1418 + sizeof(vsi_stat->rx_ring_stats[q_idx]->stats)); 1419 1419 memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, 1420 1420 sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); 1421 1421 if (vsi->xdp_rings)
+20 -11
drivers/net/ethernet/intel/ice/ice_ethtool.c
··· 33 33 { 34 34 struct ice_netdev_priv *np = netdev_priv(netdev); 35 35 36 - return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * 37 - (sizeof(struct ice_q_stats) / sizeof(u64))); 36 + /* One packets and one bytes count per queue */ 37 + return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * 2); 38 38 } 39 39 40 40 #define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats) ··· 1942 1942 rcu_read_lock(); 1943 1943 1944 1944 ice_for_each_alloc_txq(vsi, j) { 1945 + u64 pkts, bytes; 1946 + 1945 1947 tx_ring = READ_ONCE(vsi->tx_rings[j]); 1946 - if (tx_ring && tx_ring->ring_stats) { 1947 - data[i++] = tx_ring->ring_stats->stats.pkts; 1948 - data[i++] = tx_ring->ring_stats->stats.bytes; 1949 - } else { 1948 + if (!tx_ring || !tx_ring->ring_stats) { 1950 1949 data[i++] = 0; 1951 1950 data[i++] = 0; 1951 + continue; 1952 1952 } 1953 + 1954 + ice_fetch_tx_ring_stats(tx_ring, &pkts, &bytes); 1955 + 1956 + data[i++] = pkts; 1957 + data[i++] = bytes; 1953 1958 } 1954 1959 1955 1960 ice_for_each_alloc_rxq(vsi, j) { 1961 + u64 pkts, bytes; 1962 + 1956 1963 rx_ring = READ_ONCE(vsi->rx_rings[j]); 1957 - if (rx_ring && rx_ring->ring_stats) { 1958 - data[i++] = rx_ring->ring_stats->stats.pkts; 1959 - data[i++] = rx_ring->ring_stats->stats.bytes; 1960 - } else { 1964 + if (!rx_ring || !rx_ring->ring_stats) { 1961 1965 data[i++] = 0; 1962 1966 data[i++] = 0; 1967 + continue; 1963 1968 } 1969 + 1970 + ice_fetch_rx_ring_stats(rx_ring, &pkts, &bytes); 1971 + 1972 + data[i++] = pkts; 1973 + data[i++] = bytes; 1964 1974 } 1965 1975 1966 1976 rcu_read_unlock(); ··· 3388 3378 */ 3389 3379 rx_rings[i].next_to_use = 0; 3390 3380 rx_rings[i].next_to_clean = 0; 3391 - rx_rings[i].next_to_alloc = 0; 3392 3381 *vsi->rx_rings[i] = rx_rings[i]; 3393 3382 } 3394 3383 kfree(rx_rings);
+40 -16
drivers/net/ethernet/intel/ice/ice_lib.c
··· 3433 3433 } 3434 3434 3435 3435 /** 3436 - * ice_update_ring_stats - Update ring statistics 3437 - * @stats: stats to be updated 3438 - * @pkts: number of processed packets 3439 - * @bytes: number of processed bytes 3440 - * 3441 - * This function assumes that caller has acquired a u64_stats_sync lock. 3442 - */ 3443 - static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes) 3444 - { 3445 - stats->bytes += bytes; 3446 - stats->pkts += pkts; 3447 - } 3448 - 3449 - /** 3450 3436 * ice_update_tx_ring_stats - Update Tx ring specific counters 3451 3437 * @tx_ring: ring to update 3452 3438 * @pkts: number of processed packets ··· 3441 3455 void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) 3442 3456 { 3443 3457 u64_stats_update_begin(&tx_ring->ring_stats->syncp); 3444 - ice_update_ring_stats(&tx_ring->ring_stats->stats, pkts, bytes); 3458 + u64_stats_add(&tx_ring->ring_stats->pkts, pkts); 3459 + u64_stats_add(&tx_ring->ring_stats->bytes, bytes); 3445 3460 u64_stats_update_end(&tx_ring->ring_stats->syncp); 3446 3461 } 3447 3462 ··· 3455 3468 void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) 3456 3469 { 3457 3470 u64_stats_update_begin(&rx_ring->ring_stats->syncp); 3458 - ice_update_ring_stats(&rx_ring->ring_stats->stats, pkts, bytes); 3471 + u64_stats_add(&rx_ring->ring_stats->pkts, pkts); 3472 + u64_stats_add(&rx_ring->ring_stats->bytes, bytes); 3459 3473 u64_stats_update_end(&rx_ring->ring_stats->syncp); 3474 + } 3475 + 3476 + /** 3477 + * ice_fetch_tx_ring_stats - Fetch Tx ring packet and byte counters 3478 + * @ring: ring to read 3479 + * @pkts: number of processed packets 3480 + * @bytes: number of processed bytes 3481 + */ 3482 + void ice_fetch_tx_ring_stats(const struct ice_tx_ring *ring, 3483 + u64 *pkts, u64 *bytes) 3484 + { 3485 + unsigned int start; 3486 + 3487 + do { 3488 + start = u64_stats_fetch_begin(&ring->ring_stats->syncp); 3489 + *pkts = 
u64_stats_read(&ring->ring_stats->pkts); 3490 + *bytes = u64_stats_read(&ring->ring_stats->bytes); 3491 + } while (u64_stats_fetch_retry(&ring->ring_stats->syncp, start)); 3492 + } 3493 + 3494 + /** 3495 + * ice_fetch_rx_ring_stats - Fetch Rx ring packet and byte counters 3496 + * @ring: ring to read 3497 + * @pkts: number of processed packets 3498 + * @bytes: number of processed bytes 3499 + */ 3500 + void ice_fetch_rx_ring_stats(const struct ice_rx_ring *ring, 3501 + u64 *pkts, u64 *bytes) 3502 + { 3503 + unsigned int start; 3504 + 3505 + do { 3506 + start = u64_stats_fetch_begin(&ring->ring_stats->syncp); 3507 + *pkts = u64_stats_read(&ring->ring_stats->pkts); 3508 + *bytes = u64_stats_read(&ring->ring_stats->bytes); 3509 + } while (u64_stats_fetch_retry(&ring->ring_stats->syncp, start)); 3460 3510 } 3461 3511 3462 3512 /**
+6
drivers/net/ethernet/intel/ice/ice_lib.h
··· 92 92 93 93 void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes); 94 94 95 + void ice_fetch_tx_ring_stats(const struct ice_tx_ring *ring, 96 + u64 *pkts, u64 *bytes); 97 + 98 + void ice_fetch_rx_ring_stats(const struct ice_rx_ring *ring, 99 + u64 *pkts, u64 *bytes); 100 + 95 101 void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); 96 102 void ice_write_itr(struct ice_ring_container *rc, u16 itr); 97 103 void ice_set_q_vector_intrl(struct ice_q_vector *q_vector);
+126 -70
drivers/net/ethernet/intel/ice/ice_main.c
··· 159 159 * prev_pkt would be negative if there was no 160 160 * pending work. 161 161 */ 162 - packets = ring_stats->stats.pkts & INT_MAX; 163 - if (ring_stats->tx_stats.prev_pkt == packets) { 162 + packets = ice_stats_read(ring_stats, pkts) & INT_MAX; 163 + if (ring_stats->tx.prev_pkt == packets) { 164 164 /* Trigger sw interrupt to revive the queue */ 165 165 ice_trigger_sw_intr(hw, tx_ring->q_vector); 166 166 continue; ··· 170 170 * to ice_get_tx_pending() 171 171 */ 172 172 smp_rmb(); 173 - ring_stats->tx_stats.prev_pkt = 173 + ring_stats->tx.prev_pkt = 174 174 ice_get_tx_pending(tx_ring) ? packets : -1; 175 175 } 176 176 } ··· 6822 6822 return err; 6823 6823 } 6824 6824 6825 + struct ice_vsi_tx_stats { 6826 + u64 pkts; 6827 + u64 bytes; 6828 + u64 tx_restart_q; 6829 + u64 tx_busy; 6830 + u64 tx_linearize; 6831 + }; 6832 + 6833 + struct ice_vsi_rx_stats { 6834 + u64 pkts; 6835 + u64 bytes; 6836 + u64 rx_non_eop_descs; 6837 + u64 rx_page_failed; 6838 + u64 rx_buf_failed; 6839 + }; 6840 + 6825 6841 /** 6826 - * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring 6827 - * @syncp: pointer to u64_stats_sync 6828 - * @stats: stats that pkts and bytes count will be taken from 6829 - * @pkts: packets stats counter 6830 - * @bytes: bytes stats counter 6842 + * ice_fetch_u64_tx_stats - get Tx stats from a ring 6843 + * @ring: the Tx ring to copy stats from 6844 + * @copy: temporary storage for the ring statistics 6831 6845 * 6832 - * This function fetches stats from the ring considering the atomic operations 6833 - * that needs to be performed to read u64 values in 32 bit machine. 6846 + * Fetch the u64 stats from the ring using u64_stats_fetch. This ensures each 6847 + * stat value is self-consistent, though not necessarily consistent w.r.t 6848 + * other stats. 
6834 6849 */ 6835 - void 6836 - ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, 6837 - struct ice_q_stats stats, u64 *pkts, u64 *bytes) 6850 + static void ice_fetch_u64_tx_stats(struct ice_tx_ring *ring, 6851 + struct ice_vsi_tx_stats *copy) 6838 6852 { 6853 + struct ice_ring_stats *stats = ring->ring_stats; 6839 6854 unsigned int start; 6840 6855 6841 6856 do { 6842 - start = u64_stats_fetch_begin(syncp); 6843 - *pkts = stats.pkts; 6844 - *bytes = stats.bytes; 6845 - } while (u64_stats_fetch_retry(syncp, start)); 6857 + start = u64_stats_fetch_begin(&stats->syncp); 6858 + copy->pkts = u64_stats_read(&stats->pkts); 6859 + copy->bytes = u64_stats_read(&stats->bytes); 6860 + copy->tx_restart_q = u64_stats_read(&stats->tx_restart_q); 6861 + copy->tx_busy = u64_stats_read(&stats->tx_busy); 6862 + copy->tx_linearize = u64_stats_read(&stats->tx_linearize); 6863 + } while (u64_stats_fetch_retry(&stats->syncp, start)); 6864 + } 6865 + 6866 + /** 6867 + * ice_fetch_u64_rx_stats - get Rx stats from a ring 6868 + * @ring: the Rx ring to copy stats from 6869 + * @copy: temporary storage for the ring statistics 6870 + * 6871 + * Fetch the u64 stats from the ring using u64_stats_fetch. This ensures each 6872 + * stat value is self-consistent, though not necessarily consistent w.r.t 6873 + * other stats. 
6874 + */ 6875 + static void ice_fetch_u64_rx_stats(struct ice_rx_ring *ring, 6876 + struct ice_vsi_rx_stats *copy) 6877 + { 6878 + struct ice_ring_stats *stats = ring->ring_stats; 6879 + unsigned int start; 6880 + 6881 + do { 6882 + start = u64_stats_fetch_begin(&stats->syncp); 6883 + copy->pkts = u64_stats_read(&stats->pkts); 6884 + copy->bytes = u64_stats_read(&stats->bytes); 6885 + copy->rx_non_eop_descs = 6886 + u64_stats_read(&stats->rx_non_eop_descs); 6887 + copy->rx_page_failed = u64_stats_read(&stats->rx_page_failed); 6888 + copy->rx_buf_failed = u64_stats_read(&stats->rx_buf_failed); 6889 + } while (u64_stats_fetch_retry(&stats->syncp, start)); 6846 6890 } 6847 6891 6848 6892 /** 6849 6893 * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters 6850 6894 * @vsi: the VSI to be updated 6851 - * @vsi_stats: the stats struct to be updated 6895 + * @vsi_stats: accumulated stats for this VSI 6852 6896 * @rings: rings to work on 6853 6897 * @count: number of rings 6854 6898 */ 6855 - static void 6856 - ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, 6857 - struct rtnl_link_stats64 *vsi_stats, 6858 - struct ice_tx_ring **rings, u16 count) 6899 + static void ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, 6900 + struct ice_vsi_tx_stats *vsi_stats, 6901 + struct ice_tx_ring **rings, u16 count) 6859 6902 { 6903 + struct ice_vsi_tx_stats copy = {}; 6860 6904 u16 i; 6861 6905 6862 6906 for (i = 0; i < count; i++) { 6863 6907 struct ice_tx_ring *ring; 6864 - u64 pkts = 0, bytes = 0; 6865 6908 6866 6909 ring = READ_ONCE(rings[i]); 6867 6910 if (!ring || !ring->ring_stats) 6868 6911 continue; 6869 - ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp, 6870 - ring->ring_stats->stats, &pkts, 6871 - &bytes); 6872 - vsi_stats->tx_packets += pkts; 6873 - vsi_stats->tx_bytes += bytes; 6874 - vsi->tx_restart += ring->ring_stats->tx_stats.restart_q; 6875 - vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy; 6876 - vsi->tx_linearize += 
ring->ring_stats->tx_stats.tx_linearize; 6912 + 6913 + ice_fetch_u64_tx_stats(ring, &copy); 6914 + 6915 + vsi_stats->pkts += copy.pkts; 6916 + vsi_stats->bytes += copy.bytes; 6917 + vsi_stats->tx_restart_q += copy.tx_restart_q; 6918 + vsi_stats->tx_busy += copy.tx_busy; 6919 + vsi_stats->tx_linearize += copy.tx_linearize; 6920 + } 6921 + } 6922 + 6923 + /** 6924 + * ice_update_vsi_rx_ring_stats - Update VSI Rx ring stats counters 6925 + * @vsi: the VSI to be updated 6926 + * @vsi_stats: accumulated stats for this VSI 6927 + * @rings: rings to work on 6928 + * @count: number of rings 6929 + */ 6930 + static void ice_update_vsi_rx_ring_stats(struct ice_vsi *vsi, 6931 + struct ice_vsi_rx_stats *vsi_stats, 6932 + struct ice_rx_ring **rings, u16 count) 6933 + { 6934 + struct ice_vsi_rx_stats copy = {}; 6935 + u16 i; 6936 + 6937 + for (i = 0; i < count; i++) { 6938 + struct ice_rx_ring *ring; 6939 + 6940 + ring = READ_ONCE(rings[i]); 6941 + if (!ring || !ring->ring_stats) 6942 + continue; 6943 + 6944 + ice_fetch_u64_rx_stats(ring, &copy); 6945 + 6946 + vsi_stats->pkts += copy.pkts; 6947 + vsi_stats->bytes += copy.bytes; 6948 + vsi_stats->rx_non_eop_descs += copy.rx_non_eop_descs; 6949 + vsi_stats->rx_page_failed += copy.rx_page_failed; 6950 + vsi_stats->rx_buf_failed += copy.rx_buf_failed; 6877 6951 } 6878 6952 } 6879 6953 ··· 6958 6884 static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) 6959 6885 { 6960 6886 struct rtnl_link_stats64 *net_stats, *stats_prev; 6961 - struct rtnl_link_stats64 *vsi_stats; 6887 + struct ice_vsi_tx_stats tx_stats = {}; 6888 + struct ice_vsi_rx_stats rx_stats = {}; 6962 6889 struct ice_pf *pf = vsi->back; 6963 - u64 pkts, bytes; 6964 - int i; 6965 - 6966 - vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC); 6967 - if (!vsi_stats) 6968 - return; 6969 - 6970 - /* reset non-netdev (extended) stats */ 6971 - vsi->tx_restart = 0; 6972 - vsi->tx_busy = 0; 6973 - vsi->tx_linearize = 0; 6974 - vsi->rx_buf_failed = 0; 6975 - vsi->rx_page_failed 
= 0; 6976 6890 6977 6891 rcu_read_lock(); 6978 6892 6979 6893 /* update Tx rings counters */ 6980 - ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings, 6894 + ice_update_vsi_tx_ring_stats(vsi, &tx_stats, vsi->tx_rings, 6981 6895 vsi->num_txq); 6982 6896 6983 6897 /* update Rx rings counters */ 6984 - ice_for_each_rxq(vsi, i) { 6985 - struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); 6986 - struct ice_ring_stats *ring_stats; 6987 - 6988 - ring_stats = ring->ring_stats; 6989 - ice_fetch_u64_stats_per_ring(&ring_stats->syncp, 6990 - ring_stats->stats, &pkts, 6991 - &bytes); 6992 - vsi_stats->rx_packets += pkts; 6993 - vsi_stats->rx_bytes += bytes; 6994 - vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed; 6995 - vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed; 6996 - } 6898 + ice_update_vsi_rx_ring_stats(vsi, &rx_stats, vsi->rx_rings, 6899 + vsi->num_rxq); 6997 6900 6998 6901 /* update XDP Tx rings counters */ 6999 6902 if (ice_is_xdp_ena_vsi(vsi)) 7000 - ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings, 6903 + ice_update_vsi_tx_ring_stats(vsi, &tx_stats, vsi->xdp_rings, 7001 6904 vsi->num_xdp_txq); 7002 6905 7003 6906 rcu_read_unlock(); 6907 + 6908 + /* Save non-netdev (extended) stats */ 6909 + vsi->tx_restart = tx_stats.tx_restart_q; 6910 + vsi->tx_busy = tx_stats.tx_busy; 6911 + vsi->tx_linearize = tx_stats.tx_linearize; 6912 + vsi->rx_buf_failed = rx_stats.rx_buf_failed; 6913 + vsi->rx_page_failed = rx_stats.rx_page_failed; 7004 6914 7005 6915 net_stats = &vsi->net_stats; 7006 6916 stats_prev = &vsi->net_stats_prev; ··· 6995 6937 * let's skip this round. 
6996 6938 */ 6997 6939 if (likely(pf->stat_prev_loaded)) { 6998 - net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; 6999 - net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; 7000 - net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; 7001 - net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; 6940 + net_stats->tx_packets += tx_stats.pkts - stats_prev->tx_packets; 6941 + net_stats->tx_bytes += tx_stats.bytes - stats_prev->tx_bytes; 6942 + net_stats->rx_packets += rx_stats.pkts - stats_prev->rx_packets; 6943 + net_stats->rx_bytes += rx_stats.bytes - stats_prev->rx_bytes; 7002 6944 } 7003 6945 7004 - stats_prev->tx_packets = vsi_stats->tx_packets; 7005 - stats_prev->tx_bytes = vsi_stats->tx_bytes; 7006 - stats_prev->rx_packets = vsi_stats->rx_packets; 7007 - stats_prev->rx_bytes = vsi_stats->rx_bytes; 7008 - 7009 - kfree(vsi_stats); 6946 + stats_prev->tx_packets = tx_stats.pkts; 6947 + stats_prev->tx_bytes = tx_stats.bytes; 6948 + stats_prev->rx_packets = rx_stats.pkts; 6949 + stats_prev->rx_bytes = rx_stats.bytes; 7010 6950 } 7011 6951 7012 6952 /**
+23 -23
drivers/net/ethernet/intel/ice/ice_txrx.c
··· 379 379 if (netif_tx_queue_stopped(txring_txq(tx_ring)) && 380 380 !test_bit(ICE_VSI_DOWN, vsi->state)) { 381 381 netif_tx_wake_queue(txring_txq(tx_ring)); 382 - ++tx_ring->ring_stats->tx_stats.restart_q; 382 + ice_stats_inc(tx_ring->ring_stats, tx_restart_q); 383 383 } 384 384 } 385 385 ··· 499 499 500 500 tx_ring->next_to_use = 0; 501 501 tx_ring->next_to_clean = 0; 502 - tx_ring->ring_stats->tx_stats.prev_pkt = -1; 502 + tx_ring->ring_stats->tx.prev_pkt = -1; 503 503 return 0; 504 504 505 505 err: ··· 574 574 PAGE_SIZE); 575 575 memset(rx_ring->desc, 0, size); 576 576 577 - rx_ring->next_to_alloc = 0; 578 577 rx_ring->next_to_clean = 0; 579 578 rx_ring->next_to_use = 0; 580 579 } ··· 848 849 849 850 addr = libeth_rx_alloc(&fq, ntu); 850 851 if (addr == DMA_MAPPING_ERROR) { 851 - rx_ring->ring_stats->rx_stats.alloc_page_failed++; 852 + ice_stats_inc(rx_ring->ring_stats, rx_page_failed); 852 853 break; 853 854 } 854 855 ··· 862 863 863 864 addr = libeth_rx_alloc(&hdr_fq, ntu); 864 865 if (addr == DMA_MAPPING_ERROR) { 865 - rx_ring->ring_stats->rx_stats.alloc_page_failed++; 866 + ice_stats_inc(rx_ring->ring_stats, rx_page_failed); 866 867 867 868 libeth_rx_recycle_slow(fq.fqes[ntu].netmem); 868 869 break; ··· 1044 1045 /* exit if we failed to retrieve a buffer */ 1045 1046 if (!skb) { 1046 1047 libeth_xdp_return_buff_slow(xdp); 1047 - rx_ring->ring_stats->rx_stats.alloc_buf_failed++; 1048 + ice_stats_inc(rx_ring->ring_stats, rx_buf_failed); 1048 1049 continue; 1049 1050 } 1050 1051 ··· 1086 1087 struct dim_sample *sample, 1087 1088 bool is_tx) 1088 1089 { 1089 - u64 packets = 0, bytes = 0; 1090 + u64 total_packets = 0, total_bytes = 0, pkts, bytes; 1090 1091 1091 1092 if (is_tx) { 1092 1093 struct ice_tx_ring *tx_ring; 1093 1094 1094 1095 ice_for_each_tx_ring(tx_ring, *rc) { 1095 - struct ice_ring_stats *ring_stats; 1096 - 1097 - ring_stats = tx_ring->ring_stats; 1098 - if (!ring_stats) 1096 + if (!tx_ring->ring_stats) 1099 1097 continue; 1100 - packets += 
ring_stats->stats.pkts; 1101 - bytes += ring_stats->stats.bytes; 1098 + 1099 + ice_fetch_tx_ring_stats(tx_ring, &pkts, &bytes); 1100 + 1101 + total_packets += pkts; 1102 + total_bytes += bytes; 1102 1103 } 1103 1104 } else { 1104 1105 struct ice_rx_ring *rx_ring; 1105 1106 1106 1107 ice_for_each_rx_ring(rx_ring, *rc) { 1107 - struct ice_ring_stats *ring_stats; 1108 - 1109 - ring_stats = rx_ring->ring_stats; 1110 - if (!ring_stats) 1108 + if (!rx_ring->ring_stats) 1111 1109 continue; 1112 - packets += ring_stats->stats.pkts; 1113 - bytes += ring_stats->stats.bytes; 1110 + 1111 + ice_fetch_rx_ring_stats(rx_ring, &pkts, &bytes); 1112 + 1113 + total_packets += pkts; 1114 + total_bytes += bytes; 1114 1115 } 1115 1116 } 1116 1117 1117 - dim_update_sample(q_vector->total_events, packets, bytes, sample); 1118 + dim_update_sample(q_vector->total_events, 1119 + total_packets, total_bytes, sample); 1118 1120 sample->comp_ctr = 0; 1119 1121 1120 1122 /* if dim settings get stale, like when not updated for 1 ··· 1362 1362 1363 1363 /* A reprieve! - use start_queue because it doesn't call schedule */ 1364 1364 netif_tx_start_queue(txring_txq(tx_ring)); 1365 - ++tx_ring->ring_stats->tx_stats.restart_q; 1365 + ice_stats_inc(tx_ring->ring_stats, tx_restart_q); 1366 1366 return 0; 1367 1367 } 1368 1368 ··· 2164 2164 if (__skb_linearize(skb)) 2165 2165 goto out_drop; 2166 2166 count = ice_txd_use_count(skb->len); 2167 - tx_ring->ring_stats->tx_stats.tx_linearize++; 2167 + ice_stats_inc(tx_ring->ring_stats, tx_linearize); 2168 2168 } 2169 2169 2170 2170 /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, ··· 2175 2175 */ 2176 2176 if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + 2177 2177 ICE_DESCS_FOR_CTX_DESC)) { 2178 - tx_ring->ring_stats->tx_stats.tx_busy++; 2178 + ice_stats_inc(tx_ring->ring_stats, tx_busy); 2179 2179 return NETDEV_TX_BUSY; 2180 2180 } 2181 2181
+126 -77
drivers/net/ethernet/intel/ice/ice_txrx.h
··· 129 129 u8 header_len; 130 130 }; 131 131 132 - struct ice_q_stats { 133 - u64 pkts; 134 - u64 bytes; 135 - }; 136 - 137 - struct ice_txq_stats { 138 - u64 restart_q; 139 - u64 tx_busy; 140 - u64 tx_linearize; 141 - int prev_pkt; /* negative if no pending Tx descriptors */ 142 - }; 143 - 144 - struct ice_rxq_stats { 145 - u64 non_eop_descs; 146 - u64 alloc_page_failed; 147 - u64 alloc_buf_failed; 148 - }; 149 - 150 132 struct ice_ring_stats { 151 133 struct rcu_head rcu; /* to avoid race on free */ 152 - struct ice_q_stats stats; 153 134 struct u64_stats_sync syncp; 154 - union { 155 - struct ice_txq_stats tx_stats; 156 - struct ice_rxq_stats rx_stats; 157 - }; 135 + struct_group(stats, 136 + u64_stats_t pkts; 137 + u64_stats_t bytes; 138 + union { 139 + struct_group(tx, 140 + u64_stats_t tx_restart_q; 141 + u64_stats_t tx_busy; 142 + u64_stats_t tx_linearize; 143 + /* negative if no pending Tx descriptors */ 144 + int prev_pkt; 145 + ); 146 + struct_group(rx, 147 + u64_stats_t rx_non_eop_descs; 148 + u64_stats_t rx_page_failed; 149 + u64_stats_t rx_buf_failed; 150 + ); 151 + }; 152 + ); 158 153 }; 154 + 155 + /** 156 + * ice_stats_read - Read a single ring stat value 157 + * @stats: pointer to ring_stats structure for a queue 158 + * @member: the ice_ring_stats member to read 159 + * 160 + * Shorthand for reading a single 64-bit stat value from struct 161 + * ice_ring_stats. 162 + * 163 + * Return: the value of the requested stat. 
164 + */ 165 + #define ice_stats_read(stats, member) ({ \ 166 + struct ice_ring_stats *__stats = (stats); \ 167 + unsigned int start; \ 168 + u64 val; \ 169 + do { \ 170 + start = u64_stats_fetch_begin(&__stats->syncp); \ 171 + val = u64_stats_read(&__stats->member); \ 172 + } while (u64_stats_fetch_retry(&__stats->syncp, start)); \ 173 + val; \ 174 + }) 175 + 176 + /** 177 + * ice_stats_inc - Increment a single ring stat value 178 + * @stats: pointer to the ring_stats structure for a queue 179 + * @member: the ice_ring_stats member to increment 180 + * 181 + * Shorthand for incrementing a single 64-bit stat value in struct 182 + * ice_ring_stats. 183 + */ 184 + #define ice_stats_inc(stats, member) do { \ 185 + struct ice_ring_stats *__stats = (stats); \ 186 + u64_stats_update_begin(&__stats->syncp); \ 187 + u64_stats_inc(&__stats->member); \ 188 + u64_stats_update_end(&__stats->syncp); \ 189 + } while (0) 159 190 160 191 enum ice_ring_state_t { 161 192 ICE_TX_XPS_INIT_DONE, ··· 267 236 } ____cacheline_internodealigned_in_smp; 268 237 269 238 struct ice_rx_ring { 270 - /* CL1 - 1st cacheline starts here */ 239 + __cacheline_group_begin_aligned(read_mostly); 271 240 void *desc; /* Descriptor ring memory */ 272 241 struct page_pool *pp; 273 242 struct net_device *netdev; /* netdev ring maps to */ 274 - struct ice_vsi *vsi; /* Backreference to associated VSI */ 275 243 struct ice_q_vector *q_vector; /* Backreference to associated vector */ 276 244 u8 __iomem *tail; 277 - u16 q_index; /* Queue number of ring */ 278 - 279 - u16 count; /* Number of descriptors */ 280 - u16 reg_idx; /* HW register index of the ring */ 281 - u16 next_to_alloc; 282 245 283 246 union { 284 247 struct libeth_fqe *rx_fqes; 285 248 struct xdp_buff **xdp_buf; 286 249 }; 287 250 288 - /* CL2 - 2nd cacheline starts here */ 289 - struct libeth_fqe *hdr_fqes; 290 - struct page_pool *hdr_pp; 251 + u16 count; /* Number of descriptors */ 252 + u8 ptp_rx; 291 253 254 + u8 flags; 255 + #define 
ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) 256 + #define ICE_RX_FLAGS_MULTIDEV BIT(3) 257 + #define ICE_RX_FLAGS_RING_GCS BIT(4) 258 + 259 + u32 truesize; 260 + 261 + struct page_pool *hdr_pp; 262 + struct libeth_fqe *hdr_fqes; 263 + 264 + struct bpf_prog *xdp_prog; 265 + struct ice_tx_ring *xdp_ring; 266 + struct xsk_buff_pool *xsk_pool; 267 + 268 + /* stats structs */ 269 + struct ice_ring_stats *ring_stats; 270 + struct ice_rx_ring *next; /* pointer to next ring in q_vector */ 271 + 272 + u32 hdr_truesize; 273 + 274 + struct xdp_rxq_info xdp_rxq; 275 + __cacheline_group_end_aligned(read_mostly); 276 + 277 + __cacheline_group_begin_aligned(read_write); 292 278 union { 293 279 struct libeth_xdp_buff_stash xdp; 294 280 struct libeth_xdp_buff *xsk; 295 281 }; 296 - 297 - /* CL3 - 3rd cacheline starts here */ 298 282 union { 299 283 struct ice_pkt_ctx pkt_ctx; 300 284 struct { ··· 317 271 __be16 vlan_proto; 318 272 }; 319 273 }; 320 - struct bpf_prog *xdp_prog; 321 274 322 275 /* used in interrupt processing */ 323 276 u16 next_to_use; 324 277 u16 next_to_clean; 278 + __cacheline_group_end_aligned(read_write); 325 279 326 - u32 hdr_truesize; 327 - u32 truesize; 328 - 329 - /* stats structs */ 330 - struct ice_ring_stats *ring_stats; 331 - 280 + __cacheline_group_begin_aligned(cold); 332 281 struct rcu_head rcu; /* to avoid race on free */ 333 - /* CL4 - 4th cacheline starts here */ 282 + struct ice_vsi *vsi; /* Backreference to associated VSI */ 334 283 struct ice_channel *ch; 335 - struct ice_tx_ring *xdp_ring; 336 - struct ice_rx_ring *next; /* pointer to next ring in q_vector */ 337 - struct xsk_buff_pool *xsk_pool; 284 + 285 + dma_addr_t dma; /* physical address of ring */ 286 + u16 q_index; /* Queue number of ring */ 287 + u16 reg_idx; /* HW register index of the ring */ 288 + u8 dcb_tc; /* Traffic class of ring */ 289 + 338 290 u16 rx_hdr_len; 339 291 u16 rx_buf_len; 340 - dma_addr_t dma; /* physical address of ring */ 341 - u8 dcb_tc; /* Traffic class of ring */ 342 - 
u8 ptp_rx; 343 - #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) 344 - #define ICE_RX_FLAGS_MULTIDEV BIT(3) 345 - #define ICE_RX_FLAGS_RING_GCS BIT(4) 346 - u8 flags; 347 - /* CL5 - 5th cacheline starts here */ 348 - struct xdp_rxq_info xdp_rxq; 292 + __cacheline_group_end_aligned(cold); 349 293 } ____cacheline_internodealigned_in_smp; 350 294 351 295 struct ice_tx_ring { 352 - /* CL1 - 1st cacheline starts here */ 353 - struct ice_tx_ring *next; /* pointer to next ring in q_vector */ 296 + __cacheline_group_begin_aligned(read_mostly); 354 297 void *desc; /* Descriptor ring memory */ 355 298 struct device *dev; /* Used for DMA mapping */ 356 299 u8 __iomem *tail; 357 300 struct ice_tx_buf *tx_buf; 301 + 358 302 struct ice_q_vector *q_vector; /* Backreference to associated vector */ 359 303 struct net_device *netdev; /* netdev ring maps to */ 360 304 struct ice_vsi *vsi; /* Backreference to associated VSI */ 361 - /* CL2 - 2nd cacheline starts here */ 362 - dma_addr_t dma; /* physical address of ring */ 363 - struct xsk_buff_pool *xsk_pool; 364 - u16 next_to_use; 365 - u16 next_to_clean; 366 - u16 q_handle; /* Queue handle per TC */ 367 - u16 reg_idx; /* HW register index of the ring */ 305 + 368 306 u16 count; /* Number of descriptors */ 369 307 u16 q_index; /* Queue number of ring */ 370 - u16 xdp_tx_active; 371 - /* stats structs */ 372 - struct ice_ring_stats *ring_stats; 373 - /* CL3 - 3rd cacheline starts here */ 374 - struct rcu_head rcu; /* to avoid race on free */ 375 - DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ 376 - struct ice_channel *ch; 377 - struct ice_ptp_tx *tx_tstamps; 378 - spinlock_t tx_lock; 379 - u32 txq_teid; /* Added Tx queue TEID */ 380 - /* CL4 - 4th cacheline starts here */ 381 - struct ice_tstamp_ring *tstamp_ring; 308 + 309 + u8 flags; 382 310 #define ICE_TX_FLAGS_RING_XDP BIT(0) 383 311 #define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) 384 312 #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) 385 313 #define ICE_TX_FLAGS_TXTIME 
BIT(3) 386 - u8 flags; 314 + 315 + struct xsk_buff_pool *xsk_pool; 316 + 317 + /* stats structs */ 318 + struct ice_ring_stats *ring_stats; 319 + struct ice_tx_ring *next; /* pointer to next ring in q_vector */ 320 + 321 + struct ice_tstamp_ring *tstamp_ring; 322 + struct ice_ptp_tx *tx_tstamps; 323 + __cacheline_group_end_aligned(read_mostly); 324 + 325 + __cacheline_group_begin_aligned(read_write); 326 + u16 next_to_use; 327 + u16 next_to_clean; 328 + 329 + u16 xdp_tx_active; 330 + spinlock_t tx_lock; 331 + __cacheline_group_end_aligned(read_write); 332 + 333 + __cacheline_group_begin_aligned(cold); 334 + struct rcu_head rcu; /* to avoid race on free */ 335 + DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ 336 + struct ice_channel *ch; 337 + 338 + dma_addr_t dma; /* physical address of ring */ 339 + u16 q_handle; /* Queue handle per TC */ 340 + u16 reg_idx; /* HW register index of the ring */ 387 341 u8 dcb_tc; /* Traffic class of ring */ 342 + 388 343 u16 quanta_prof_id; 344 + u32 txq_teid; /* Added Tx queue TEID */ 345 + __cacheline_group_end_aligned(cold); 389 346 } ____cacheline_internodealigned_in_smp; 390 347 391 348 static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
+1 -4
drivers/net/ethernet/intel/ice/ice_txrx_lib.c
··· 20 20 21 21 rx_ring->next_to_use = val; 22 22 23 - /* update next to alloc since we have filled the ring */ 24 - rx_ring->next_to_alloc = val; 25 - 26 23 /* QRX_TAIL will be updated with any tail value, but hardware ignores 27 24 * the lower 3 bits. This makes it so we only bump tail on meaningful 28 25 * boundaries. Also, this allows us to bump tail on intervals of 8 up to ··· 477 480 return ICE_XDP_CONSUMED; 478 481 479 482 busy: 480 - xdp_ring->ring_stats->tx_stats.tx_busy++; 483 + ice_stats_inc(xdp_ring->ring_stats, tx_busy); 481 484 482 485 return ICE_XDP_CONSUMED; 483 486 }
+1 -1
drivers/net/ethernet/intel/ice/ice_txrx_lib.h
··· 38 38 if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) 39 39 return false; 40 40 41 - rx_ring->ring_stats->rx_stats.non_eop_descs++; 41 + ice_stats_inc(rx_ring->ring_stats, rx_non_eop_descs); 42 42 43 43 return true; 44 44 }
+2 -2
drivers/net/ethernet/intel/ice/ice_xsk.c
··· 497 497 return ICE_XDP_TX; 498 498 499 499 busy: 500 - xdp_ring->ring_stats->tx_stats.tx_busy++; 500 + ice_stats_inc(xdp_ring->ring_stats, tx_busy); 501 501 502 502 return ICE_XDP_CONSUMED; 503 503 } ··· 659 659 xsk_buff_free(first); 660 660 first = NULL; 661 661 662 - rx_ring->ring_stats->rx_stats.alloc_buf_failed++; 662 + ice_stats_inc(rx_ring->ring_stats, rx_buf_failed); 663 663 continue; 664 664 } 665 665
+31
drivers/net/ethernet/intel/idpf/xdp.c
··· 2 2 /* Copyright (C) 2025 Intel Corporation */ 3 3 4 4 #include "idpf.h" 5 + #include "idpf_ptp.h" 5 6 #include "idpf_virtchnl.h" 6 7 #include "xdp.h" 7 8 #include "xsk.h" ··· 399 398 pt); 400 399 } 401 400 401 + static int idpf_xdpmo_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) 402 + { 403 + const struct libeth_xdp_buff *xdp = (typeof(xdp))ctx; 404 + struct idpf_xdp_rx_desc desc __uninitialized; 405 + const struct idpf_rx_queue *rxq; 406 + u64 cached_time, ts_ns; 407 + u32 ts_high; 408 + 409 + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); 410 + 411 + if (!idpf_queue_has(PTP, rxq)) 412 + return -ENODATA; 413 + 414 + idpf_xdp_get_qw1(&desc, xdp->desc); 415 + 416 + if (!(idpf_xdp_rx_ts_low(&desc) & VIRTCHNL2_RX_FLEX_TSTAMP_VALID)) 417 + return -ENODATA; 418 + 419 + cached_time = READ_ONCE(rxq->cached_phc_time); 420 + 421 + idpf_xdp_get_qw3(&desc, xdp->desc); 422 + 423 + ts_high = idpf_xdp_rx_ts_high(&desc); 424 + ts_ns = idpf_ptp_tstamp_extend_32b_to_64b(cached_time, ts_high); 425 + 426 + *timestamp = ts_ns; 427 + return 0; 428 + } 429 + 402 430 static const struct xdp_metadata_ops idpf_xdpmo = { 403 431 .xmo_rx_hash = idpf_xdpmo_rx_hash, 432 + .xmo_rx_timestamp = idpf_xdpmo_rx_timestamp, 404 433 }; 405 434 406 435 void idpf_xdp_set_features(const struct idpf_vport *vport)
+20
drivers/net/ethernet/intel/idpf/xdp.h
··· 112 112 aligned_u64 qw1; 113 113 #define IDPF_XDP_RX_BUF GENMASK_ULL(47, 32) 114 114 #define IDPF_XDP_RX_EOP BIT_ULL(1) 115 + #define IDPF_XDP_RX_TS_LOW GENMASK_ULL(31, 24) 115 116 116 117 aligned_u64 qw2; 117 118 #define IDPF_XDP_RX_HASH GENMASK_ULL(31, 0) 118 119 119 120 aligned_u64 qw3; 121 + #define IDPF_XDP_RX_TS_HIGH GENMASK_ULL(63, 32) 120 122 } __aligned(4 * sizeof(u64)); 121 123 static_assert(sizeof(struct idpf_xdp_rx_desc) == 122 124 sizeof(struct virtchnl2_rx_flex_desc_adv_nic_3)); ··· 130 128 #define idpf_xdp_rx_buf(desc) FIELD_GET(IDPF_XDP_RX_BUF, (desc)->qw1) 131 129 #define idpf_xdp_rx_eop(desc) !!((desc)->qw1 & IDPF_XDP_RX_EOP) 132 130 #define idpf_xdp_rx_hash(desc) FIELD_GET(IDPF_XDP_RX_HASH, (desc)->qw2) 131 + #define idpf_xdp_rx_ts_low(desc) FIELD_GET(IDPF_XDP_RX_TS_LOW, (desc)->qw1) 132 + #define idpf_xdp_rx_ts_high(desc) FIELD_GET(IDPF_XDP_RX_TS_HIGH, (desc)->qw3) 133 133 134 134 static inline void 135 135 idpf_xdp_get_qw0(struct idpf_xdp_rx_desc *desc, ··· 153 149 desc->qw1 = ((const typeof(desc))rxd)->qw1; 154 150 #else 155 151 desc->qw1 = ((u64)le16_to_cpu(rxd->buf_id) << 32) | 152 + ((u64)rxd->ts_low << 24) | 153 + ((u64)rxd->fflags1 << 16) | 154 + ((u64)rxd->status_err1 << 8) | 156 155 rxd->status_err0_qw1; 157 156 #endif 158 157 } ··· 170 163 desc->qw2 = ((u64)rxd->hash3 << 24) | 171 164 ((u64)rxd->ff2_mirrid_hash2.hash2 << 16) | 172 165 le16_to_cpu(rxd->hash1); 166 + #endif 167 + } 168 + 169 + static inline void 170 + idpf_xdp_get_qw3(struct idpf_xdp_rx_desc *desc, 171 + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) 172 + { 173 + #ifdef __LIBETH_WORD_ACCESS 174 + desc->qw3 = ((const typeof(desc))rxd)->qw3; 175 + #else 176 + desc->qw3 = ((u64)le32_to_cpu(rxd->ts_high) << 32) | 177 + ((u64)le16_to_cpu(rxd->fmd6) << 16) | 178 + le16_to_cpu(rxd->l2tag1); 173 179 #endif 174 180 } 175 181