Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'net-6.4-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Paolo Abeni:
"Including fixes from netfilter.

Current release - regressions:

- mtk_eth_soc: fix NULL pointer dereference

Previous releases - regressions:

- core:
  - skb_partial_csum_set() fix against transport header magic value
  - fix load-tearing on sk->sk_stamp in sock_recv_cmsgs()
  - annotate sk->sk_err write from do_recvmmsg()
  - add vlan_get_protocol_and_depth() helper

- netlink: annotate accesses to nlk->cb_running

- netfilter: always release netdev hooks from notifier

Previous releases - always broken:

- core: deal with most data-races in sk_wait_event()

- netfilter: fix possible bug_on with enable_hooks=1

- eth: bonding: fix send_peer_notif overflow

- eth: xpcs: fix incorrect number of interfaces

- eth: ipvlan: fix out-of-bounds caused by uncleared skb->cb

- eth: stmmac: Initialize MAC_ONEUS_TIC_COUNTER register"

* tag 'net-6.4-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (31 commits)
af_unix: Fix data races around sk->sk_shutdown.
af_unix: Fix a data race of sk->sk_receive_queue->qlen.
net: datagram: fix data-races in datagram_poll()
net: mscc: ocelot: fix stat counter register values
ipvlan:Fix out-of-bounds caused by unclear skb->cb
docs: networking: fix x25-iface.rst heading & index order
gve: Remove the code of clearing PBA bit
tcp: add annotations around sk->sk_shutdown accesses
net: add vlan_get_protocol_and_depth() helper
net: pcs: xpcs: fix incorrect number of interfaces
net: deal with most data-races in sk_wait_event()
net: annotate sk->sk_err write from do_recvmmsg()
netlink: annotate accesses to nlk->cb_running
kselftest: bonding: add num_grat_arp test
selftests: forwarding: lib: add netns support for tc rule handle stats get
Documentation: bonding: fix the doc of peer_notif_delay
bonding: fix send_peer_notif overflow
net: ethernet: mtk_eth_soc: fix NULL pointer dereference
selftests: nft_flowtable.sh: check ingress/egress chain too
selftests: nft_flowtable.sh: monitor result file sizes
...

+361 -112
+5 -4
Documentation/networking/bonding.rst
··· 776 776 Specify the delay, in milliseconds, between each peer 777 777 notification (gratuitous ARP and unsolicited IPv6 Neighbor 778 778 Advertisement) when they are issued after a failover event. 779 - This delay should be a multiple of the link monitor interval 780 - (arp_interval or miimon, whichever is active). The default 781 - value is 0 which means to match the value of the link monitor 782 - interval. 779 + This delay should be a multiple of the MII link monitor interval 780 + (miimon). 781 + 782 + The valid range is 0 - 300000. The default value is 0, which means 783 + to match the value of the MII link monitor interval. 783 784 784 785 prio 785 786 Slave priority. A higher number means higher priority.
+1 -1
Documentation/networking/index.rst
··· 116 116 udplite 117 117 vrf 118 118 vxlan 119 - x25-iface 120 119 x25 120 + x25-iface 121 121 xfrm_device 122 122 xfrm_proc 123 123 xfrm_sync
+1 -2
Documentation/networking/x25-iface.rst
··· 1 1 .. SPDX-License-Identifier: GPL-2.0 2 2 3 - ============================- 4 3 X.25 Device Driver Interface 5 - ============================- 4 + ============================ 6 5 7 6 Version 1.1 8 7
+6 -1
drivers/net/bonding/bond_netlink.c
··· 84 84 return -EMSGSIZE; 85 85 } 86 86 87 + /* Limit the max delay range to 300s */ 88 + static struct netlink_range_validation delay_range = { 89 + .max = 300000, 90 + }; 91 + 87 92 static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { 88 93 [IFLA_BOND_MODE] = { .type = NLA_U8 }, 89 94 [IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 }, ··· 119 114 [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, 120 115 .len = ETH_ALEN }, 121 116 [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, 122 - [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 }, 117 + [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range), 123 118 [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, 124 119 [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, 125 120 };
+7 -1
drivers/net/bonding/bond_options.c
··· 169 169 { NULL, -1, 0} 170 170 }; 171 171 172 + static const struct bond_opt_value bond_peer_notif_delay_tbl[] = { 173 + { "off", 0, 0}, 174 + { "maxval", 300000, BOND_VALFLAG_MAX}, 175 + { NULL, -1, 0} 176 + }; 177 + 172 178 static const struct bond_opt_value bond_primary_reselect_tbl[] = { 173 179 { "always", BOND_PRI_RESELECT_ALWAYS, BOND_VALFLAG_DEFAULT}, 174 180 { "better", BOND_PRI_RESELECT_BETTER, 0}, ··· 494 488 .id = BOND_OPT_PEER_NOTIF_DELAY, 495 489 .name = "peer_notif_delay", 496 490 .desc = "Delay between each peer notification on failover event, in milliseconds", 497 - .values = bond_intmax_tbl, 491 + .values = bond_peer_notif_delay_tbl, 498 492 .set = bond_option_peer_notif_delay_set 499 493 } 500 494 };
-13
drivers/net/ethernet/google/gve/gve_main.c
··· 294 294 bool reschedule = false; 295 295 int work_done = 0; 296 296 297 - /* Clear PCI MSI-X Pending Bit Array (PBA) 298 - * 299 - * This bit is set if an interrupt event occurs while the vector is 300 - * masked. If this bit is set and we reenable the interrupt, it will 301 - * fire again. Since we're just about to poll the queue state, we don't 302 - * need it to fire again. 303 - * 304 - * Under high softirq load, it's possible that the interrupt condition 305 - * is triggered twice before we got the chance to process it. 306 - */ 307 - gve_write_irq_doorbell_dqo(priv, block, 308 - GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO); 309 - 310 297 if (block->tx) 311 298 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 312 299
+1 -1
drivers/net/ethernet/mediatek/mtk_wed.c
··· 654 654 BIT(hw->index), BIT(hw->index)); 655 655 } 656 656 657 - if (!hw_list[!hw->index]->wed_dev && 657 + if ((!hw_list[!hw->index] || !hw_list[!hw->index]->wed_dev) && 658 658 hw->eth->dma_dev != hw->eth->dev) 659 659 mtk_eth_set_dma_device(hw->eth, hw->eth->dev); 660 660
+9 -9
drivers/net/ethernet/mscc/vsc7514_regs.c
··· 307 307 REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000218), 308 308 REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00021c), 309 309 REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000220), 310 - REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000214), 311 - REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000218), 312 - REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00021c), 313 - REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000220), 314 - REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000224), 315 - REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000228), 316 - REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00022c), 317 - REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000230), 318 - REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000234), 310 + REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000224), 311 + REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000228), 312 + REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00022c), 313 + REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000230), 314 + REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000234), 315 + REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000238), 316 + REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00023c), 317 + REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000240), 318 + REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000244), 319 319 REG(SYS_RESET_CFG, 0x000508), 320 320 REG(SYS_CMID, 0x00050c), 321 321 REG(SYS_VLAN_ETYPE_CFG, 0x000510),
+1
drivers/net/ethernet/stmicro/stmmac/dwmac4.h
··· 181 181 #define GMAC4_LPI_CTRL_STATUS 0xd0 182 182 #define GMAC4_LPI_TIMER_CTRL 0xd4 183 183 #define GMAC4_LPI_ENTRY_TIMER 0xd8 184 + #define GMAC4_MAC_ONEUS_TIC_COUNTER 0xdc 184 185 185 186 /* LPI control and status defines */ 186 187 #define GMAC4_LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable */
+5
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
··· 25 25 struct stmmac_priv *priv = netdev_priv(dev); 26 26 void __iomem *ioaddr = hw->pcsr; 27 27 u32 value = readl(ioaddr + GMAC_CONFIG); 28 + u32 clk_rate; 28 29 29 30 value |= GMAC_CORE_INIT; 30 31 ··· 47 46 } 48 47 49 48 writel(value, ioaddr + GMAC_CONFIG); 49 + 50 + /* Configure LPI 1us counter to number of CSR clock ticks in 1us - 1 */ 51 + clk_rate = clk_get_rate(priv->plat->stmmac_clk); 52 + writel((clk_rate / 1000000) - 1, ioaddr + GMAC4_MAC_ONEUS_TIC_COUNTER); 50 53 51 54 /* Enable GMAC interrupts */ 52 55 value = GMAC_INT_DEFAULT_ENABLE;
+6
drivers/net/ipvlan/ipvlan_core.c
··· 436 436 goto err; 437 437 } 438 438 skb_dst_set(skb, &rt->dst); 439 + 440 + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 441 + 439 442 err = ip_local_out(net, skb->sk, skb); 440 443 if (unlikely(net_xmit_eval(err))) 441 444 dev->stats.tx_errors++; ··· 477 474 goto err; 478 475 } 479 476 skb_dst_set(skb, dst); 477 + 478 + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); 479 + 480 480 err = ip6_local_out(net, skb->sk, skb); 481 481 if (unlikely(net_xmit_eval(err))) 482 482 dev->stats.tx_errors++;
+10 -1
drivers/net/mdio/mdio-mvusb.c
··· 67 67 struct device *dev = &interface->dev; 68 68 struct mvusb_mdio *mvusb; 69 69 struct mii_bus *mdio; 70 + int ret; 70 71 71 72 mdio = devm_mdiobus_alloc_size(dev, sizeof(*mvusb)); 72 73 if (!mdio) ··· 88 87 mdio->write = mvusb_mdio_write; 89 88 90 89 usb_set_intfdata(interface, mvusb); 91 - return of_mdiobus_register(mdio, dev->of_node); 90 + ret = of_mdiobus_register(mdio, dev->of_node); 91 + if (ret) 92 + goto put_dev; 93 + 94 + return 0; 95 + 96 + put_dev: 97 + usb_put_dev(mvusb->udev); 98 + return ret; 92 99 } 93 100 94 101 static void mvusb_mdio_disconnect(struct usb_interface *interface)
+1 -1
drivers/net/pcs/pcs-xpcs.c
··· 1203 1203 [DW_XPCS_2500BASEX] = { 1204 1204 .supported = xpcs_2500basex_features, 1205 1205 .interface = xpcs_2500basex_interfaces, 1206 - .num_interfaces = ARRAY_SIZE(xpcs_2500basex_features), 1206 + .num_interfaces = ARRAY_SIZE(xpcs_2500basex_interfaces), 1207 1207 .an_mode = DW_2500BASEX, 1208 1208 }, 1209 1209 };
+5
drivers/net/phy/bcm-phy-lib.h
··· 40 40 return bcm_phy_write_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER, val); 41 41 } 42 42 43 + static inline int bcm_phy_read_exp_sel(struct phy_device *phydev, u16 reg) 44 + { 45 + return bcm_phy_read_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER); 46 + } 47 + 43 48 int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val); 44 49 int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum); 45 50
+1 -1
drivers/net/phy/bcm7xxx.c
··· 486 486 bcm_phy_write_misc(phydev, 0x0038, 0x0002, 0xede0); 487 487 488 488 /* Read CORE_EXPA9 */ 489 - tmp = bcm_phy_read_exp(phydev, 0x00a9); 489 + tmp = bcm_phy_read_exp_sel(phydev, 0x00a9); 490 490 /* CORE_EXPA9[6:1] is rcalcode[5:0] */ 491 491 rcalcode = (tmp & 0x7e) / 2; 492 492 /* Correct RCAL code + 1 is -1% rprogr, LP: +16 */
+2 -2
drivers/net/tap.c
··· 742 742 743 743 /* Move network header to the right position for VLAN tagged packets */ 744 744 if (eth_type_vlan(skb->protocol) && 745 - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) 745 + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) 746 746 skb_set_network_header(skb, depth); 747 747 748 748 /* copy skb_ubuf_info for callback when skb has no error */ ··· 1197 1197 1198 1198 /* Move network header to the right position for VLAN tagged packets */ 1199 1199 if (eth_type_vlan(skb->protocol) && 1200 - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) 1200 + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) 1201 1201 skb_set_network_header(skb, depth); 1202 1202 1203 1203 rcu_read_lock();
+2 -1
include/linux/dim.h
··· 236 236 * 237 237 * Calculate the delta between two samples (in data rates). 238 238 * Takes into consideration counter wrap-around. 239 + * Returned boolean indicates whether curr_stats are reliable. 239 240 */ 240 - void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, 241 + bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, 241 242 struct dim_stats *curr_stats); 242 243 243 244 /**
+17
include/linux/if_vlan.h
··· 637 637 return __vlan_get_protocol(skb, skb->protocol, NULL); 638 638 } 639 639 640 + /* This version of __vlan_get_protocol() also pulls mac header in skb->head */ 641 + static inline __be16 vlan_get_protocol_and_depth(struct sk_buff *skb, 642 + __be16 type, int *depth) 643 + { 644 + int maclen; 645 + 646 + type = __vlan_get_protocol(skb, type, &maclen); 647 + 648 + if (type) { 649 + if (!pskb_may_pull(skb, maclen)) 650 + type = 0; 651 + else if (depth) 652 + *depth = maclen; 653 + } 654 + return type; 655 + } 656 + 640 657 /* A getter for the SKB protocol field which will handle VLAN tags consistently 641 658 * whether VLAN acceleration is enabled or not. 642 659 */
+1 -1
include/net/bonding.h
··· 233 233 */ 234 234 spinlock_t mode_lock; 235 235 spinlock_t stats_lock; 236 - u8 send_peer_notif; 236 + u32 send_peer_notif; 237 237 u8 igmp_retrans; 238 238 #ifdef CONFIG_PROC_FS 239 239 struct proc_dir_entry *proc_entry;
+1 -1
include/net/sock.h
··· 2718 2718 __sock_recv_cmsgs(msg, sk, skb); 2719 2719 else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) 2720 2720 sock_write_timestamp(sk, skb->tstamp); 2721 - else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) 2721 + else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP)) 2722 2722 sock_write_timestamp(sk, 0); 2723 2723 } 2724 2724
+3 -2
lib/dim/dim.c
··· 54 54 } 55 55 EXPORT_SYMBOL(dim_park_tired); 56 56 57 - void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, 57 + bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, 58 58 struct dim_stats *curr_stats) 59 59 { 60 60 /* u32 holds up to 71 minutes, should be enough */ ··· 66 66 start->comp_ctr); 67 67 68 68 if (!delta_us) 69 - return; 69 + return false; 70 70 71 71 curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); 72 72 curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); ··· 79 79 else 80 80 curr_stats->cpe_ratio = 0; 81 81 82 + return true; 82 83 } 83 84 EXPORT_SYMBOL(dim_calc_stats);
+2 -1
lib/dim/net_dim.c
··· 227 227 dim->start_sample.event_ctr); 228 228 if (nevents < DIM_NEVENTS) 229 229 break; 230 - dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); 230 + if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats)) 231 + break; 231 232 if (net_dim_decision(&curr_stats, dim)) { 232 233 dim->state = DIM_APPLY_NEW_PROFILE; 233 234 schedule_work(&dim->work);
+2 -1
lib/dim/rdma_dim.c
··· 88 88 nevents = curr_sample->event_ctr - dim->start_sample.event_ctr; 89 89 if (nevents < DIM_NEVENTS) 90 90 break; 91 - dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats); 91 + if (!dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats)) 92 + break; 92 93 if (rdma_dim_decision(&curr_stats, dim)) { 93 94 dim->state = DIM_APPLY_NEW_PROFILE; 94 95 schedule_work(&dim->work);
+1 -1
net/bridge/br_forward.c
··· 42 42 eth_type_vlan(skb->protocol)) { 43 43 int depth; 44 44 45 - if (!__vlan_get_protocol(skb, skb->protocol, &depth)) 45 + if (!vlan_get_protocol_and_depth(skb, skb->protocol, &depth)) 46 46 goto drop; 47 47 48 48 skb_set_network_header(skb, depth);
+10 -5
net/core/datagram.c
··· 807 807 { 808 808 struct sock *sk = sock->sk; 809 809 __poll_t mask; 810 + u8 shutdown; 810 811 811 812 sock_poll_wait(file, sock, wait); 812 813 mask = 0; 813 814 814 815 /* exceptional events? */ 815 - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 816 + if (READ_ONCE(sk->sk_err) || 817 + !skb_queue_empty_lockless(&sk->sk_error_queue)) 816 818 mask |= EPOLLERR | 817 819 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 818 820 819 - if (sk->sk_shutdown & RCV_SHUTDOWN) 821 + shutdown = READ_ONCE(sk->sk_shutdown); 822 + if (shutdown & RCV_SHUTDOWN) 820 823 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 821 - if (sk->sk_shutdown == SHUTDOWN_MASK) 824 + if (shutdown == SHUTDOWN_MASK) 822 825 mask |= EPOLLHUP; 823 826 824 827 /* readable? */ ··· 830 827 831 828 /* Connection-based need to check for termination and startup */ 832 829 if (connection_based(sk)) { 833 - if (sk->sk_state == TCP_CLOSE) 830 + int state = READ_ONCE(sk->sk_state); 831 + 832 + if (state == TCP_CLOSE) 834 833 mask |= EPOLLHUP; 835 834 /* connection hasn't started yet? */ 836 - if (sk->sk_state == TCP_SYN_SENT) 835 + if (state == TCP_SYN_SENT) 837 836 return mask; 838 837 } 839 838
+1 -1
net/core/dev.c
··· 3335 3335 type = eth->h_proto; 3336 3336 } 3337 3337 3338 - return __vlan_get_protocol(skb, type, depth); 3338 + return vlan_get_protocol_and_depth(skb, type, depth); 3339 3339 } 3340 3340 3341 3341 /* openvswitch calls this on rx path, so we need a different check.
+2 -2
net/core/skbuff.c
··· 5298 5298 u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); 5299 5299 u32 csum_start = skb_headroom(skb) + (u32)start; 5300 5300 5301 - if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) { 5301 + if (unlikely(csum_start >= U16_MAX || csum_end > skb_headlen(skb))) { 5302 5302 net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", 5303 5303 start, off, skb_headroom(skb), skb_headlen(skb)); 5304 5304 return false; ··· 5306 5306 skb->ip_summed = CHECKSUM_PARTIAL; 5307 5307 skb->csum_start = csum_start; 5308 5308 skb->csum_offset = off; 5309 - skb_set_transport_header(skb, start); 5309 + skb->transport_header = csum_start; 5310 5310 return true; 5311 5311 } 5312 5312 EXPORT_SYMBOL_GPL(skb_partial_csum_set);
+6 -6
net/core/stream.c
··· 73 73 add_wait_queue(sk_sleep(sk), &wait); 74 74 sk->sk_write_pending++; 75 75 done = sk_wait_event(sk, timeo_p, 76 - !sk->sk_err && 77 - !((1 << sk->sk_state) & 76 + !READ_ONCE(sk->sk_err) && 77 + !((1 << READ_ONCE(sk->sk_state)) & 78 78 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait); 79 79 remove_wait_queue(sk_sleep(sk), &wait); 80 80 sk->sk_write_pending--; ··· 87 87 * sk_stream_closing - Return 1 if we still have things to send in our buffers. 88 88 * @sk: socket to verify 89 89 */ 90 - static inline int sk_stream_closing(struct sock *sk) 90 + static int sk_stream_closing(const struct sock *sk) 91 91 { 92 - return (1 << sk->sk_state) & 92 + return (1 << READ_ONCE(sk->sk_state)) & 93 93 (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK); 94 94 } 95 95 ··· 142 142 143 143 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 144 144 sk->sk_write_pending++; 145 - sk_wait_event(sk, &current_timeo, sk->sk_err || 146 - (sk->sk_shutdown & SEND_SHUTDOWN) || 145 + sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) || 146 + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || 147 147 (sk_stream_memory_free(sk) && 148 148 !vm_wait), &wait); 149 149 sk->sk_write_pending--;
+1 -1
net/ipv4/af_inet.c
··· 894 894 EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ 895 895 fallthrough; 896 896 default: 897 - sk->sk_shutdown |= how; 897 + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how); 898 898 if (sk->sk_prot->shutdown) 899 899 sk->sk_prot->shutdown(sk, how); 900 900 break;
+8 -6
net/ipv4/tcp.c
··· 498 498 __poll_t mask; 499 499 struct sock *sk = sock->sk; 500 500 const struct tcp_sock *tp = tcp_sk(sk); 501 + u8 shutdown; 501 502 int state; 502 503 503 504 sock_poll_wait(file, sock, wait); ··· 541 540 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent 542 541 * blocking on fresh not-connected or disconnected socket. --ANK 543 542 */ 544 - if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) 543 + shutdown = READ_ONCE(sk->sk_shutdown); 544 + if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) 545 545 mask |= EPOLLHUP; 546 - if (sk->sk_shutdown & RCV_SHUTDOWN) 546 + if (shutdown & RCV_SHUTDOWN) 547 547 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; 548 548 549 549 /* Connected or passive Fast Open socket? */ ··· 561 559 if (tcp_stream_is_readable(sk, target)) 562 560 mask |= EPOLLIN | EPOLLRDNORM; 563 561 564 - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 562 + if (!(shutdown & SEND_SHUTDOWN)) { 565 563 if (__sk_stream_is_writeable(sk, 1)) { 566 564 mask |= EPOLLOUT | EPOLLWRNORM; 567 565 } else { /* send SIGIO later */ ··· 2869 2867 int data_was_unread = 0; 2870 2868 int state; 2871 2869 2872 - sk->sk_shutdown = SHUTDOWN_MASK; 2870 + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); 2873 2871 2874 2872 if (sk->sk_state == TCP_LISTEN) { 2875 2873 tcp_set_state(sk, TCP_CLOSE); ··· 3121 3119 3122 3120 inet_bhash2_reset_saddr(sk); 3123 3121 3124 - sk->sk_shutdown = 0; 3122 + WRITE_ONCE(sk->sk_shutdown, 0); 3125 3123 sock_reset_flag(sk, SOCK_DONE); 3126 3124 tp->srtt_us = 0; 3127 3125 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); ··· 4651 4649 if (req) 4652 4650 reqsk_fastopen_remove(sk, req, false); 4653 4651 4654 - sk->sk_shutdown = SHUTDOWN_MASK; 4652 + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); 4655 4653 4656 4654 if (!sock_flag(sk, SOCK_DEAD)) 4657 4655 sk->sk_state_change(sk);
+1 -1
net/ipv4/tcp_bpf.c
··· 168 168 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 169 169 ret = sk_wait_event(sk, &timeo, 170 170 !list_empty(&psock->ingress_msg) || 171 - !skb_queue_empty(&sk->sk_receive_queue), &wait); 171 + !skb_queue_empty_lockless(&sk->sk_receive_queue), &wait); 172 172 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 173 173 remove_wait_queue(sk_sleep(sk), &wait); 174 174 return ret;
+2 -2
net/ipv4/tcp_input.c
··· 4362 4362 4363 4363 inet_csk_schedule_ack(sk); 4364 4364 4365 - sk->sk_shutdown |= RCV_SHUTDOWN; 4365 + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); 4366 4366 sock_set_flag(sk, SOCK_DONE); 4367 4367 4368 4368 switch (sk->sk_state) { ··· 6599 6599 break; 6600 6600 6601 6601 tcp_set_state(sk, TCP_FIN_WAIT2); 6602 - sk->sk_shutdown |= SEND_SHUTDOWN; 6602 + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN); 6603 6603 6604 6604 sk_dst_confirm(sk); 6605 6605
+5 -3
net/llc/af_llc.c
··· 583 583 584 584 add_wait_queue(sk_sleep(sk), &wait); 585 585 while (1) { 586 - if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait)) 586 + if (sk_wait_event(sk, &timeout, 587 + READ_ONCE(sk->sk_state) == TCP_CLOSE, &wait)) 587 588 break; 588 589 rc = -ERESTARTSYS; 589 590 if (signal_pending(current)) ··· 604 603 605 604 add_wait_queue(sk_sleep(sk), &wait); 606 605 while (1) { 607 - if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait)) 606 + if (sk_wait_event(sk, &timeout, 607 + READ_ONCE(sk->sk_state) != TCP_SYN_SENT, &wait)) 608 608 break; 609 609 if (signal_pending(current) || !timeout) 610 610 break; ··· 624 622 while (1) { 625 623 rc = 0; 626 624 if (sk_wait_event(sk, &timeout, 627 - (sk->sk_shutdown & RCV_SHUTDOWN) || 625 + (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN) || 628 626 (!llc_data_accept_state(llc->state) && 629 627 !llc->remote_busy_flag && 630 628 !llc->p_flag), &wait))
+4 -2
net/netfilter/core.c
··· 711 711 712 712 rcu_read_lock(); 713 713 ct_hook = rcu_dereference(nf_ct_hook); 714 - BUG_ON(ct_hook == NULL); 715 - ct_hook->destroy(nfct); 714 + if (ct_hook) 715 + ct_hook->destroy(nfct); 716 716 rcu_read_unlock(); 717 + 718 + WARN_ON(!ct_hook); 717 719 } 718 720 EXPORT_SYMBOL(nf_conntrack_destroy); 719 721
+2 -1
net/netfilter/nf_conntrack_standalone.c
··· 1218 1218 nf_conntrack_htable_size_user = nf_conntrack_htable_size; 1219 1219 #endif 1220 1220 1221 + nf_conntrack_init_end(); 1222 + 1221 1223 ret = register_pernet_subsys(&nf_conntrack_net_ops); 1222 1224 if (ret < 0) 1223 1225 goto out_pernet; 1224 1226 1225 - nf_conntrack_init_end(); 1226 1227 return 0; 1227 1228 1228 1229 out_pernet:
+6 -3
net/netfilter/nft_chain_filter.c
··· 344 344 return; 345 345 } 346 346 347 + /* UNREGISTER events are also happening on netns exit. 348 + * 349 + * Although nf_tables core releases all tables/chains, only this event 350 + * handler provides guarantee that hook->ops.dev is still accessible, 351 + * so we cannot skip exiting net namespaces. 352 + */ 347 353 __nft_release_basechain(ctx); 348 354 } 349 355 ··· 366 360 367 361 if (event != NETDEV_UNREGISTER && 368 362 event != NETDEV_CHANGENAME) 369 - return NOTIFY_DONE; 370 - 371 - if (!check_net(ctx.net)) 372 363 return NOTIFY_DONE; 373 364 374 365 nft_net = nft_pernet(ctx.net);
+4 -4
net/netlink/af_netlink.c
··· 1990 1990 1991 1991 skb_free_datagram(sk, skb); 1992 1992 1993 - if (nlk->cb_running && 1993 + if (READ_ONCE(nlk->cb_running) && 1994 1994 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 1995 1995 ret = netlink_dump(sk); 1996 1996 if (ret) { ··· 2302 2302 if (cb->done) 2303 2303 cb->done(cb); 2304 2304 2305 - nlk->cb_running = false; 2305 + WRITE_ONCE(nlk->cb_running, false); 2306 2306 module = cb->module; 2307 2307 skb = cb->skb; 2308 2308 mutex_unlock(nlk->cb_mutex); ··· 2365 2365 goto error_put; 2366 2366 } 2367 2367 2368 - nlk->cb_running = true; 2368 + WRITE_ONCE(nlk->cb_running, true); 2369 2369 nlk->dump_done_errno = INT_MAX; 2370 2370 2371 2371 mutex_unlock(nlk->cb_mutex); ··· 2703 2703 nlk->groups ? (u32)nlk->groups[0] : 0, 2704 2704 sk_rmem_alloc_get(s), 2705 2705 sk_wmem_alloc_get(s), 2706 - nlk->cb_running, 2706 + READ_ONCE(nlk->cb_running), 2707 2707 refcount_read(&s->sk_refcnt), 2708 2708 atomic_read(&s->sk_drops), 2709 2709 sock_i_ino(s)
+2 -4
net/packet/af_packet.c
··· 1934 1934 /* Move network header to the right position for VLAN tagged packets */ 1935 1935 if (likely(skb->dev->type == ARPHRD_ETHER) && 1936 1936 eth_type_vlan(skb->protocol) && 1937 - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) { 1938 - if (pskb_may_pull(skb, depth)) 1939 - skb_set_network_header(skb, depth); 1940 - } 1937 + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) 1938 + skb_set_network_header(skb, depth); 1941 1939 1942 1940 skb_probe_transport_header(skb); 1943 1941 }
+2 -2
net/smc/smc_close.c
··· 67 67 68 68 rc = sk_wait_event(sk, &timeout, 69 69 !smc_tx_prepared_sends(&smc->conn) || 70 - sk->sk_err == ECONNABORTED || 71 - sk->sk_err == ECONNRESET || 70 + READ_ONCE(sk->sk_err) == ECONNABORTED || 71 + READ_ONCE(sk->sk_err) == ECONNRESET || 72 72 smc->conn.killed, 73 73 &wait); 74 74 if (rc)
+2 -2
net/smc/smc_rx.c
··· 267 267 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 268 268 add_wait_queue(sk_sleep(sk), &wait); 269 269 rc = sk_wait_event(sk, timeo, 270 - sk->sk_err || 270 + READ_ONCE(sk->sk_err) || 271 271 cflags->peer_conn_abort || 272 - sk->sk_shutdown & RCV_SHUTDOWN || 272 + READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN || 273 273 conn->killed || 274 274 fcrit(conn), 275 275 &wait);
+2 -2
net/smc/smc_tx.c
··· 113 113 break; /* at least 1 byte of free & no urgent data */ 114 114 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 115 115 sk_wait_event(sk, &timeo, 116 - sk->sk_err || 117 - (sk->sk_shutdown & SEND_SHUTDOWN) || 116 + READ_ONCE(sk->sk_err) || 117 + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || 118 118 smc_cdc_rxed_any_close(conn) || 119 119 (atomic_read(&conn->sndbuf_space) && 120 120 !conn->urg_tx_pend),
+1 -1
net/socket.c
··· 2911 2911 * error to return on the next call or if the 2912 2912 * app asks about it using getsockopt(SO_ERROR). 2913 2913 */ 2914 - sock->sk->sk_err = -err; 2914 + WRITE_ONCE(sock->sk->sk_err, -err); 2915 2915 } 2916 2916 out_put: 2917 2917 fput_light(sock->file, fput_needed);
+2 -2
net/tipc/socket.c
··· 314 314 tipc_sk_respond(sk, skb, error); 315 315 } 316 316 317 - static bool tipc_sk_connected(struct sock *sk) 317 + static bool tipc_sk_connected(const struct sock *sk) 318 318 { 319 - return sk->sk_state == TIPC_ESTABLISHED; 319 + return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED; 320 320 } 321 321 322 322 /* tipc_sk_type_connectionless - check if the socket is datagram socket
+2 -1
net/tls/tls_main.c
··· 111 111 break; 112 112 } 113 113 114 - if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait)) 114 + if (sk_wait_event(sk, timeo, 115 + !READ_ONCE(sk->sk_write_pending), &wait)) 115 116 break; 116 117 } 117 118 remove_wait_queue(sk_sleep(sk), &wait);
+13 -9
net/unix/af_unix.c
··· 603 603 /* Clear state */ 604 604 unix_state_lock(sk); 605 605 sock_orphan(sk); 606 - sk->sk_shutdown = SHUTDOWN_MASK; 606 + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); 607 607 path = u->path; 608 608 u->path.dentry = NULL; 609 609 u->path.mnt = NULL; ··· 628 628 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 629 629 unix_state_lock(skpair); 630 630 /* No more writes */ 631 - skpair->sk_shutdown = SHUTDOWN_MASK; 631 + WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); 632 632 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 633 633 WRITE_ONCE(skpair->sk_err, ECONNRESET); 634 634 unix_state_unlock(skpair); ··· 1442 1442 1443 1443 sched = !sock_flag(other, SOCK_DEAD) && 1444 1444 !(other->sk_shutdown & RCV_SHUTDOWN) && 1445 - unix_recvq_full(other); 1445 + unix_recvq_full_lockless(other); 1446 1446 1447 1447 unix_state_unlock(other); 1448 1448 ··· 3008 3008 ++mode; 3009 3009 3010 3010 unix_state_lock(sk); 3011 - sk->sk_shutdown |= mode; 3011 + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); 3012 3012 other = unix_peer(sk); 3013 3013 if (other) 3014 3014 sock_hold(other); ··· 3028 3028 if (mode&SEND_SHUTDOWN) 3029 3029 peer_mode |= RCV_SHUTDOWN; 3030 3030 unix_state_lock(other); 3031 - other->sk_shutdown |= peer_mode; 3031 + WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); 3032 3032 unix_state_unlock(other); 3033 3033 other->sk_state_change(other); 3034 3034 if (peer_mode == SHUTDOWN_MASK) ··· 3160 3160 { 3161 3161 struct sock *sk = sock->sk; 3162 3162 __poll_t mask; 3163 + u8 shutdown; 3163 3164 3164 3165 sock_poll_wait(file, sock, wait); 3165 3166 mask = 0; 3167 + shutdown = READ_ONCE(sk->sk_shutdown); 3166 3168 3167 3169 /* exceptional events? 
*/ 3168 3170 if (READ_ONCE(sk->sk_err)) 3169 3171 mask |= EPOLLERR; 3170 - if (sk->sk_shutdown == SHUTDOWN_MASK) 3172 + if (shutdown == SHUTDOWN_MASK) 3171 3173 mask |= EPOLLHUP; 3172 - if (sk->sk_shutdown & RCV_SHUTDOWN) 3174 + if (shutdown & RCV_SHUTDOWN) 3173 3175 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3174 3176 3175 3177 /* readable? */ ··· 3205 3203 struct sock *sk = sock->sk, *other; 3206 3204 unsigned int writable; 3207 3205 __poll_t mask; 3206 + u8 shutdown; 3208 3207 3209 3208 sock_poll_wait(file, sock, wait); 3210 3209 mask = 0; 3210 + shutdown = READ_ONCE(sk->sk_shutdown); 3211 3211 3212 3212 /* exceptional events? */ 3213 3213 if (READ_ONCE(sk->sk_err) || ··· 3217 3213 mask |= EPOLLERR | 3218 3214 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3219 3215 3220 - if (sk->sk_shutdown & RCV_SHUTDOWN) 3216 + if (shutdown & RCV_SHUTDOWN) 3221 3217 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3222 - if (sk->sk_shutdown == SHUTDOWN_MASK) 3218 + if (shutdown == SHUTDOWN_MASK) 3223 3219 mask |= EPOLLHUP; 3224 3220 3225 3221 /* readable? */
+50
tools/testing/selftests/drivers/net/bonding/bond_options.sh
··· 6 6 ALL_TESTS=" 7 7 prio 8 8 arp_validate 9 + num_grat_arp 9 10 " 10 11 11 12 REQUIRE_MZ=no ··· 254 253 { 255 254 arp_validate_arp "active-backup" 256 255 arp_validate_ns "active-backup" 256 + } 257 + 258 + garp_test() 259 + { 260 + local param="$1" 261 + local active_slave exp_num real_num i 262 + RET=0 263 + 264 + # create bond 265 + bond_reset "${param}" 266 + 267 + bond_check_connection 268 + [ $RET -ne 0 ] && log_test "num_grat_arp" "$retmsg" 269 + 270 + 271 + # Add tc rules to count GARP number 272 + for i in $(seq 0 2); do 273 + tc -n ${g_ns} filter add dev s$i ingress protocol arp pref 1 handle 101 \ 274 + flower skip_hw arp_op request arp_sip ${s_ip4} arp_tip ${s_ip4} action pass 275 + done 276 + 277 + # Do failover 278 + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") 279 + ip -n ${s_ns} link set ${active_slave} down 280 + 281 + exp_num=$(echo "${param}" | cut -f6 -d ' ') 282 + sleep $((exp_num + 2)) 283 + 284 + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") 285 + 286 + # check result 287 + real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}") 288 + if [ "${real_num}" -ne "${exp_num}" ]; then 289 + echo "$real_num garp packets sent on active slave ${active_slave}" 290 + RET=1 291 + fi 292 + 293 + for i in $(seq 0 2); do 294 + tc -n ${g_ns} filter del dev s$i ingress 295 + done 296 + } 297 + 298 + num_grat_arp() 299 + { 300 + local val 301 + for val in 10 20 30 50; do 302 + garp_test "mode active-backup miimon 100 num_grat_arp $val peer_notify_delay 1000" 303 + log_test "num_grat_arp" "active-backup miimon num_grat_arp $val" 304 + done 257 305 } 258 306 259 307 trap cleanup EXIT
+2
tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
··· 61 61 ip -n ${g_ns} link set s${i} up 62 62 ip -n ${g_ns} link set s${i} master br0 63 63 ip -n ${s_ns} link set eth${i} master bond0 64 + 65 + tc -n ${g_ns} qdisc add dev s${i} clsact 64 66 done 65 67 66 68 ip -n ${s_ns} link set bond0 up
+2 -1
tools/testing/selftests/net/forwarding/lib.sh
··· 791 791 local id=$1; shift 792 792 local handle=$1; shift 793 793 local selector=${1:-.packets}; shift 794 + local netns=${1:-""}; shift 794 795 795 - tc -j -s filter show $id \ 796 + tc $netns -j -s filter show $id \ 796 797 | jq ".[] | select(.options.handle == $handle) | \ 797 798 .options.actions[0].stats$selector" 798 799 }
+139 -6
tools/testing/selftests/netfilter/nft_flowtable.sh
··· 188 188 exit $ksft_skip 189 189 fi 190 190 191 + ip netns exec $ns2 nft -f - <<EOF 192 + table inet filter { 193 + counter ip4dscp0 { } 194 + counter ip4dscp3 { } 195 + 196 + chain input { 197 + type filter hook input priority 0; policy accept; 198 + meta l4proto tcp goto { 199 + ip dscp cs3 counter name ip4dscp3 accept 200 + ip dscp 0 counter name ip4dscp0 accept 201 + } 202 + } 203 + } 204 + EOF 205 + 206 + if [ $? -ne 0 ]; then 207 + echo "SKIP: Could not load nft ruleset" 208 + exit $ksft_skip 209 + fi 210 + 191 211 # test basic connectivity 192 212 if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then 193 213 echo "ERROR: $ns1 cannot reach ns2" 1>&2 ··· 275 255 fi 276 256 } 277 257 258 + check_dscp() 259 + { 260 + local what=$1 261 + local ok=1 262 + 263 + local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets) 264 + 265 + local pc4=${counter%*bytes*} 266 + local pc4=${pc4#*packets} 267 + 268 + local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets) 269 + local pc4z=${counter%*bytes*} 270 + local pc4z=${pc4z#*packets} 271 + 272 + case "$what" in 273 + "dscp_none") 274 + if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then 275 + echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 276 + ret=1 277 + ok=0 278 + fi 279 + ;; 280 + "dscp_fwd") 281 + if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then 282 + echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 283 + ret=1 284 + ok=0 285 + fi 286 + ;; 287 + "dscp_ingress") 288 + if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then 289 + echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 290 + ret=1 291 + ok=0 292 + fi 293 + ;; 294 + "dscp_egress") 295 + if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then 296 + echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 297 + ret=1 298 + ok=0 299 + fi 300 + ;; 
301 + *) 302 + echo "FAIL: Unknown DSCP check" 1>&2 303 + ret=1 304 + ok=0 305 + esac 306 + 307 + if [ $ok -eq 1 ] ;then 308 + echo "PASS: $what: dscp packet counters match" 309 + fi 310 + } 311 + 278 312 check_transfer() 279 313 { 280 314 in=$1 ··· 360 286 ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" & 361 287 cpid=$! 362 288 363 - sleep 3 289 + sleep 1 364 290 365 - if ps -p $lpid > /dev/null;then 291 + prev="$(ls -l $ns1out $ns2out)" 292 + sleep 1 293 + 294 + while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do 295 + sleep 1; 296 + prev="$(ls -l $ns1out $ns2out)" 297 + done 298 + 299 + if test -d /proc/"$lpid"/; then 366 300 kill $lpid 367 301 fi 368 302 369 - if ps -p $cpid > /dev/null;then 303 + if test -d /proc/"$cpid"/; then 370 304 kill $cpid 371 305 fi 372 306 373 - wait 307 + wait $lpid 308 + wait $cpid 374 309 375 310 if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then 376 311 lret=1 ··· 397 314 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 398 315 399 316 return $? 317 + } 318 + 319 + test_tcp_forwarding_set_dscp() 320 + { 321 + check_dscp "dscp_none" 322 + 323 + ip netns exec $nsr1 nft -f - <<EOF 324 + table netdev dscpmangle { 325 + chain setdscp0 { 326 + type filter hook ingress device "veth0" priority 0; policy accept 327 + ip dscp set cs3 328 + } 329 + } 330 + EOF 331 + if [ $? -eq 0 ]; then 332 + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 333 + check_dscp "dscp_ingress" 334 + 335 + ip netns exec $nsr1 nft delete table netdev dscpmangle 336 + else 337 + echo "SKIP: Could not load netdev:ingress for veth0" 338 + fi 339 + 340 + ip netns exec $nsr1 nft -f - <<EOF 341 + table netdev dscpmangle { 342 + chain setdscp0 { 343 + type filter hook egress device "veth1" priority 0; policy accept 344 + ip dscp set cs3 345 + } 346 + } 347 + EOF 348 + if [ $? 
-eq 0 ]; then 349 + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 350 + check_dscp "dscp_egress" 351 + 352 + ip netns exec $nsr1 nft flush table netdev dscpmangle 353 + else 354 + echo "SKIP: Could not load netdev:egress for veth1" 355 + fi 356 + 357 + # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 358 + # counters should have seen packets (before and after ft offload kicks in). 359 + ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3 360 + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 361 + check_dscp "dscp_fwd" 400 362 } 401 363 402 364 test_tcp_forwarding_nat() ··· 512 384 } 513 385 } 514 386 EOF 387 + 388 + if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then 389 + echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 390 + exit 0 391 + fi 515 392 516 393 if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then 517 394 echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 ··· 622 489 ip -net $nsr1 addr add dead:1::1/64 dev veth0 623 490 ip -net $nsr1 link set up dev veth0 624 491 625 - KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) 626 - KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) 492 + KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1) 493 + KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1) 627 494 SPI1=$RANDOM 628 495 SPI2=$RANDOM 629 496