Merge tag 'net-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

+13

Documentation/networking/netdevices.rst

··· 289 289 ndo_set_rx_mode: 290 290 Synchronization: netif_addr_lock spinlock. 291 291 Context: BHs disabled 292 + Notes: Deprecated in favor of ndo_set_rx_mode_async which runs 293 + in process context. 294 + 295 + ndo_set_rx_mode_async: 296 + Synchronization: rtnl_lock() semaphore. In addition, netdev instance 297 + lock if the driver implements queue management or shaper API. 298 + Context: process (from a work queue) 299 + Notes: Async version of ndo_set_rx_mode which runs in process 300 + context. Receives snapshots of the unicast and multicast address lists. 301 + 302 + ndo_change_rx_flags: 303 + Synchronization: rtnl_lock() semaphore. In addition, netdev instance 304 + lock if the driver implements queue management or shaper API. 292 305 293 306 ndo_setup_tc: 294 307 ``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT locks

+1 -1

Documentation/process/maintainer-netdev.rst

··· 528 528 status will be withdrawn. 529 529 530 530 5. Test failures due to bugs either in the driver or the test itself, 531 - or lack of support for the feature the test is targgeting are 531 + or lack of support for the feature the test is targeting are 532 532 *not* a basis for losing the ``Supported`` status. 533 533 534 534 netdev CI will maintain an official page of supported devices, listing their

+7

MAINTAINERS

··· 15337 15337 W: http://www.tazenda.demon.co.uk/phil/linux-hp 15338 15338 F: arch/m68k/hp300/ 15339 15339 15340 + M68K ON MVME147 15341 + M: Daniel Palmer <daniel@thingy.jp> 15342 + S: Maintained 15343 + F: arch/m68k/mvme147/ 15344 + F: drivers/net/ethernet/amd/mvme147.c 15345 + F: drivers/scsi/mvme147.* 15346 + 15340 15347 M88DS3103 MEDIA DRIVER 15341 15348 L: linux-media@vger.kernel.org 15342 15349 S: Orphan

+1 -1

drivers/net/dsa/realtek/rtl8365mb.c

··· 216 216 (_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 : \ 217 217 0x0) 218 218 #define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extint) \ 219 - (0xF << (((_extint) % 2))) 219 + (0xF << (((_extint) % 2) * 4)) 220 220 #define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extint) \ 221 221 (((_extint) % 2) * 4) 222 222

+4 -2

drivers/net/dummy.c

··· 47 47 static int numdummies = 1; 48 48 49 49 /* fake multicast ability */ 50 - static void set_multicast_list(struct net_device *dev) 50 + static void set_multicast_list(struct net_device *dev, 51 + struct netdev_hw_addr_list *uc, 52 + struct netdev_hw_addr_list *mc) 51 53 { 52 54 } 53 55 ··· 89 87 .ndo_init = dummy_dev_init, 90 88 .ndo_start_xmit = dummy_xmit, 91 89 .ndo_validate_addr = eth_validate_addr, 92 - .ndo_set_rx_mode = set_multicast_list, 90 + .ndo_set_rx_mode_async = set_multicast_list, 93 91 .ndo_set_mac_address = eth_mac_addr, 94 92 .ndo_get_stats64 = dummy_get_stats64, 95 93 .ndo_change_carrier = dummy_change_carrier,

+87 -23

drivers/net/ethernet/airoha/airoha_eth.c

··· 745 745 dma_addr_t dma_addr; 746 746 747 747 q->buf_size = PAGE_SIZE / 2; 748 - q->ndesc = ndesc; 749 748 q->qdma = qdma; 750 749 751 - q->entry = devm_kzalloc(eth->dev, q->ndesc * sizeof(*q->entry), 750 + q->entry = devm_kzalloc(eth->dev, ndesc * sizeof(*q->entry), 752 751 GFP_KERNEL); 753 752 if (!q->entry) 753 + return -ENOMEM; 754 + 755 + q->desc = dmam_alloc_coherent(eth->dev, ndesc * sizeof(*q->desc), 756 + &dma_addr, GFP_KERNEL); 757 + if (!q->desc) 754 758 return -ENOMEM; 755 759 756 760 q->page_pool = page_pool_create(&pp_params); ··· 765 761 return err; 766 762 } 767 763 768 - q->desc = dmam_alloc_coherent(eth->dev, q->ndesc * sizeof(*q->desc), 769 - &dma_addr, GFP_KERNEL); 770 - if (!q->desc) 771 - return -ENOMEM; 772 - 764 + q->ndesc = ndesc; 773 765 netif_napi_add(eth->napi_dev, &q->napi, airoha_qdma_rx_napi_poll); 774 766 775 767 airoha_qdma_wr(qdma, REG_RX_RING_BASE(qid), dma_addr); ··· 841 841 } 842 842 843 843 return 0; 844 + } 845 + 846 + static void airoha_qdma_wake_netdev_txqs(struct airoha_queue *q) 847 + { 848 + struct airoha_qdma *qdma = q->qdma; 849 + struct airoha_eth *eth = qdma->eth; 850 + int i; 851 + 852 + for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { 853 + struct airoha_gdm_port *port = eth->ports[i]; 854 + 855 + if (port && port->qdma == qdma) 856 + netif_tx_wake_all_queues(port->dev); 857 + } 858 + q->txq_stopped = false; 844 859 } 845 860 846 861 static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) ··· 934 919 935 920 txq = netdev_get_tx_queue(skb->dev, queue); 936 921 netdev_tx_completed_queue(txq, 1, skb->len); 937 - if (netif_tx_queue_stopped(txq) && 938 - q->ndesc - q->queued >= q->free_thr) 939 - netif_tx_wake_queue(txq); 940 - 941 922 dev_kfree_skb_any(skb); 942 923 } 924 + 925 + if (q->txq_stopped && q->ndesc - q->queued >= q->free_thr) { 926 + /* Since multiple net_device TX queues can share the 927 + * same hw QDMA TX queue, there is no guarantee we have 928 + * inflight packets queued in hw 
belonging to a 929 + * net_device TX queue stopped in the xmit path. 930 + * In order to avoid any potential net_device TX queue 931 + * stall, we need to wake all the net_device TX queues 932 + * feeding the same hw QDMA TX queue. 933 + */ 934 + airoha_qdma_wake_netdev_txqs(q); 935 + } 936 + 943 937 unlock: 944 938 spin_unlock_bh(&q->lock); 945 939 } ··· 978 954 dma_addr_t dma_addr; 979 955 980 956 spin_lock_init(&q->lock); 981 - q->ndesc = size; 982 957 q->qdma = qdma; 983 958 q->free_thr = 1 + MAX_SKB_FRAGS; 984 959 INIT_LIST_HEAD(&q->tx_list); 985 960 986 - q->entry = devm_kzalloc(eth->dev, q->ndesc * sizeof(*q->entry), 961 + q->entry = devm_kzalloc(eth->dev, size * sizeof(*q->entry), 987 962 GFP_KERNEL); 988 963 if (!q->entry) 989 964 return -ENOMEM; 990 965 991 - q->desc = dmam_alloc_coherent(eth->dev, q->ndesc * sizeof(*q->desc), 966 + q->desc = dmam_alloc_coherent(eth->dev, size * sizeof(*q->desc), 992 967 &dma_addr, GFP_KERNEL); 993 968 if (!q->desc) 994 969 return -ENOMEM; 995 970 996 - for (i = 0; i < q->ndesc; i++) { 971 + for (i = 0; i < size; i++) { 997 972 u32 val = FIELD_PREP(QDMA_DESC_DONE_MASK, 1); 998 973 999 974 list_add_tail(&q->entry[i].list, &q->tx_list); 1000 975 WRITE_ONCE(q->desc[i].ctrl, cpu_to_le32(val)); 1001 976 } 977 + q->ndesc = size; 1002 978 1003 979 /* xmit ring drop default setting */ 1004 980 airoha_qdma_set(qdma, REG_TX_RING_BLOCKING(qid), ··· 1020 996 struct airoha_eth *eth = qdma->eth; 1021 997 dma_addr_t dma_addr; 1022 998 1023 - netif_napi_add_tx(eth->napi_dev, &irq_q->napi, 1024 - airoha_qdma_tx_napi_poll); 1025 999 irq_q->q = dmam_alloc_coherent(eth->dev, size * sizeof(u32), 1026 1000 &dma_addr, GFP_KERNEL); 1027 1001 if (!irq_q->q) ··· 1028 1006 memset(irq_q->q, 0xff, size * sizeof(u32)); 1029 1007 irq_q->size = size; 1030 1008 irq_q->qdma = qdma; 1009 + 1010 + netif_napi_add_tx(eth->napi_dev, &irq_q->napi, 1011 + airoha_qdma_tx_napi_poll); 1031 1012 1032 1013 airoha_qdma_wr(qdma, REG_TX_IRQ_BASE(id), dma_addr); 1033 
1014 airoha_qdma_rmw(qdma, REG_TX_IRQ_CFG(id), TX_IRQ_DEPTH_MASK, ··· 1064 1039 1065 1040 static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) 1066 1041 { 1067 - struct airoha_eth *eth = q->qdma->eth; 1068 - int i; 1042 + struct airoha_qdma *qdma = q->qdma; 1043 + struct airoha_eth *eth = qdma->eth; 1044 + int i, qid = q - &qdma->q_tx[0]; 1045 + u16 index = 0; 1069 1046 1070 1047 spin_lock_bh(&q->lock); 1071 1048 for (i = 0; i < q->ndesc; i++) { 1072 1049 struct airoha_queue_entry *e = &q->entry[i]; 1050 + struct airoha_qdma_desc *desc = &q->desc[i]; 1073 1051 1074 1052 if (!e->dma_addr) 1075 1053 continue; ··· 1083 1055 e->dma_addr = 0; 1084 1056 e->skb = NULL; 1085 1057 list_add_tail(&e->list, &q->tx_list); 1058 + 1059 + /* Reset DMA descriptor */ 1060 + WRITE_ONCE(desc->ctrl, 0); 1061 + WRITE_ONCE(desc->addr, 0); 1062 + WRITE_ONCE(desc->data, 0); 1063 + WRITE_ONCE(desc->msg0, 0); 1064 + WRITE_ONCE(desc->msg1, 0); 1065 + WRITE_ONCE(desc->msg2, 0); 1066 + 1086 1067 q->queued--; 1087 1068 } 1069 + 1070 + if (!list_empty(&q->tx_list)) { 1071 + struct airoha_queue_entry *e; 1072 + 1073 + e = list_first_entry(&q->tx_list, struct airoha_queue_entry, 1074 + list); 1075 + index = e - q->entry; 1076 + } 1077 + /* Set TX_DMA_IDX to TX_CPU_IDX to notify the hw the QDMA TX ring is 1078 + * empty. 
1079 + */ 1080 + airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, 1081 + FIELD_PREP(TX_RING_CPU_IDX_MASK, index)); 1082 + airoha_qdma_rmw(qdma, REG_TX_DMA_IDX(qid), TX_RING_DMA_IDX_MASK, 1083 + FIELD_PREP(TX_RING_DMA_IDX_MASK, index)); 1084 + 1088 1085 spin_unlock_bh(&q->lock); 1089 1086 } 1090 1087 ··· 1451 1398 } 1452 1399 } 1453 1400 1454 - for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) 1401 + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) { 1402 + if (!qdma->q_tx_irq[i].size) 1403 + continue; 1404 + 1455 1405 netif_napi_del(&qdma->q_tx_irq[i].napi); 1406 + } 1456 1407 1457 1408 for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { 1458 1409 if (!qdma->q_tx[i].ndesc) ··· 1784 1727 { 1785 1728 struct airoha_eth *eth = port->qdma->eth; 1786 1729 u32 val, pse_port, chan; 1787 - int src_port; 1730 + int i, src_port; 1788 1731 1789 1732 /* Forward the traffic to the proper GDM port */ 1790 1733 pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3 ··· 1826 1769 SP_CPORT_MASK(val), 1827 1770 __field_prep(SP_CPORT_MASK(val), FE_PSE_PORT_CDM2)); 1828 1771 1772 + for (i = 0; i < eth->soc->num_ppe; i++) 1773 + airoha_ppe_set_cpu_port(port, i, AIROHA_GDM2_IDX); 1774 + 1829 1775 if (port->id == AIROHA_GDM4_IDX && airoha_is_7581(eth)) { 1830 1776 u32 mask = FC_ID_OF_SRC_PORT_MASK(port->nbq); 1831 1777 ··· 1867 1807 } 1868 1808 1869 1809 for (i = 0; i < eth->soc->num_ppe; i++) 1870 - airoha_ppe_set_cpu_port(port, i); 1810 + airoha_ppe_set_cpu_port(port, i, 1811 + airoha_get_fe_port(port)); 1871 1812 1872 1813 return 0; 1873 1814 } ··· 2045 1984 if (q->queued + nr_frags >= q->ndesc) { 2046 1985 /* not enough space in the queue */ 2047 1986 netif_tx_stop_queue(txq); 1987 + q->txq_stopped = true; 2048 1988 spin_unlock_bh(&q->lock); 2049 1989 return NETDEV_TX_BUSY; 2050 1990 } ··· 2101 2039 TX_RING_CPU_IDX_MASK, 2102 2040 FIELD_PREP(TX_RING_CPU_IDX_MASK, index)); 2103 2041 2104 - if (q->ndesc - q->queued < q->free_thr) 2042 + if (q->ndesc - q->queued < 
q->free_thr) { 2105 2043 netif_tx_stop_queue(txq); 2044 + q->txq_stopped = true; 2045 + } 2106 2046 2107 2047 spin_unlock_bh(&q->lock); 2108 2048

+3 -1

drivers/net/ethernet/airoha/airoha_eth.h

··· 193 193 int ndesc; 194 194 int free_thr; 195 195 int buf_size; 196 + bool txq_stopped; 196 197 197 198 struct napi_struct napi; 198 199 struct page_pool *page_pool; ··· 654 653 bool airoha_is_valid_gdm_port(struct airoha_eth *eth, 655 654 struct airoha_gdm_port *port); 656 655 657 - void airoha_ppe_set_cpu_port(struct airoha_gdm_port *port, u8 ppe_id); 656 + void airoha_ppe_set_cpu_port(struct airoha_gdm_port *port, u8 ppe_id, 657 + u8 fport); 658 658 bool airoha_ppe_is_enabled(struct airoha_eth *eth, int index); 659 659 void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, 660 660 u16 hash, bool rx_wlan);

+31 -3

drivers/net/ethernet/airoha/airoha_ppe.c

··· 85 85 return FIELD_GET(AIROHA_FOE_IB1_BIND_TIMESTAMP, timestamp); 86 86 } 87 87 88 - void airoha_ppe_set_cpu_port(struct airoha_gdm_port *port, u8 ppe_id) 88 + void airoha_ppe_set_cpu_port(struct airoha_gdm_port *port, u8 ppe_id, u8 fport) 89 89 { 90 90 struct airoha_qdma *qdma = port->qdma; 91 - u8 fport = airoha_get_fe_port(port); 92 91 struct airoha_eth *eth = qdma->eth; 93 92 u8 qdma_id = qdma - &eth->qdma[0]; 94 93 u32 fe_cpu_port; ··· 181 182 if (!port) 182 183 continue; 183 184 184 - airoha_ppe_set_cpu_port(port, i); 185 + airoha_ppe_set_cpu_port(port, i, 186 + airoha_get_fe_port(port)); 185 187 } 186 188 } 187 189 } ··· 1356 1356 return npu; 1357 1357 } 1358 1358 1359 + static int airoha_ppe_wait_for_npu_init(struct airoha_eth *eth) 1360 + { 1361 + int err; 1362 + u32 val; 1363 + 1364 + /* PPE_FLOW_CFG default register value is 0. Since we reset FE 1365 + * during the device probe we can just check the configured value 1366 + * is not 0 here. 1367 + */ 1368 + err = read_poll_timeout(airoha_fe_rr, val, val, USEC_PER_MSEC, 1369 + 100 * USEC_PER_MSEC, false, eth, 1370 + REG_PPE_PPE_FLOW_CFG(0)); 1371 + if (err) 1372 + return err; 1373 + 1374 + if (airoha_ppe_is_enabled(eth, 1)) 1375 + err = read_poll_timeout(airoha_fe_rr, val, val, USEC_PER_MSEC, 1376 + 100 * USEC_PER_MSEC, false, eth, 1377 + REG_PPE_PPE_FLOW_CFG(1)); 1378 + 1379 + return err; 1380 + } 1381 + 1359 1382 static int airoha_ppe_offload_setup(struct airoha_eth *eth) 1360 1383 { 1361 1384 struct airoha_npu *npu = airoha_ppe_npu_get(eth); ··· 1389 1366 return PTR_ERR(npu); 1390 1367 1391 1368 err = npu->ops.ppe_init(npu); 1369 + if (err) 1370 + goto error_npu_put; 1371 + 1372 + /* Wait for NPU PPE configuration to complete */ 1373 + err = airoha_ppe_wait_for_npu_init(eth); 1392 1374 if (err) 1393 1375 goto error_npu_put; 1394 1376

+22 -8

drivers/net/ethernet/broadcom/bnge/bnge_core.c

··· 74 74 return rc; 75 75 } 76 76 77 + return 0; 78 + } 79 + 80 + static int bnge_func_qrcaps_qcfg(struct bnge_dev *bd) 81 + { 82 + int rc; 83 + 77 84 rc = bnge_hwrm_func_resc_qcaps(bd); 78 85 if (rc) { 79 86 dev_err(bd->dev, "query resc caps failure rc: %d\n", rc); ··· 140 133 141 134 bnge_hwrm_fw_set_time(bd); 142 135 143 - rc = bnge_hwrm_func_drv_rgtr(bd); 136 + /* Get the resources and configuration from firmware */ 137 + rc = bnge_func_qcaps(bd); 144 138 if (rc) { 145 - dev_err(bd->dev, "Failed to rgtr with firmware rc: %d\n", rc); 139 + dev_err(bd->dev, "Failed querying caps rc: %d\n", rc); 146 140 return rc; 147 141 } 148 142 149 143 rc = bnge_alloc_ctx_mem(bd); 150 144 if (rc) { 151 145 dev_err(bd->dev, "Failed to allocate ctx mem rc: %d\n", rc); 152 - goto err_func_unrgtr; 146 + goto err_free_ctx_mem; 153 147 } 154 148 155 - /* Get the resources and configuration from firmware */ 156 - rc = bnge_func_qcaps(bd); 149 + rc = bnge_hwrm_func_drv_rgtr(bd); 157 150 if (rc) { 158 - dev_err(bd->dev, "Failed initial configuration rc: %d\n", rc); 159 - rc = -ENODEV; 151 + dev_err(bd->dev, "Failed to rgtr with firmware rc: %d\n", rc); 152 + goto err_free_ctx_mem; 153 + } 154 + 155 + rc = bnge_func_qrcaps_qcfg(bd); 156 + if (rc) { 157 + dev_err(bd->dev, "Failed querying resources rc: %d\n", rc); 160 158 goto err_func_unrgtr; 161 159 } 162 160 ··· 170 158 return 0; 171 159 172 160 err_func_unrgtr: 173 - bnge_fw_unregister_dev(bd); 161 + bnge_hwrm_func_drv_unrgtr(bd); 162 + err_free_ctx_mem: 163 + bnge_free_ctx_mem(bd); 174 164 return rc; 175 165 } 176 166

-16

drivers/net/ethernet/broadcom/bnge/bnge_rmem.c

··· 324 324 u32 l2_qps, qp1_qps, max_qps; 325 325 u32 ena, entries_sp, entries; 326 326 u32 srqs, max_srqs, min; 327 - u32 num_mr, num_ah; 328 327 u32 extra_srqs = 0; 329 328 u32 extra_qps = 0; 330 329 u32 fast_qpmd_qps; ··· 388 389 389 390 if (!bnge_is_roce_en(bd)) 390 391 goto skip_rdma; 391 - 392 - ctxm = &ctx->ctx_arr[BNGE_CTX_MRAV]; 393 - /* 128K extra is needed to accommodate static AH context 394 - * allocation by f/w. 395 - */ 396 - num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256); 397 - num_ah = min_t(u32, num_mr, 1024 * 128); 398 - ctxm->split_entry_cnt = BNGE_CTX_MRAV_AV_SPLIT_ENTRY + 1; 399 - if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah) 400 - ctxm->mrav_av_entries = num_ah; 401 - 402 - rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, num_mr + num_ah, 2); 403 - if (rc) 404 - return rc; 405 - ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV; 406 392 407 393 ctxm = &ctx->ctx_arr[BNGE_CTX_TIM]; 408 394 rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, l2_qps + qp1_qps + extra_qps, 1);

+31 -27

drivers/net/ethernet/broadcom/bnxt/bnxt.c

··· 11132 11132 return rc; 11133 11133 } 11134 11134 11135 - static int bnxt_cfg_rx_mode(struct bnxt *); 11136 - static bool bnxt_mc_list_updated(struct bnxt *, u32 *); 11135 + static int bnxt_cfg_rx_mode(struct bnxt *, struct netdev_hw_addr_list *, bool); 11136 + static bool bnxt_mc_list_updated(struct bnxt *, u32 *, 11137 + const struct netdev_hw_addr_list *); 11137 11138 11138 11139 static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) 11139 11140 { ··· 11224 11223 } else if (bp->dev->flags & IFF_MULTICAST) { 11225 11224 u32 mask = 0; 11226 11225 11227 - bnxt_mc_list_updated(bp, &mask); 11226 + bnxt_mc_list_updated(bp, &mask, &bp->dev->mc); 11228 11227 vnic->rx_mask |= mask; 11229 11228 } 11230 11229 11231 - rc = bnxt_cfg_rx_mode(bp); 11230 + rc = bnxt_cfg_rx_mode(bp, &bp->dev->uc, true); 11232 11231 if (rc) 11233 11232 goto err_out; 11234 11233 ··· 13623 13622 bnxt_get_one_ring_drv_stats(bp, stats, &bp->bnapi[i]->cp_ring); 13624 13623 } 13625 13624 13626 - static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask) 13625 + static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask, 13626 + const struct netdev_hw_addr_list *mc) 13627 13627 { 13628 13628 struct bnxt_vnic_info *vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT]; 13629 - struct net_device *dev = bp->dev; 13630 13629 struct netdev_hw_addr *ha; 13631 13630 u8 *haddr; 13632 13631 int mc_count = 0; 13633 13632 bool update = false; 13634 13633 int off = 0; 13635 13634 13636 - netdev_for_each_mc_addr(ha, dev) { 13635 + netdev_hw_addr_list_for_each(ha, mc) { 13637 13636 if (mc_count >= BNXT_MAX_MC_ADDRS) { 13638 13637 *rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; 13639 13638 vnic->mc_list_count = 0; ··· 13657 13656 return update; 13658 13657 } 13659 13658 13660 - static bool bnxt_uc_list_updated(struct bnxt *bp) 13659 + static bool bnxt_uc_list_updated(struct bnxt *bp, 13660 + const struct netdev_hw_addr_list *uc) 13661 13661 { 13662 - struct net_device *dev = bp->dev; 13663 13662 struct 
bnxt_vnic_info *vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT]; 13664 13663 struct netdev_hw_addr *ha; 13665 13664 int off = 0; 13666 13665 13667 - if (netdev_uc_count(dev) != (vnic->uc_filter_count - 1)) 13666 + if (netdev_hw_addr_list_count(uc) != (vnic->uc_filter_count - 1)) 13668 13667 return true; 13669 13668 13670 - netdev_for_each_uc_addr(ha, dev) { 13669 + netdev_hw_addr_list_for_each(ha, uc) { 13671 13670 if (!ether_addr_equal(ha->addr, vnic->uc_list + off)) 13672 13671 return true; 13673 13672 ··· 13676 13675 return false; 13677 13676 } 13678 13677 13679 - static void bnxt_set_rx_mode(struct net_device *dev) 13678 + static void bnxt_set_rx_mode(struct net_device *dev, 13679 + struct netdev_hw_addr_list *uc, 13680 + struct netdev_hw_addr_list *mc) 13680 13681 { 13681 13682 struct bnxt *bp = netdev_priv(dev); 13682 13683 struct bnxt_vnic_info *vnic; ··· 13699 13696 if (dev->flags & IFF_PROMISC) 13700 13697 mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; 13701 13698 13702 - uc_update = bnxt_uc_list_updated(bp); 13699 + uc_update = bnxt_uc_list_updated(bp, uc); 13703 13700 13704 13701 if (dev->flags & IFF_BROADCAST) 13705 13702 mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; ··· 13707 13704 mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; 13708 13705 vnic->mc_list_count = 0; 13709 13706 } else if (dev->flags & IFF_MULTICAST) { 13710 - mc_update = bnxt_mc_list_updated(bp, &mask); 13707 + mc_update = bnxt_mc_list_updated(bp, &mask, mc); 13711 13708 } 13712 13709 13713 13710 if (mask != vnic->rx_mask || uc_update || mc_update) { 13714 13711 vnic->rx_mask = mask; 13715 13712 13716 - bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT); 13713 + bnxt_cfg_rx_mode(bp, uc, uc_update); 13717 13714 } 13718 13715 } 13719 13716 13720 - static int bnxt_cfg_rx_mode(struct bnxt *bp) 13717 + static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc, 13718 + bool uc_update) 13721 13719 { 13722 13720 struct net_device *dev = bp->dev; 13723 13721 struct bnxt_vnic_info *vnic = 
&bp->vnic_info[BNXT_VNIC_DEFAULT]; 13724 13722 struct netdev_hw_addr *ha; 13725 13723 int i, off = 0, rc; 13726 - bool uc_update; 13727 - 13728 - netif_addr_lock_bh(dev); 13729 - uc_update = bnxt_uc_list_updated(bp); 13730 - netif_addr_unlock_bh(dev); 13731 13724 13732 13725 if (!uc_update) 13733 13726 goto skip_uc; ··· 13738 13739 vnic->uc_filter_count = 1; 13739 13740 13740 13741 netif_addr_lock_bh(dev); 13741 - if (netdev_uc_count(dev) > (BNXT_MAX_UC_ADDRS - 1)) { 13742 + if (netdev_hw_addr_list_count(uc) > (BNXT_MAX_UC_ADDRS - 1)) { 13742 13743 vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; 13743 13744 } else { 13744 - netdev_for_each_uc_addr(ha, dev) { 13745 + netdev_hw_addr_list_for_each(ha, uc) { 13745 13746 memcpy(vnic->uc_list + off, ha->addr, ETH_ALEN); 13746 13747 off += ETH_ALEN; 13747 13748 vnic->uc_filter_count++; ··· 14707 14708 static void bnxt_sp_task(struct work_struct *work) 14708 14709 { 14709 14710 struct bnxt *bp = container_of(work, struct bnxt, sp_task); 14711 + struct net_device *dev = bp->dev; 14710 14712 14711 14713 set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); 14712 14714 smp_mb__after_atomic(); ··· 14720 14720 bnxt_ulp_restart(bp); 14721 14721 bnxt_reenable_sriov(bp); 14722 14722 } 14723 - 14724 - if (test_and_clear_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event)) 14725 - bnxt_cfg_rx_mode(bp); 14726 14723 14727 14724 if (test_and_clear_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event)) 14728 14725 bnxt_cfg_ntp_filters(bp); ··· 14785 14788 /* These functions below will clear BNXT_STATE_IN_SP_TASK. They 14786 14789 * must be the last functions to be called before exiting. 
14787 14790 */ 14791 + if (test_and_clear_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event)) { 14792 + bnxt_lock_sp(bp); 14793 + if (test_bit(BNXT_STATE_OPEN, &bp->state)) 14794 + bnxt_cfg_rx_mode(bp, &dev->uc, true); 14795 + bnxt_unlock_sp(bp); 14796 + } 14797 + 14788 14798 if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) 14789 14799 bnxt_reset(bp, false); 14790 14800 ··· 15993 15989 .ndo_start_xmit = bnxt_start_xmit, 15994 15990 .ndo_stop = bnxt_close, 15995 15991 .ndo_get_stats64 = bnxt_get_stats64, 15996 - .ndo_set_rx_mode = bnxt_set_rx_mode, 15992 + .ndo_set_rx_mode_async = bnxt_set_rx_mode, 15997 15993 .ndo_eth_ioctl = bnxt_ioctl, 15998 15994 .ndo_validate_addr = eth_validate_addr, 15999 15995 .ndo_set_mac_address = bnxt_change_mac_addr,

+137 -94

drivers/net/ethernet/freescale/enetc/ntmp.c

··· 7 7 #include <linux/dma-mapping.h> 8 8 #include <linux/fsl/netc_global.h> 9 9 #include <linux/iopoll.h> 10 + #include <linux/vmalloc.h> 10 11 11 12 #include "ntmp_private.h" 12 13 ··· 43 42 if (!cbdr->addr_base) 44 43 return -ENOMEM; 45 44 45 + cbdr->swcbd = vcalloc(cbd_num, sizeof(struct netc_swcbd)); 46 + if (!cbdr->swcbd) { 47 + dma_free_coherent(dev, size, cbdr->addr_base, cbdr->dma_base); 48 + return -ENOMEM; 49 + } 50 + 46 51 cbdr->dma_size = size; 47 52 cbdr->bd_num = cbd_num; 48 53 cbdr->regs = *regs; ··· 59 52 cbdr->addr_base_align = PTR_ALIGN(cbdr->addr_base, 60 53 NTMP_BASE_ADDR_ALIGN); 61 54 62 - spin_lock_init(&cbdr->ring_lock); 55 + mutex_init(&cbdr->ring_lock); 63 56 64 57 cbdr->next_to_use = netc_read(cbdr->regs.pir); 65 - cbdr->next_to_clean = netc_read(cbdr->regs.cir); 58 + cbdr->next_to_clean = netc_read(cbdr->regs.cir) & NETC_CBDRCIR_INDEX; 66 59 67 60 /* Step 1: Configure the base address of the Control BD Ring */ 68 61 netc_write(cbdr->regs.bar0, lower_32_bits(cbdr->dma_base_align)); ··· 78 71 } 79 72 EXPORT_SYMBOL_GPL(ntmp_init_cbdr); 80 73 74 + static void ntmp_free_data_mem(struct device *dev, struct netc_swcbd *swcbd) 75 + { 76 + if (unlikely(!swcbd->buf)) 77 + return; 78 + 79 + dma_free_coherent(dev, swcbd->size + NTMP_DATA_ADDR_ALIGN, 80 + swcbd->buf, swcbd->dma); 81 + } 82 + 81 83 void ntmp_free_cbdr(struct netc_cbdr *cbdr) 82 84 { 83 85 /* Disable the Control BD Ring */ 84 86 netc_write(cbdr->regs.mr, 0); 87 + 88 + for (int i = 0; i < cbdr->bd_num; i++) 89 + ntmp_free_data_mem(cbdr->dev, &cbdr->swcbd[i]); 90 + 91 + vfree(cbdr->swcbd); 85 92 dma_free_coherent(cbdr->dev, cbdr->dma_size, cbdr->addr_base, 86 93 cbdr->dma_base); 87 94 memset(cbdr, 0, sizeof(*cbdr)); ··· 115 94 116 95 static void ntmp_clean_cbdr(struct netc_cbdr *cbdr) 117 96 { 118 - union netc_cbd *cbd; 119 - int i; 97 + int i = cbdr->next_to_clean; 120 98 121 - i = cbdr->next_to_clean; 122 - while (netc_read(cbdr->regs.cir) != i) { 123 - cbd = ntmp_get_cbd(cbdr, i); 99 
+ while ((netc_read(cbdr->regs.cir) & NETC_CBDRCIR_INDEX) != i) { 100 + union netc_cbd *cbd = ntmp_get_cbd(cbdr, i); 101 + struct netc_swcbd *swcbd = &cbdr->swcbd[i]; 102 + 103 + ntmp_free_data_mem(cbdr->dev, swcbd); 104 + memset(swcbd, 0, sizeof(*swcbd)); 124 105 memset(cbd, 0, sizeof(*cbd)); 125 106 i = (i + 1) % cbdr->bd_num; 126 107 } 127 108 109 + dma_wmb(); 128 110 cbdr->next_to_clean = i; 129 111 } 130 112 131 - static int netc_xmit_ntmp_cmd(struct ntmp_user *user, union netc_cbd *cbd) 113 + static void ntmp_select_and_lock_cbdr(struct ntmp_user *user, 114 + struct netc_cbdr **cbdr) 115 + { 116 + /* Currently only ENETC is supported, and it has only one command 117 + * BD ring. 118 + */ 119 + *cbdr = &user->ring[0]; 120 + 121 + mutex_lock(&(*cbdr)->ring_lock); 122 + } 123 + 124 + static void ntmp_unlock_cbdr(struct netc_cbdr *cbdr) 125 + { 126 + mutex_unlock(&cbdr->ring_lock); 127 + } 128 + 129 + static int netc_xmit_ntmp_cmd(struct netc_cbdr *cbdr, union netc_cbd *cbd, 130 + struct netc_swcbd *swcbd) 132 131 { 133 132 union netc_cbd *cur_cbd; 134 - struct netc_cbdr *cbdr; 135 - int i, err; 133 + int i, err, used_bds; 136 134 u16 status; 137 135 u32 val; 138 136 139 - /* Currently only i.MX95 ENETC is supported, and it only has one 140 - * command BD ring 141 - */ 142 - cbdr = &user->ring[0]; 143 - 144 - spin_lock_bh(&cbdr->ring_lock); 145 - 146 - if (unlikely(!ntmp_get_free_cbd_num(cbdr))) 137 + used_bds = cbdr->bd_num - ntmp_get_free_cbd_num(cbdr); 138 + if (unlikely(used_bds >= NETC_CBDR_CLEAN_WORK)) { 147 139 ntmp_clean_cbdr(cbdr); 140 + if (unlikely(!ntmp_get_free_cbd_num(cbdr))) { 141 + ntmp_free_data_mem(cbdr->dev, swcbd); 142 + return -EBUSY; 143 + } 144 + } 148 145 149 146 i = cbdr->next_to_use; 150 147 cur_cbd = ntmp_get_cbd(cbdr, i); 151 148 *cur_cbd = *cbd; 149 + cbdr->swcbd[i] = *swcbd; 152 150 dma_wmb(); 153 151 154 152 /* Update producer index of both software and hardware */ ··· 175 135 cbdr->next_to_use = i; 176 136 
netc_write(cbdr->regs.pir, i); 177 137 178 - err = read_poll_timeout_atomic(netc_read, val, val == i, 179 - NETC_CBDR_DELAY_US, NETC_CBDR_TIMEOUT, 180 - true, cbdr->regs.cir); 138 + err = read_poll_timeout(netc_read, val, 139 + (val & NETC_CBDRCIR_INDEX) == i, 140 + NETC_CBDR_DELAY_US, NETC_CBDR_TIMEOUT, 141 + true, cbdr->regs.cir); 181 142 if (unlikely(err)) 182 - goto cbdr_unlock; 143 + return err; 144 + 145 + if (unlikely(val & NETC_CBDRCIR_SBE)) { 146 + dev_err(cbdr->dev, "Command BD system bus error\n"); 147 + return -EIO; 148 + } 183 149 184 150 dma_rmb(); 185 151 /* Get the writeback command BD, because the caller may need ··· 196 150 /* Check the writeback error status */ 197 151 status = le16_to_cpu(cbd->resp_hdr.error_rr) & NTMP_RESP_ERROR; 198 152 if (unlikely(status)) { 199 - err = -EIO; 200 - dev_err(user->dev, "Command BD error: 0x%04x\n", status); 153 + dev_err(cbdr->dev, "Command BD error: 0x%04x\n", status); 154 + return -EIO; 201 155 } 202 - 203 - ntmp_clean_cbdr(cbdr); 204 - dma_wmb(); 205 - 206 - cbdr_unlock: 207 - spin_unlock_bh(&cbdr->ring_lock); 208 - 209 - return err; 210 - } 211 - 212 - static int ntmp_alloc_data_mem(struct ntmp_dma_buf *data, void **buf_align) 213 - { 214 - void *buf; 215 - 216 - buf = dma_alloc_coherent(data->dev, data->size + NTMP_DATA_ADDR_ALIGN, 217 - &data->dma, GFP_KERNEL); 218 - if (!buf) 219 - return -ENOMEM; 220 - 221 - data->buf = buf; 222 - *buf_align = PTR_ALIGN(buf, NTMP_DATA_ADDR_ALIGN); 223 156 224 157 return 0; 225 158 } 226 159 227 - static void ntmp_free_data_mem(struct ntmp_dma_buf *data) 160 + static int ntmp_alloc_data_mem(struct device *dev, struct netc_swcbd *swcbd, 161 + void **buf_align) 228 162 { 229 - dma_free_coherent(data->dev, data->size + NTMP_DATA_ADDR_ALIGN, 230 - data->buf, data->dma); 163 + void *buf; 164 + 165 + buf = dma_alloc_coherent(dev, swcbd->size + NTMP_DATA_ADDR_ALIGN, 166 + &swcbd->dma, GFP_KERNEL); 167 + if (!buf) 168 + return -ENOMEM; 169 + 170 + swcbd->buf = buf; 171 + 
*buf_align = PTR_ALIGN(buf, NTMP_DATA_ADDR_ALIGN); 172 + 173 + return 0; 231 174 } 232 175 233 176 static void ntmp_fill_request_hdr(union netc_cbd *cbd, dma_addr_t dma, ··· 269 234 u8 tbl_ver, u32 entry_id, u32 req_len, 270 235 u32 resp_len) 271 236 { 272 - struct ntmp_dma_buf data = { 273 - .dev = user->dev, 237 + struct netc_swcbd swcbd = { 274 238 .size = max(req_len, resp_len), 275 239 }; 276 240 struct ntmp_req_by_eid *req; 241 + struct netc_cbdr *cbdr; 277 242 union netc_cbd cbd; 278 243 int err; 279 244 280 - err = ntmp_alloc_data_mem(&data, (void **)&req); 245 + err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req); 281 246 if (err) 282 247 return err; 283 248 284 249 ntmp_fill_crd_eid(req, tbl_ver, 0, 0, entry_id); 285 - ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(req_len, resp_len), 250 + ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(req_len, resp_len), 286 251 tbl_id, NTMP_CMD_DELETE, NTMP_AM_ENTRY_ID); 287 252 288 - err = netc_xmit_ntmp_cmd(user, &cbd); 253 + ntmp_select_and_lock_cbdr(user, &cbdr); 254 + err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd); 289 255 if (err) 290 256 dev_err(user->dev, 291 257 "Failed to delete entry 0x%x of %s, err: %pe", 292 258 entry_id, ntmp_table_name(tbl_id), ERR_PTR(err)); 293 - 294 - ntmp_free_data_mem(&data); 259 + ntmp_unlock_cbdr(cbdr); 295 260 296 261 return err; 297 262 } 298 263 299 - static int ntmp_query_entry_by_id(struct ntmp_user *user, int tbl_id, 300 - u32 len, struct ntmp_req_by_eid *req, 301 - dma_addr_t dma, bool compare_eid) 264 + static int ntmp_query_entry_by_id(struct netc_cbdr *cbdr, int tbl_id, 265 + struct ntmp_req_by_eid *req, 266 + struct netc_swcbd *swcbd, 267 + bool compare_eid) 302 268 { 269 + u32 len = NTMP_LEN(sizeof(*req), swcbd->size); 303 270 struct ntmp_cmn_resp_query *resp; 304 271 int cmd = NTMP_CMD_QUERY; 305 272 union netc_cbd cbd; ··· 313 276 cmd = NTMP_CMD_QU; 314 277 315 278 /* Request header */ 316 - ntmp_fill_request_hdr(&cbd, dma, len, tbl_id, cmd, NTMP_AM_ENTRY_ID); 
317 - err = netc_xmit_ntmp_cmd(user, &cbd); 279 + ntmp_fill_request_hdr(&cbd, swcbd->dma, len, tbl_id, cmd, 280 + NTMP_AM_ENTRY_ID); 281 + err = netc_xmit_ntmp_cmd(cbdr, &cbd, swcbd); 318 282 if (err) { 319 - dev_err(user->dev, 283 + dev_err(cbdr->dev, 320 284 "Failed to query entry 0x%x of %s, err: %pe\n", 321 285 entry_id, ntmp_table_name(tbl_id), ERR_PTR(err)); 322 286 return err; ··· 331 293 332 294 resp = (struct ntmp_cmn_resp_query *)req; 333 295 if (unlikely(le32_to_cpu(resp->entry_id) != entry_id)) { 334 - dev_err(user->dev, 296 + dev_err(cbdr->dev, 335 297 "%s: query EID 0x%x doesn't match response EID 0x%x\n", 336 298 ntmp_table_name(tbl_id), entry_id, le32_to_cpu(resp->entry_id)); 337 299 return -EIO; ··· 343 305 int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, 344 306 struct maft_entry_data *maft) 345 307 { 346 - struct ntmp_dma_buf data = { 347 - .dev = user->dev, 308 + struct netc_swcbd swcbd = { 348 309 .size = sizeof(struct maft_req_add), 349 310 }; 350 311 struct maft_req_add *req; 312 + struct netc_cbdr *cbdr; 351 313 union netc_cbd cbd; 352 314 int err; 353 315 354 - err = ntmp_alloc_data_mem(&data, (void **)&req); 316 + err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req); 355 317 if (err) 356 318 return err; 357 319 ··· 360 322 req->keye = maft->keye; 361 323 req->cfge = maft->cfge; 362 324 363 - ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(data.size, 0), 325 + ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(swcbd.size, 0), 364 326 NTMP_MAFT_ID, NTMP_CMD_ADD, NTMP_AM_ENTRY_ID); 365 - err = netc_xmit_ntmp_cmd(user, &cbd); 327 + 328 + ntmp_select_and_lock_cbdr(user, &cbdr); 329 + err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd); 366 330 if (err) 367 331 dev_err(user->dev, "Failed to add MAFT entry 0x%x, err: %pe\n", 368 332 entry_id, ERR_PTR(err)); 369 - 370 - ntmp_free_data_mem(&data); 333 + ntmp_unlock_cbdr(cbdr); 371 334 372 335 return err; 373 336 } ··· 377 338 int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, 
378 339 struct maft_entry_data *maft) 379 340 { 380 - struct ntmp_dma_buf data = { 381 - .dev = user->dev, 341 + struct netc_swcbd swcbd = { 382 342 .size = sizeof(struct maft_resp_query), 383 343 }; 384 344 struct maft_resp_query *resp; 385 345 struct ntmp_req_by_eid *req; 346 + struct netc_cbdr *cbdr; 386 347 int err; 387 348 388 - err = ntmp_alloc_data_mem(&data, (void **)&req); 349 + err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req); 389 350 if (err) 390 351 return err; 391 352 392 353 ntmp_fill_crd_eid(req, user->tbl.maft_ver, 0, 0, entry_id); 393 - err = ntmp_query_entry_by_id(user, NTMP_MAFT_ID, 394 - NTMP_LEN(sizeof(*req), data.size), 395 - req, data.dma, true); 354 + 355 + ntmp_select_and_lock_cbdr(user, &cbdr); 356 + err = ntmp_query_entry_by_id(cbdr, NTMP_MAFT_ID, req, &swcbd, true); 396 357 if (err) 397 - goto end; 358 + goto unlock_cbdr; 398 359 399 360 resp = (struct maft_resp_query *)req; 400 361 maft->keye = resp->keye; 401 362 maft->cfge = resp->cfge; 402 363 403 - end: 404 - ntmp_free_data_mem(&data); 364 + unlock_cbdr: 365 + ntmp_unlock_cbdr(cbdr); 405 366 406 367 return err; 407 368 } ··· 417 378 int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table, 418 379 int count) 419 380 { 420 - struct ntmp_dma_buf data = {.dev = user->dev}; 421 381 struct rsst_req_update *req; 382 + struct netc_swcbd swcbd; 383 + struct netc_cbdr *cbdr; 422 384 union netc_cbd cbd; 423 385 int err, i; 424 386 ··· 427 387 /* HW only takes in a full 64 entry table */ 428 388 return -EINVAL; 429 389 430 - data.size = struct_size(req, groups, count); 431 - err = ntmp_alloc_data_mem(&data, (void **)&req); 390 + swcbd.size = struct_size(req, groups, count); 391 + err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req); 432 392 if (err) 433 393 return err; 434 394 ··· 438 398 for (i = 0; i < count; i++) 439 399 req->groups[i] = (u8)(table[i]); 440 400 441 - ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(data.size, 0), 401 + ntmp_fill_request_hdr(&cbd, 
swcbd.dma, NTMP_LEN(swcbd.size, 0), 442 402 NTMP_RSST_ID, NTMP_CMD_UPDATE, NTMP_AM_ENTRY_ID); 443 403 444 - err = netc_xmit_ntmp_cmd(user, &cbd); 404 + ntmp_select_and_lock_cbdr(user, &cbdr); 405 + err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd); 445 406 if (err) 446 407 dev_err(user->dev, "Failed to update RSST entry, err: %pe\n", 447 408 ERR_PTR(err)); 448 - 449 - ntmp_free_data_mem(&data); 409 + ntmp_unlock_cbdr(cbdr); 450 410 451 411 return err; 452 412 } ··· 454 414 455 415 int ntmp_rsst_query_entry(struct ntmp_user *user, u32 *table, int count) 456 416 { 457 - struct ntmp_dma_buf data = {.dev = user->dev}; 458 417 struct ntmp_req_by_eid *req; 418 + struct netc_swcbd swcbd; 419 + struct netc_cbdr *cbdr; 459 420 union netc_cbd cbd; 460 421 int err, i; 461 422 u8 *group; ··· 465 424 /* HW only takes in a full 64 entry table */ 466 425 return -EINVAL; 467 426 468 - data.size = NTMP_ENTRY_ID_SIZE + RSST_STSE_DATA_SIZE(count) + 469 - RSST_CFGE_DATA_SIZE(count); 470 - err = ntmp_alloc_data_mem(&data, (void **)&req); 427 + swcbd.size = NTMP_ENTRY_ID_SIZE + RSST_STSE_DATA_SIZE(count) + 428 + RSST_CFGE_DATA_SIZE(count); 429 + err = ntmp_alloc_data_mem(user->dev, &swcbd, (void **)&req); 471 430 if (err) 472 431 return err; 473 432 474 433 /* Set the request data buffer */ 475 434 ntmp_fill_crd_eid(req, user->tbl.rsst_ver, 0, 0, 0); 476 - ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(sizeof(*req), data.size), 435 + ntmp_fill_request_hdr(&cbd, swcbd.dma, NTMP_LEN(sizeof(*req), swcbd.size), 477 436 NTMP_RSST_ID, NTMP_CMD_QUERY, NTMP_AM_ENTRY_ID); 478 - err = netc_xmit_ntmp_cmd(user, &cbd); 437 + 438 + ntmp_select_and_lock_cbdr(user, &cbdr); 439 + err = netc_xmit_ntmp_cmd(cbdr, &cbd, &swcbd); 479 440 if (err) { 480 441 dev_err(user->dev, "Failed to query RSST entry, err: %pe\n", 481 442 ERR_PTR(err)); 482 - goto end; 443 + goto unlock_cbdr; 483 444 } 484 445 485 446 group = (u8 *)req; ··· 489 446 for (i = 0; i < count; i++) 490 447 table[i] = group[i]; 491 448 492 - end: 493 
- ntmp_free_data_mem(&data); 449 + unlock_cbdr: 450 + ntmp_unlock_cbdr(cbdr); 494 451 495 452 return err; 496 453 }

+3 -7

drivers/net/ethernet/freescale/enetc/ntmp_private.h

··· 12 12 13 13 #define NTMP_EID_REQ_LEN 8 14 14 #define NETC_CBDR_BD_NUM 256 15 + #define NETC_CBDRCIR_INDEX GENMASK(9, 0) 16 + #define NETC_CBDRCIR_SBE BIT(31) 17 + #define NETC_CBDR_CLEAN_WORK 16 15 18 16 19 union netc_cbd { 17 20 struct { ··· 55 52 #define NTMP_RESP_RR BIT(15) 56 53 __le32 resv1[4]; 57 54 } resp_hdr; /* NTMP Response Message Header Format */ 58 - }; 59 - 60 - struct ntmp_dma_buf { 61 - struct device *dev; 62 - size_t size; 63 - void *buf; 64 - dma_addr_t dma; 65 55 }; 66 56 67 57 struct ntmp_cmn_req_data {

+1

drivers/net/ethernet/intel/e1000e/netdev.c

··· 7706 7706 err_register: 7707 7707 if (!(adapter->flags & FLAG_HAS_AMT)) 7708 7708 e1000e_release_hw_control(adapter); 7709 + e1000e_ptp_remove(adapter); 7709 7710 err_eeprom: 7710 7711 if (hw->phy.ops.check_reset_block && !hw->phy.ops.check_reset_block(hw)) 7711 7712 e1000_phy_hw_reset(&adapter->hw);

-1

drivers/net/ethernet/intel/i40e/i40e_main.c

··· 13783 13783 netdev->neigh_priv_len = sizeof(u32) * 4; 13784 13784 13785 13785 netdev->priv_flags |= IFF_UNICAST_FLT; 13786 - netdev->priv_flags |= IFF_SUPP_NOFCS; 13787 13786 /* Setup netdev TC information */ 13788 13787 i40e_vsi_config_netdev_tc(vsi, vsi->tc_config.enabled_tc); 13789 13788

+11 -5

drivers/net/ethernet/intel/iavf/iavf_main.c

··· 1150 1150 /** 1151 1151 * iavf_set_rx_mode - NDO callback to set the netdev filters 1152 1152 * @netdev: network interface device structure 1153 + * @uc: snapshot of uc address list 1154 + * @mc: snapshot of mc address list 1153 1155 **/ 1154 - static void iavf_set_rx_mode(struct net_device *netdev) 1156 + static void iavf_set_rx_mode(struct net_device *netdev, 1157 + struct netdev_hw_addr_list *uc, 1158 + struct netdev_hw_addr_list *mc) 1155 1159 { 1156 1160 struct iavf_adapter *adapter = netdev_priv(netdev); 1157 1161 1158 1162 spin_lock_bh(&adapter->mac_vlan_list_lock); 1159 - __dev_uc_sync(netdev, iavf_addr_sync, iavf_addr_unsync); 1160 - __dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync); 1163 + __hw_addr_sync_dev(uc, netdev, iavf_addr_sync, iavf_addr_unsync); 1164 + __hw_addr_sync_dev(mc, netdev, iavf_addr_sync, iavf_addr_unsync); 1161 1165 spin_unlock_bh(&adapter->mac_vlan_list_lock); 1162 1166 1163 1167 spin_lock_bh(&adapter->current_netdev_promisc_flags_lock); ··· 1214 1210 struct net_device *netdev = adapter->netdev; 1215 1211 int i; 1216 1212 1217 - iavf_set_rx_mode(netdev); 1213 + netif_addr_lock_bh(netdev); 1214 + iavf_set_rx_mode(netdev, &netdev->uc, &netdev->mc); 1215 + netif_addr_unlock_bh(netdev); 1218 1216 1219 1217 iavf_configure_tx(adapter); 1220 1218 iavf_configure_rx(adapter); ··· 5159 5153 .ndo_open = iavf_open, 5160 5154 .ndo_stop = iavf_close, 5161 5155 .ndo_start_xmit = iavf_xmit_frame, 5162 - .ndo_set_rx_mode = iavf_set_rx_mode, 5156 + .ndo_set_rx_mode_async = iavf_set_rx_mode, 5163 5157 .ndo_validate_addr = eth_validate_addr, 5164 5158 .ndo_set_mac_address = iavf_set_mac, 5165 5159 .ndo_change_mtu = iavf_change_mtu,

+1 -1

drivers/net/ethernet/intel/iavf/iavf_type.h

··· 277 277 /* L2 Tag 2 Presence */ 278 278 #define IAVF_RXD_LEGACY_L2TAG2P_M BIT(0) 279 279 /* Stripped S-TAG VLAN from the receive packet */ 280 - #define IAVF_RXD_LEGACY_L2TAG2_M GENMASK_ULL(63, 32) 280 + #define IAVF_RXD_LEGACY_L2TAG2_M GENMASK_ULL(63, 48) 281 281 /* Stripped S-TAG VLAN from the receive packet */ 282 282 #define IAVF_RXD_FLEX_L2TAG2_2_M GENMASK_ULL(63, 48) 283 283 /* The packet is a UDP tunneled packet */

+2 -2

drivers/net/ethernet/intel/ice/ice.h

··· 753 753 754 754 static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) 755 755 { 756 - ring->flags |= ICE_TX_FLAGS_RING_XDP; 756 + set_bit(ICE_TX_RING_FLAGS_XDP, ring->flags); 757 757 } 758 758 759 759 /** ··· 778 778 */ 779 779 static inline bool ice_is_txtime_cfg(const struct ice_tx_ring *ring) 780 780 { 781 - return !!(ring->flags & ICE_TX_FLAGS_TXTIME); 781 + return test_bit(ICE_TX_RING_FLAGS_TXTIME, ring->flags); 782 782 } 783 783 784 784 /**

+1 -1

drivers/net/ethernet/intel/ice/ice_adminq_cmd.h

··· 1252 1252 #define ICE_AQ_LINK_PWR_QSFP_CLASS_3 2 1253 1253 #define ICE_AQ_LINK_PWR_QSFP_CLASS_4 3 1254 1254 __le16 link_speed; 1255 - #define ICE_AQ_LINK_SPEED_M 0x7FF 1255 + #define ICE_AQ_LINK_SPEED_M GENMASK(11, 0) 1256 1256 #define ICE_AQ_LINK_SPEED_10MB BIT(0) 1257 1257 #define ICE_AQ_LINK_SPEED_100MB BIT(1) 1258 1258 #define ICE_AQ_LINK_SPEED_1000MB BIT(2)

+1 -1

drivers/net/ethernet/intel/ice/ice_dcb_lib.c

··· 943 943 /* if this is not already set it means a VLAN 0 + priority needs 944 944 * to be offloaded 945 945 */ 946 - if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2) 946 + if (test_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG2, tx_ring->flags)) 947 947 first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN; 948 948 else 949 949 first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;

+1

drivers/net/ethernet/intel/ice/ice_ethtool.c

··· 3290 3290 tx_rings[i].desc = NULL; 3291 3291 tx_rings[i].tx_buf = NULL; 3292 3292 tx_rings[i].tstamp_ring = NULL; 3293 + clear_bit(ICE_TX_RING_FLAGS_TXTIME, tx_rings[i].flags); 3293 3294 tx_rings[i].tx_tstamps = &pf->ptp.port.tx; 3294 3295 err = ice_setup_tx_ring(&tx_rings[i]); 3295 3296 if (err) {

+2 -2

drivers/net/ethernet/intel/ice/ice_lib.c

··· 1412 1412 ring->count = vsi->num_tx_desc; 1413 1413 ring->txq_teid = ICE_INVAL_TEID; 1414 1414 if (dvm_ena) 1415 - ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2; 1415 + set_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG2, ring->flags); 1416 1416 else 1417 - ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG1; 1417 + set_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG1, ring->flags); 1418 1418 WRITE_ONCE(vsi->tx_rings[i], ring); 1419 1419 } 1420 1420

+27 -94

drivers/net/ethernet/intel/ice/ice_main.c

··· 1923 1923 } 1924 1924 1925 1925 /** 1926 - * ice_force_phys_link_state - Force the physical link state 1927 - * @vsi: VSI to force the physical link state to up/down 1928 - * @link_up: true/false indicates to set the physical link to up/down 1929 - * 1930 - * Force the physical link state by getting the current PHY capabilities from 1931 - * hardware and setting the PHY config based on the determined capabilities. If 1932 - * link changes a link event will be triggered because both the Enable Automatic 1933 - * Link Update and LESM Enable bits are set when setting the PHY capabilities. 1934 - * 1935 - * Returns 0 on success, negative on failure 1936 - */ 1937 - static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) 1938 - { 1939 - struct ice_aqc_get_phy_caps_data *pcaps; 1940 - struct ice_aqc_set_phy_cfg_data *cfg; 1941 - struct ice_port_info *pi; 1942 - struct device *dev; 1943 - int retcode; 1944 - 1945 - if (!vsi || !vsi->port_info || !vsi->back) 1946 - return -EINVAL; 1947 - if (vsi->type != ICE_VSI_PF) 1948 - return 0; 1949 - 1950 - dev = ice_pf_to_dev(vsi->back); 1951 - 1952 - pi = vsi->port_info; 1953 - 1954 - pcaps = kzalloc_obj(*pcaps); 1955 - if (!pcaps) 1956 - return -ENOMEM; 1957 - 1958 - retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, 1959 - NULL); 1960 - if (retcode) { 1961 - dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n", 1962 - vsi->vsi_num, retcode); 1963 - retcode = -EIO; 1964 - goto out; 1965 - } 1966 - 1967 - /* No change in link */ 1968 - if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) && 1969 - link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) 1970 - goto out; 1971 - 1972 - /* Use the current user PHY configuration. The current user PHY 1973 - * configuration is initialized during probe from PHY capabilities 1974 - * software mode, and updated on set PHY configuration. 
1975 - */ 1976 - cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL); 1977 - if (!cfg) { 1978 - retcode = -ENOMEM; 1979 - goto out; 1980 - } 1981 - 1982 - cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; 1983 - if (link_up) 1984 - cfg->caps |= ICE_AQ_PHY_ENA_LINK; 1985 - else 1986 - cfg->caps &= ~ICE_AQ_PHY_ENA_LINK; 1987 - 1988 - retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL); 1989 - if (retcode) { 1990 - dev_err(dev, "Failed to set phy config, VSI %d error %d\n", 1991 - vsi->vsi_num, retcode); 1992 - retcode = -EIO; 1993 - } 1994 - 1995 - kfree(cfg); 1996 - out: 1997 - kfree(pcaps); 1998 - return retcode; 1999 - } 2000 - 2001 - /** 2002 1926 * ice_init_nvm_phy_type - Initialize the NVM PHY type 2003 1927 * @pi: port info structure 2004 1928 * ··· 1990 2066 * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state 1991 2067 * is used to indicate that the user PHY cfg default override is initialized 1992 2068 * and the PHY has not been configured with the default override settings. The 1993 - * state is set here, and cleared in ice_configure_phy the first time the PHY is 2069 + * state is set here, and cleared in ice_phy_cfg the first time the PHY is 1994 2070 * configured. 1995 2071 * 1996 2072 * This function should be called only if the FW doesn't support default ··· 2096 2172 } 2097 2173 2098 2174 /** 2099 - * ice_configure_phy - configure PHY 2175 + * ice_phy_cfg - configure PHY 2100 2176 * @vsi: VSI of PHY 2177 + * @link_en: true/false indicates to set link to enable/disable 2101 2178 * 2102 2179 * Set the PHY configuration. If the current PHY configuration is the same as 2103 - * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise 2104 - * configure the based get PHY capabilities for topology with media. 2180 + * the curr_user_phy_cfg and link_en hasn't changed, then do nothing to avoid 2181 + * link flap. 
Otherwise configure the PHY based get PHY capabilities for 2182 + * topology with media and link_en. 2183 + * 2184 + * Return: 0 on success, negative on failure 2105 2185 */ 2106 - static int ice_configure_phy(struct ice_vsi *vsi) 2186 + static int ice_phy_cfg(struct ice_vsi *vsi, bool link_en) 2107 2187 { 2108 2188 struct device *dev = ice_pf_to_dev(vsi->back); 2109 2189 struct ice_port_info *pi = vsi->port_info; ··· 2127 2199 phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) 2128 2200 return -EPERM; 2129 2201 2130 - if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) 2131 - return ice_force_phys_link_state(vsi, true); 2132 - 2133 2202 pcaps = kzalloc_obj(*pcaps); 2134 2203 if (!pcaps) 2135 2204 return -ENOMEM; ··· 2140 2215 goto done; 2141 2216 } 2142 2217 2143 - /* If PHY enable link is configured and configuration has not changed, 2144 - * there's nothing to do 2145 - */ 2146 - if (pcaps->caps & ICE_AQC_PHY_EN_LINK && 2218 + /* Configuration has not changed. There's nothing to do. 
*/ 2219 + if (link_en == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) && 2147 2220 ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg)) 2148 2221 goto done; 2149 2222 ··· 2205 2282 */ 2206 2283 ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req); 2207 2284 2208 - /* Enable link and link update */ 2209 - cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK; 2285 + /* Enable/Disable link and link update */ 2286 + cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; 2287 + if (link_en) 2288 + cfg->caps |= ICE_AQ_PHY_ENA_LINK; 2289 + else 2290 + cfg->caps &= ~ICE_AQ_PHY_ENA_LINK; 2210 2291 2211 2292 err = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL); 2212 2293 if (err) ··· 2263 2336 test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) 2264 2337 return; 2265 2338 2266 - err = ice_configure_phy(vsi); 2339 + err = ice_phy_cfg(vsi, true); 2267 2340 if (!err) 2268 2341 clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); 2269 2342 ··· 4819 4892 4820 4893 if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) { 4821 4894 struct ice_vsi *vsi = ice_get_main_vsi(pf); 4895 + struct ice_link_default_override_tlv *ldo; 4896 + bool link_en; 4897 + 4898 + ldo = &pf->link_dflt_override; 4899 + link_en = !(ldo->options & 4900 + ICE_LINK_OVERRIDE_AUTO_LINK_DIS); 4822 4901 4823 4902 if (vsi) 4824 - ice_configure_phy(vsi); 4903 + ice_phy_cfg(vsi, link_en); 4825 4904 } 4826 4905 } else { 4827 4906 set_bit(ICE_FLAG_NO_MEDIA, pf->flags); ··· 9640 9707 } 9641 9708 } 9642 9709 9643 - err = ice_configure_phy(vsi); 9710 + err = ice_phy_cfg(vsi, true); 9644 9711 if (err) { 9645 9712 netdev_err(netdev, "Failed to set physical link up, error %d\n", 9646 9713 err); ··· 9681 9748 } 9682 9749 9683 9750 if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { 9684 - int link_err = ice_force_phys_link_state(vsi, false); 9751 + int link_err = ice_phy_cfg(vsi, false); 9685 9752 9686 9753 if (link_err) { 9687 9754 if (link_err == -ENOMEDIUM)

+18 -26

drivers/net/ethernet/intel/ice/ice_ptp.c

··· 2710 2710 bool ice_ptp_tx_tstamps_pending(struct ice_pf *pf) 2711 2711 { 2712 2712 struct ice_hw *hw = &pf->hw; 2713 - unsigned int i; 2713 + int ret; 2714 2714 2715 2715 /* Check software indicator */ 2716 2716 switch (pf->ptp.tx_interrupt_mode) { ··· 2731 2731 } 2732 2732 2733 2733 /* Check hardware indicator */ 2734 - for (i = 0; i < ICE_GET_QUAD_NUM(hw->ptp.num_lports); i++) { 2735 - u64 tstamp_ready = 0; 2736 - int err; 2737 - 2738 - err = ice_get_phy_tx_tstamp_ready(&pf->hw, i, &tstamp_ready); 2739 - if (err || tstamp_ready) 2740 - return true; 2734 + ret = ice_check_phy_tx_tstamp_ready(hw); 2735 + if (ret < 0) { 2736 + dev_dbg(ice_pf_to_dev(pf), "Unable to read PHY Tx timestamp ready bitmap, err %d\n", 2737 + ret); 2738 + /* Stop triggering IRQs if we're unable to read PHY */ 2739 + return false; 2741 2740 } 2742 2741 2743 - return false; 2742 + /* ice_check_phy_tx_tstamp_ready() returns 1 if there are timestamps 2743 + * available, 0 if there are no waiting timestamps, and a negative 2744 + * value if there was an error (which we checked for above). 2745 + */ 2746 + return ret > 0; 2744 2747 } 2745 2748 2746 2749 /** ··· 2827 2824 { 2828 2825 struct device *dev = ice_pf_to_dev(pf); 2829 2826 struct ice_hw *hw = &pf->hw; 2830 - bool trigger_oicr = false; 2831 - unsigned int i; 2827 + int ret; 2832 2828 2833 2829 if (!pf->ptp.port.tx.has_ready_bitmap) 2834 2830 return; ··· 2835 2833 if (!ice_pf_src_tmr_owned(pf)) 2836 2834 return; 2837 2835 2838 - for (i = 0; i < ICE_GET_QUAD_NUM(hw->ptp.num_lports); i++) { 2839 - u64 tstamp_ready; 2840 - int err; 2841 - 2842 - err = ice_get_phy_tx_tstamp_ready(&pf->hw, i, &tstamp_ready); 2843 - if (!err && tstamp_ready) { 2844 - trigger_oicr = true; 2845 - break; 2846 - } 2847 - } 2848 - 2849 - if (trigger_oicr) { 2850 - /* Trigger a software interrupt, to ensure this data 2851 - * gets processed. 
2852 - */ 2836 + ret = ice_check_phy_tx_tstamp_ready(hw); 2837 + if (ret < 0) { 2838 + dev_dbg(dev, "PTP periodic task unable to read PHY timestamp ready bitmap, err %d\n", 2839 + ret); 2840 + } else if (ret) { 2853 2841 dev_dbg(dev, "PTP periodic task detected waiting timestamps. Triggering Tx timestamp interrupt now.\n"); 2854 2842 2855 2843 wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);

+6 -6

drivers/net/ethernet/intel/ice/ice_ptp_consts.h

··· 78 78 .blktime = 0x666, /* 3.2 */ 79 79 .tx_offset = { 80 80 .serdes = 0x234c, /* 17.6484848 */ 81 - .no_fec = 0x8e80, /* 71.25 */ 81 + .no_fec = 0x93d9, /* 73 */ 82 82 .fc = 0xb4a4, /* 90.32 */ 83 83 .sfd = 0x4a4, /* 2.32 */ 84 84 .onestep = 0x4ccd /* 38.4 */ 85 85 }, 86 86 .rx_offset = { 87 87 .serdes = 0xffffeb27, /* -10.42424 */ 88 - .no_fec = 0xffffcccd, /* -25.6 */ 88 + .no_fec = 0xffffc7b6, /* -28 */ 89 89 .fc = 0xfffc557b, /* -469.26 */ 90 90 .sfd = 0x4a4, /* 2.32 */ 91 91 .bs_ds = 0x32 /* 0.0969697 */ ··· 118 118 .mktime = 0x147b, /* 10.24, only if RS-FEC enabled */ 119 119 .tx_offset = { 120 120 .serdes = 0xe1e, /* 7.0593939 */ 121 - .no_fec = 0x3857, /* 28.17 */ 121 + .no_fec = 0x4266, /* 33 */ 122 122 .fc = 0x48c3, /* 36.38 */ 123 - .rs = 0x8100, /* 64.5 */ 123 + .rs = 0x8a00, /* 69 */ 124 124 .sfd = 0x1dc, /* 0.93 */ 125 125 .onestep = 0x1eb8 /* 15.36 */ 126 126 }, 127 127 .rx_offset = { 128 128 .serdes = 0xfffff7a9, /* -4.1697 */ 129 - .no_fec = 0xffffe71a, /* -12.45 */ 129 + .no_fec = 0xffffe700, /* -12 */ 130 130 .fc = 0xfffe894d, /* -187.35 */ 131 - .rs = 0xfffff8cd, /* -3.6 */ 131 + .rs = 0xfffff8cc, /* -3 */ 132 132 .sfd = 0x1dc, /* 0.93 */ 133 133 .bs_ds = 0x14 /* 0.0387879, RS-FEC 0 */ 134 134 }

+249 -10

drivers/net/ethernet/intel/ice/ice_ptp_hw.c

··· 378 378 */ 379 379 380 380 /** 381 + * ice_ptp_init_phc_e825c - Perform E825C specific PHC initialization 382 + * @hw: pointer to HW struct 383 + * 384 + * Perform E825C-specific PTP hardware clock initialization steps. 385 + * 386 + * Return: 0 on success, or a negative error value on failure. 387 + */ 388 + static int ice_ptp_init_phc_e825c(struct ice_hw *hw) 389 + { 390 + int err; 391 + 392 + /* Soft reset all ports, to ensure everything is at a clean state */ 393 + for (int port = 0; port < hw->ptp.num_lports; port++) { 394 + err = ice_ptp_phy_soft_reset_eth56g(hw, port); 395 + if (err) { 396 + ice_debug(hw, ICE_DBG_PTP, "Failed to soft reset port %d, err %d\n", 397 + port, err); 398 + return err; 399 + } 400 + } 401 + 402 + return 0; 403 + } 404 + 405 + /** 381 406 * ice_ptp_get_dest_dev_e825 - get destination PHY for given port number 382 407 * @hw: pointer to the HW struct 383 408 * @port: destination port ··· 1872 1847 * @ena: enable or disable interrupt 1873 1848 * @threshold: interrupt threshold 1874 1849 * 1850 + * The threshold cannot be 0 while the interrupt is enabled. 
1851 + * 1875 1852 * Configure TX timestamp interrupt for the specified port 1876 1853 * 1877 1854 * Return: ··· 1885 1858 int err; 1886 1859 u32 val; 1887 1860 1861 + if (ena && !threshold) 1862 + return -EINVAL; 1863 + 1888 1864 err = ice_read_ptp_reg_eth56g(hw, port, PHY_REG_TS_INT_CONFIG, &val); 1889 1865 if (err) 1890 1866 return err; 1891 1867 1868 + val &= ~PHY_TS_INT_CONFIG_ENA_M; 1892 1869 if (ena) { 1893 - val |= PHY_TS_INT_CONFIG_ENA_M; 1894 1870 val &= ~PHY_TS_INT_CONFIG_THRESHOLD_M; 1895 1871 val |= FIELD_PREP(PHY_TS_INT_CONFIG_THRESHOLD_M, threshold); 1896 - } else { 1897 - val &= ~PHY_TS_INT_CONFIG_ENA_M; 1872 + err = ice_write_ptp_reg_eth56g(hw, port, PHY_REG_TS_INT_CONFIG, 1873 + val); 1874 + if (err) { 1875 + ice_debug(hw, ICE_DBG_PTP, 1876 + "Failed to update 'threshold' PHY_REG_TS_INT_CONFIG port=%u ena=%u threshold=%u\n", 1877 + port, !!ena, threshold); 1878 + return err; 1879 + } 1880 + val |= PHY_TS_INT_CONFIG_ENA_M; 1898 1881 } 1899 1882 1900 - return ice_write_ptp_reg_eth56g(hw, port, PHY_REG_TS_INT_CONFIG, val); 1883 + err = ice_write_ptp_reg_eth56g(hw, port, PHY_REG_TS_INT_CONFIG, val); 1884 + if (err) { 1885 + ice_debug(hw, ICE_DBG_PTP, 1886 + "Failed to update 'ena' PHY_REG_TS_INT_CONFIG port=%u ena=%u threshold=%u\n", 1887 + port, !!ena, threshold); 1888 + return err; 1889 + } 1890 + 1891 + err = ice_read_ptp_reg_eth56g(hw, port, PHY_REG_TS_INT_CONFIG, &val); 1892 + if (err) { 1893 + ice_debug(hw, ICE_DBG_PTP, 1894 + "Failed to read PHY_REG_TS_INT_CONFIG port=%u ena=%u threshold=%u\n", 1895 + port, !!ena, threshold); 1896 + return err; 1897 + } 1898 + 1899 + return 0; 1901 1900 } 1902 1901 1903 1902 /** ··· 2169 2116 } 2170 2117 2171 2118 /** 2119 + * ice_check_phy_tx_tstamp_ready_eth56g - Check Tx memory status for all ports 2120 + * @hw: pointer to the HW struct 2121 + * 2122 + * Check the PHY_REG_TX_MEMORY_STATUS for all ports. A set bit indicates 2123 + * a waiting timestamp. 
2124 + * 2125 + * Return: 1 if any port has at least one timestamp ready bit set, 2126 + * 0 otherwise, and a negative error code if unable to read the bitmap. 2127 + */ 2128 + static int ice_check_phy_tx_tstamp_ready_eth56g(struct ice_hw *hw) 2129 + { 2130 + int port; 2131 + 2132 + for (port = 0; port < hw->ptp.num_lports; port++) { 2133 + u64 tstamp_ready; 2134 + int err; 2135 + 2136 + err = ice_get_phy_tx_tstamp_ready(hw, port, &tstamp_ready); 2137 + if (err) 2138 + return err; 2139 + 2140 + if (tstamp_ready) 2141 + return 1; 2142 + } 2143 + 2144 + return 0; 2145 + } 2146 + 2147 + /** 2172 2148 * ice_ptp_read_tx_hwtstamp_status_eth56g - Get TX timestamp status 2173 2149 * @hw: pointer to the HW struct 2174 2150 * @ts_status: the timestamp mask pointer ··· 2219 2137 *ts_status = 0; 2220 2138 2221 2139 for (phy = 0; phy < params->num_phys; phy++) { 2140 + u8 port; 2222 2141 int err; 2223 2142 2224 - err = ice_read_phy_eth56g(hw, phy, PHY_PTP_INT_STATUS, &status); 2143 + /* ice_read_phy_eth56g expects a port index, so use the first 2144 + * port of the PHY 2145 + */ 2146 + port = phy * hw->ptp.ports_per_phy; 2147 + 2148 + err = ice_read_phy_eth56g(hw, port, PHY_PTP_INT_STATUS, &status); 2225 2149 if (err) 2226 2150 return err; 2227 2151 2228 - *ts_status |= (status & mask) << (phy * hw->ptp.ports_per_phy); 2152 + *ts_status |= (status & mask) << port; 2229 2153 } 2230 2154 2231 2155 ice_debug(hw, ICE_DBG_PTP, "PHY interrupt err: %x\n", *ts_status); 2232 2156 2233 2157 return 0; 2158 + } 2159 + 2160 + /** 2161 + * ice_ptp_phy_soft_reset_eth56g - Perform a PHY soft reset on ETH56G 2162 + * @hw: pointer to the HW structure 2163 + * @port: PHY port number 2164 + * 2165 + * Trigger a soft reset of the ETH56G PHY by toggling the soft reset 2166 + * bit in the PHY global register. The reset sequence consists of: 2167 + * 1. Clearing the soft reset bit 2168 + * 2. Asserting the soft reset bit 2169 + * 3. 
Clearing the soft reset bit again 2170 + * 2171 + * Short delays are inserted between each step to allow the hardware 2172 + * to settle. This provides a controlled way to reinitialize the PHY 2173 + * without requiring a full device reset. 2174 + * 2175 + * Return: 0 on success, or a negative error code on failure when 2176 + * reading or writing the PHY register. 2177 + */ 2178 + int ice_ptp_phy_soft_reset_eth56g(struct ice_hw *hw, u8 port) 2179 + { 2180 + u32 global_val; 2181 + int err; 2182 + 2183 + err = ice_read_ptp_reg_eth56g(hw, port, PHY_REG_GLOBAL, &global_val); 2184 + if (err) { 2185 + ice_debug(hw, ICE_DBG_PTP, "Failed to read PHY_REG_GLOBAL for port %d, err %d\n", 2186 + port, err); 2187 + return err; 2188 + } 2189 + 2190 + global_val &= ~PHY_REG_GLOBAL_SOFT_RESET_M; 2191 + ice_debug(hw, ICE_DBG_PTP, "Clearing soft reset bit for port %d, val: 0x%x\n", 2192 + port, global_val); 2193 + err = ice_write_ptp_reg_eth56g(hw, port, PHY_REG_GLOBAL, global_val); 2194 + if (err) { 2195 + ice_debug(hw, ICE_DBG_PTP, "Failed to write PHY_REG_GLOBAL for port %d, err %d\n", 2196 + port, err); 2197 + return err; 2198 + } 2199 + 2200 + usleep_range(5000, 6000); 2201 + 2202 + global_val |= PHY_REG_GLOBAL_SOFT_RESET_M; 2203 + ice_debug(hw, ICE_DBG_PTP, "Set soft reset bit for port %d, val: 0x%x\n", 2204 + port, global_val); 2205 + err = ice_write_ptp_reg_eth56g(hw, port, PHY_REG_GLOBAL, global_val); 2206 + if (err) { 2207 + ice_debug(hw, ICE_DBG_PTP, "Failed to write PHY_REG_GLOBAL for port %d, err %d\n", 2208 + port, err); 2209 + return err; 2210 + } 2211 + usleep_range(5000, 6000); 2212 + 2213 + global_val &= ~PHY_REG_GLOBAL_SOFT_RESET_M; 2214 + ice_debug(hw, ICE_DBG_PTP, "Clear soft reset bit for port %d, val: 0x%x\n", 2215 + port, global_val); 2216 + err = ice_write_ptp_reg_eth56g(hw, port, PHY_REG_GLOBAL, global_val); 2217 + if (err) 2218 + ice_debug(hw, ICE_DBG_PTP, "Failed to write PHY_REG_GLOBAL for port %d, err %d\n", 2219 + port, err); 2220 + return err; 2234 
2221 } 2235 2222 2236 2223 /** ··· 4354 4203 } 4355 4204 4356 4205 /** 4206 + * ice_check_phy_tx_tstamp_ready_e82x - Check Tx memory status for all quads 4207 + * @hw: pointer to the HW struct 4208 + * 4209 + * Check the Q_REG_TX_MEMORY_STATUS for all quads. A set bit indicates 4210 + * a waiting timestamp. 4211 + * 4212 + * Return: 1 if any quad has at least one timestamp ready bit set, 4213 + * 0 otherwise, and a negative error value if unable to read the bitmap. 4214 + */ 4215 + static int ice_check_phy_tx_tstamp_ready_e82x(struct ice_hw *hw) 4216 + { 4217 + int quad; 4218 + 4219 + for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports); quad++) { 4220 + u64 tstamp_ready; 4221 + int err; 4222 + 4223 + err = ice_get_phy_tx_tstamp_ready(hw, quad, &tstamp_ready); 4224 + if (err) 4225 + return err; 4226 + 4227 + if (tstamp_ready) 4228 + return 1; 4229 + } 4230 + 4231 + return 0; 4232 + } 4233 + 4234 + /** 4357 4235 * ice_phy_cfg_intr_e82x - Configure TX timestamp interrupt 4358 4236 * @hw: pointer to the HW struct 4359 4237 * @quad: the timestamp quad ··· 4935 4755 return 0; 4936 4756 } 4937 4757 4758 + /** 4759 + * ice_check_phy_tx_tstamp_ready_e810 - Check Tx memory status register 4760 + * @hw: pointer to the HW struct 4761 + * 4762 + * The E810 devices do not have a Tx memory status register. Note this is 4763 + * intentionally different behavior from ice_get_phy_tx_tstamp_ready_e810 4764 + * which always says that all bits are ready. This function is called in cases 4765 + * where code will trigger interrupts if timestamps are waiting, and should 4766 + * not be called for E810 hardware. 4767 + * 4768 + * Return: 0. 
4769 + */ 4770 + static int ice_check_phy_tx_tstamp_ready_e810(struct ice_hw *hw) 4771 + { 4772 + return 0; 4773 + } 4774 + 4938 4775 /* E810 SMA functions 4939 4776 * 4940 4777 * The following functions operate specifically on E810 hardware and are used ··· 5204 5007 *tstamp_ready = rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_H); 5205 5008 *tstamp_ready <<= 32; 5206 5009 *tstamp_ready |= rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_L); 5010 + } 5011 + 5012 + /** 5013 + * ice_check_phy_tx_tstamp_ready_e830 - Check Tx memory status register 5014 + * @hw: pointer to the HW struct 5015 + * 5016 + * Return: 1 if the device has waiting timestamps, 0 otherwise. 5017 + */ 5018 + static int ice_check_phy_tx_tstamp_ready_e830(struct ice_hw *hw) 5019 + { 5020 + u64 tstamp_ready; 5021 + 5022 + ice_get_phy_tx_tstamp_ready_e830(hw, 0, &tstamp_ready); 5023 + 5024 + return !!tstamp_ready; 5207 5025 } 5208 5026 5209 5027 /** ··· 5593 5381 */ 5594 5382 int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) 5595 5383 { 5384 + int err = 0; 5596 5385 u8 tmr_idx; 5597 - int err; 5598 5386 5599 5387 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; 5600 5388 ··· 5611 5399 err = ice_ptp_prep_phy_adj_e810(hw, adj); 5612 5400 break; 5613 5401 case ICE_MAC_E830: 5614 - /* E830 sync PHYs automatically after setting GLTSYN_SHADJ */ 5615 - return 0; 5402 + /* E830 sync PHYs automatically after setting cmd register */ 5403 + break; 5616 5404 case ICE_MAC_GENERIC: 5617 5405 err = ice_ptp_prep_phy_adj_e82x(hw, adj); 5618 5406 break; ··· 5776 5564 case ICE_MAC_GENERIC: 5777 5565 return ice_ptp_init_phc_e82x(hw); 5778 5566 case ICE_MAC_GENERIC_3K_E825: 5779 - return 0; 5567 + return ice_ptp_init_phc_e825c(hw); 5780 5568 default: 5781 5569 return -EOPNOTSUPP; 5782 5570 } ··· 5808 5596 case ICE_MAC_GENERIC_3K_E825: 5809 5597 return ice_get_phy_tx_tstamp_ready_eth56g(hw, block, 5810 5598 tstamp_ready); 5599 + default: 5600 + return -EOPNOTSUPP; 5601 + } 5602 + } 5603 + 5604 + /** 5605 + * 
ice_check_phy_tx_tstamp_ready - Check PHY Tx timestamp memory status 5606 + * @hw: pointer to the HW struct 5607 + * 5608 + * Check the PHY for Tx timestamp memory status on all ports. If you need to 5609 + * see individual timestamp status for each index, use 5610 + * ice_get_phy_tx_tstamp_ready() instead. 5611 + * 5612 + * Return: 1 if any port has timestamps available, 0 if there are no timestamps 5613 + * available, and a negative error code on failure. 5614 + */ 5615 + int ice_check_phy_tx_tstamp_ready(struct ice_hw *hw) 5616 + { 5617 + switch (hw->mac_type) { 5618 + case ICE_MAC_E810: 5619 + return ice_check_phy_tx_tstamp_ready_e810(hw); 5620 + case ICE_MAC_E830: 5621 + return ice_check_phy_tx_tstamp_ready_e830(hw); 5622 + case ICE_MAC_GENERIC: 5623 + return ice_check_phy_tx_tstamp_ready_e82x(hw); 5624 + case ICE_MAC_GENERIC_3K_E825: 5625 + return ice_check_phy_tx_tstamp_ready_eth56g(hw); 5811 5626 default: 5812 5627 return -EOPNOTSUPP; 5813 5628 }

+5

drivers/net/ethernet/intel/ice/ice_ptp_hw.h

··· 300 300 int ice_ptp_init_phc(struct ice_hw *hw); 301 301 void ice_ptp_init_hw(struct ice_hw *hw); 302 302 int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready); 303 + int ice_check_phy_tx_tstamp_ready(struct ice_hw *hw); 303 304 int ice_ptp_one_port_cmd(struct ice_hw *hw, u8 configured_port, 304 305 enum ice_ptp_tmr_cmd configured_cmd); 305 306 ··· 375 374 int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port); 376 375 int ice_phy_cfg_intr_eth56g(struct ice_hw *hw, u8 port, bool ena, u8 threshold); 377 376 int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port); 377 + int ice_ptp_phy_soft_reset_eth56g(struct ice_hw *hw, u8 port); 378 378 379 379 #define ICE_ETH56G_NOMINAL_INCVAL 0x140000000ULL 380 380 #define ICE_ETH56G_NOMINAL_PCS_REF_TUS 0x100000000ULL ··· 678 676 #define ICE_P0_GNSS_PRSNT_N BIT(4) 679 677 680 678 /* ETH56G PHY register addresses */ 679 + #define PHY_REG_GLOBAL 0x0 680 + #define PHY_REG_GLOBAL_SOFT_RESET_M BIT(11) 681 + 681 682 /* Timestamp PHY incval registers */ 682 683 #define PHY_REG_TIMETUS_L 0x8 683 684 #define PHY_REG_TIMETUS_U 0xC

+2

drivers/net/ethernet/intel/ice/ice_sf_eth.c

··· 305 305 306 306 aux_dev_uninit: 307 307 auxiliary_device_uninit(&sf_dev->adev); 308 + return err; 309 + 308 310 sf_dev_free: 309 311 kfree(sf_dev); 310 312 xa_erase:

+20 -9

drivers/net/ethernet/intel/ice/ice_txrx.c

··· 190 190 void ice_free_tx_tstamp_ring(struct ice_tx_ring *tx_ring) 191 191 { 192 192 ice_free_tstamp_ring(tx_ring); 193 + clear_bit(ICE_TX_RING_FLAGS_TXTIME, tx_ring->flags); 194 + smp_wmb(); /* order flag clear before pointer NULL */ 193 195 kfree_rcu(tx_ring->tstamp_ring, rcu); 194 - tx_ring->tstamp_ring = NULL; 195 - tx_ring->flags &= ~ICE_TX_FLAGS_TXTIME; 196 + WRITE_ONCE(tx_ring->tstamp_ring, NULL); 196 197 } 197 198 198 199 /** ··· 406 405 tx_ring->tstamp_ring = tstamp_ring; 407 406 tstamp_ring->desc = NULL; 408 407 tstamp_ring->count = ice_calc_ts_ring_count(tx_ring); 409 - tx_ring->flags |= ICE_TX_FLAGS_TXTIME; 408 + set_bit(ICE_TX_RING_FLAGS_TXTIME, tx_ring->flags); 410 409 return 0; 411 410 } 412 411 ··· 1522 1521 return; 1523 1522 1524 1523 if (ice_is_txtime_cfg(tx_ring)) { 1525 - struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring; 1526 - u32 tstamp_count = tstamp_ring->count; 1527 - u32 j = tstamp_ring->next_to_use; 1524 + struct ice_tstamp_ring *tstamp_ring; 1525 + u32 tstamp_count, j; 1528 1526 struct ice_ts_desc *ts_desc; 1529 1527 struct timespec64 ts; 1530 1528 u32 tstamp; 1529 + 1530 + smp_rmb(); /* order flag read before pointer read */ 1531 + tstamp_ring = READ_ONCE(tx_ring->tstamp_ring); 1532 + if (unlikely(!tstamp_ring)) 1533 + goto ring_kick; 1534 + 1535 + tstamp_count = tstamp_ring->count; 1536 + j = tstamp_ring->next_to_use; 1531 1537 1532 1538 ts = ktime_to_timespec64(first->skb->tstamp); 1533 1539 tstamp = ts.tv_nsec >> ICE_TXTIME_CTX_RESOLUTION_128NS; ··· 1563 1555 tstamp_ring->next_to_use = j; 1564 1556 writel_relaxed(j, tstamp_ring->tail); 1565 1557 } else { 1558 + ring_kick: 1566 1559 writel_relaxed(i, tx_ring->tail); 1567 1560 } 1568 1561 return; ··· 1823 1814 */ 1824 1815 if (skb_vlan_tag_present(skb)) { 1825 1816 first->vid = skb_vlan_tag_get(skb); 1826 - if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2) 1817 + if (test_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG2, tx_ring->flags)) 1827 1818 first->tx_flags |= 
ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN; 1828 1819 else 1829 1820 first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; ··· 2167 2158 2168 2159 ice_trace(xmit_frame_ring, tx_ring, skb); 2169 2160 2161 + /* record the location of the first descriptor for this packet */ 2162 + first = &tx_ring->tx_buf[tx_ring->next_to_use]; 2163 + 2170 2164 count = ice_xmit_desc_count(skb); 2171 2165 if (ice_chk_linearize(skb, count)) { 2172 2166 if (__skb_linearize(skb)) ··· 2195 2183 2196 2184 offload.tx_ring = tx_ring; 2197 2185 2198 - /* record the location of the first descriptor for this packet */ 2199 - first = &tx_ring->tx_buf[tx_ring->next_to_use]; 2200 2186 first->skb = skb; 2201 2187 first->type = ICE_TX_BUF_SKB; 2202 2188 first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); ··· 2259 2249 out_drop: 2260 2250 ice_trace(xmit_frame_ring_drop, tx_ring, skb); 2261 2251 dev_kfree_skb_any(skb); 2252 + first->type = ICE_TX_BUF_EMPTY; 2262 2253 return NETDEV_TX_OK; 2263 2254 } 2264 2255

+10 -6

drivers/net/ethernet/intel/ice/ice_txrx.h

··· 212 212 ICE_RX_DTYPE_SPLIT_ALWAYS = 2, 213 213 }; 214 214 215 + enum ice_tx_ring_flags { 216 + ICE_TX_RING_FLAGS_XDP, 217 + ICE_TX_RING_FLAGS_VLAN_L2TAG1, 218 + ICE_TX_RING_FLAGS_VLAN_L2TAG2, 219 + ICE_TX_RING_FLAGS_TXTIME, 220 + ICE_TX_RING_FLAGS_NBITS, 221 + }; 222 + 215 223 struct ice_pkt_ctx { 216 224 u64 cached_phctime; 217 225 __be16 vlan_proto; ··· 360 352 u16 count; /* Number of descriptors */ 361 353 u16 q_index; /* Queue number of ring */ 362 354 363 - u8 flags; 364 - #define ICE_TX_FLAGS_RING_XDP BIT(0) 365 - #define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) 366 - #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) 367 - #define ICE_TX_FLAGS_TXTIME BIT(3) 355 + DECLARE_BITMAP(flags, ICE_TX_RING_FLAGS_NBITS); 368 356 369 357 struct xsk_buff_pool *xsk_pool; 370 358 ··· 402 398 403 399 static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring) 404 400 { 405 - return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); 401 + return test_bit(ICE_TX_RING_FLAGS_XDP, ring->flags); 406 402 } 407 403 408 404 enum ice_container_type {

+4 -1

drivers/net/ethernet/mellanox/mlx5/core/en/fs.h

··· 201 201 void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs); 202 202 int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num); 203 203 void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs); 204 - void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev); 204 + void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, 205 + struct net_device *netdev, 206 + struct netdev_hw_addr_list *uc, 207 + struct netdev_hw_addr_list *mc); 205 208 int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs, 206 209 struct net_device *netdev, 207 210 __be16 proto, u16 vid);

+21 -11

drivers/net/ethernet/mellanox/mlx5/core/en_fs.c

··· 609 609 } 610 610 611 611 static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs, 612 - struct net_device *netdev) 612 + struct net_device *netdev, 613 + struct netdev_hw_addr_list *uc, 614 + struct netdev_hw_addr_list *mc) 613 615 { 614 616 struct netdev_hw_addr *ha; 615 617 616 - netif_addr_lock_bh(netdev); 618 + if (!uc || !mc) { 619 + netif_addr_lock_bh(netdev); 620 + mlx5e_sync_netdev_addr(fs, netdev, &netdev->uc, &netdev->mc); 621 + netif_addr_unlock_bh(netdev); 622 + return; 623 + } 617 624 618 625 mlx5e_add_l2_to_hash(fs->l2.netdev_uc, netdev->dev_addr); 619 - netdev_for_each_uc_addr(ha, netdev) 626 + 627 + netdev_hw_addr_list_for_each(ha, uc) 620 628 mlx5e_add_l2_to_hash(fs->l2.netdev_uc, ha->addr); 621 629 622 - netdev_for_each_mc_addr(ha, netdev) 630 + netdev_hw_addr_list_for_each(ha, mc) 623 631 mlx5e_add_l2_to_hash(fs->l2.netdev_mc, ha->addr); 624 - 625 - netif_addr_unlock_bh(netdev); 626 632 } 627 633 628 634 static void mlx5e_fill_addr_array(struct mlx5e_flow_steering *fs, int list_type, ··· 730 724 } 731 725 732 726 static void mlx5e_handle_netdev_addr(struct mlx5e_flow_steering *fs, 733 - struct net_device *netdev) 727 + struct net_device *netdev, 728 + struct netdev_hw_addr_list *uc, 729 + struct netdev_hw_addr_list *mc) 734 730 { 735 731 struct mlx5e_l2_hash_node *hn; 736 732 struct hlist_node *tmp; ··· 744 736 hn->action = MLX5E_ACTION_DEL; 745 737 746 738 if (fs->state_destroy) 747 - mlx5e_sync_netdev_addr(fs, netdev); 739 + mlx5e_sync_netdev_addr(fs, netdev, uc, mc); 748 740 749 741 mlx5e_apply_netdev_addr(fs); 750 742 } ··· 828 820 } 829 821 830 822 void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, 831 - struct net_device *netdev) 823 + struct net_device *netdev, 824 + struct netdev_hw_addr_list *uc, 825 + struct netdev_hw_addr_list *mc) 832 826 { 833 827 struct mlx5e_priv *priv = netdev_priv(netdev); 834 828 struct mlx5e_l2_table *ea = &fs->l2; 835 829 836 830 if (mlx5e_is_uplink_rep(priv)) { 837 - 
mlx5e_handle_netdev_addr(fs, netdev); 831 + mlx5e_handle_netdev_addr(fs, netdev, uc, mc); 838 832 goto update_vport_context; 839 833 } 840 834 ··· 866 856 if (enable_broadcast) 867 857 mlx5e_add_l2_flow_rule(fs, &ea->broadcast, MLX5E_FULLMATCH); 868 858 869 - mlx5e_handle_netdev_addr(fs, netdev); 859 + mlx5e_handle_netdev_addr(fs, netdev, uc, mc); 870 860 871 861 if (disable_broadcast) 872 862 mlx5e_del_l2_flow_rule(fs, &ea->broadcast);

+9 -4

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

··· 4145 4145 queue_work(priv->wq, &priv->set_rx_mode_work); 4146 4146 } 4147 4147 4148 - static void mlx5e_set_rx_mode(struct net_device *dev) 4148 + static void mlx5e_set_rx_mode(struct net_device *dev, 4149 + struct netdev_hw_addr_list *uc, 4150 + struct netdev_hw_addr_list *mc) 4149 4151 { 4150 4152 struct mlx5e_priv *priv = netdev_priv(dev); 4151 4153 4152 - mlx5e_nic_set_rx_mode(priv); 4154 + mlx5e_fs_set_rx_mode_work(priv->fs, dev, uc, mc); 4153 4155 } 4154 4156 4155 4157 static int mlx5e_set_mac(struct net_device *netdev, void *addr) ··· 5326 5324 .ndo_setup_tc = mlx5e_setup_tc, 5327 5325 .ndo_select_queue = mlx5e_select_queue, 5328 5326 .ndo_get_stats64 = mlx5e_get_stats, 5329 - .ndo_set_rx_mode = mlx5e_set_rx_mode, 5327 + .ndo_set_rx_mode_async = mlx5e_set_rx_mode, 5330 5328 .ndo_set_mac_address = mlx5e_set_mac, 5331 5329 .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, 5332 5330 .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, ··· 6311 6309 { 6312 6310 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 6313 6311 set_rx_mode_work); 6312 + struct net_device *dev = priv->netdev; 6314 6313 6315 - return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev); 6314 + netdev_lock_ops(dev); 6315 + mlx5e_fs_set_rx_mode_work(priv->fs, dev, NULL, NULL); 6316 + netdev_unlock_ops(dev); 6316 6317 } 6317 6318 6318 6319 /* mlx5e generic netdev management API (move to en_common.c) */

+3 -1

drivers/net/ethernet/mellanox/mlx5/core/main.c

··· 1849 1849 1850 1850 err = mlx5_notifiers_init(dev); 1851 1851 if (err) 1852 - goto err_hca_caps; 1852 + goto err_notifiers_init; 1853 1853 1854 1854 /* The conjunction of sw_vhca_id with sw_owner_id will be a global 1855 1855 * unique id per function which uses mlx5_core. ··· 1865 1865 1866 1866 return 0; 1867 1867 1868 + err_notifiers_init: 1869 + mlx5_hca_caps_free(dev); 1868 1870 err_hca_caps: 1869 1871 mlx5_adev_cleanup(dev); 1870 1872 err_adev_init:

+13 -7

drivers/net/ethernet/meta/fbnic/fbnic_netdev.c

··· 183 183 return ret; 184 184 } 185 185 186 - void __fbnic_set_rx_mode(struct fbnic_dev *fbd) 186 + void __fbnic_set_rx_mode(struct fbnic_dev *fbd, 187 + struct netdev_hw_addr_list *uc, 188 + struct netdev_hw_addr_list *mc) 187 189 { 188 190 bool uc_promisc = false, mc_promisc = false; 189 191 struct net_device *netdev = fbd->netdev; ··· 215 213 } 216 214 217 215 /* Synchronize unicast and multicast address lists */ 218 - err = __dev_uc_sync(netdev, fbnic_uc_sync, fbnic_uc_unsync); 216 + err = __hw_addr_sync_dev(uc, netdev, fbnic_uc_sync, fbnic_uc_unsync); 219 217 if (err == -ENOSPC) 220 218 uc_promisc = true; 221 - err = __dev_mc_sync(netdev, fbnic_mc_sync, fbnic_mc_unsync); 219 + err = __hw_addr_sync_dev(mc, netdev, fbnic_mc_sync, fbnic_mc_unsync); 222 220 if (err == -ENOSPC) 223 221 mc_promisc = true; 224 222 ··· 240 238 fbnic_write_tce_tcam(fbd); 241 239 } 242 240 243 - static void fbnic_set_rx_mode(struct net_device *netdev) 241 + static void fbnic_set_rx_mode(struct net_device *netdev, 242 + struct netdev_hw_addr_list *uc, 243 + struct netdev_hw_addr_list *mc) 244 244 { 245 245 struct fbnic_net *fbn = netdev_priv(netdev); 246 246 struct fbnic_dev *fbd = fbn->fbd; 247 247 248 248 /* No need to update the hardware if we are not running */ 249 249 if (netif_running(netdev)) 250 - __fbnic_set_rx_mode(fbd); 250 + __fbnic_set_rx_mode(fbd, uc, mc); 251 251 } 252 252 253 253 static int fbnic_set_mac(struct net_device *netdev, void *p) 254 254 { 255 + struct fbnic_net *fbn = netdev_priv(netdev); 255 256 struct sockaddr *addr = p; 256 257 257 258 if (!is_valid_ether_addr(addr->sa_data)) ··· 262 257 263 258 eth_hw_addr_set(netdev, addr->sa_data); 264 259 265 - fbnic_set_rx_mode(netdev); 260 + if (netif_running(netdev)) 261 + __fbnic_set_rx_mode(fbn->fbd, &netdev->uc, &netdev->mc); 266 262 267 263 return 0; 268 264 } ··· 557 551 .ndo_features_check = fbnic_features_check, 558 552 .ndo_set_mac_address = fbnic_set_mac, 559 553 .ndo_change_mtu = fbnic_change_mtu, 560 - 
.ndo_set_rx_mode = fbnic_set_rx_mode, 554 + .ndo_set_rx_mode_async = fbnic_set_rx_mode, 561 555 .ndo_get_stats64 = fbnic_get_stats64, 562 556 .ndo_bpf = fbnic_bpf, 563 557 .ndo_hwtstamp_get = fbnic_hwtstamp_get,

+3 -1

drivers/net/ethernet/meta/fbnic/fbnic_netdev.h

··· 97 97 int fbnic_time_start(struct fbnic_net *fbn); 98 98 void fbnic_time_stop(struct fbnic_net *fbn); 99 99 100 - void __fbnic_set_rx_mode(struct fbnic_dev *fbd); 100 + void __fbnic_set_rx_mode(struct fbnic_dev *fbd, 101 + struct netdev_hw_addr_list *uc, 102 + struct netdev_hw_addr_list *mc); 101 103 void fbnic_clear_rx_mode(struct fbnic_dev *fbd); 102 104 103 105 void fbnic_phylink_get_pauseparam(struct net_device *netdev,

+2 -2

drivers/net/ethernet/meta/fbnic/fbnic_pci.c

··· 135 135 136 136 fbnic_rss_reinit_hw(fbn->fbd, fbn); 137 137 138 - __fbnic_set_rx_mode(fbn->fbd); 138 + __fbnic_set_rx_mode(fbn->fbd, &fbn->netdev->uc, &fbn->netdev->mc); 139 139 140 140 /* Enable Tx/Rx processing */ 141 141 fbnic_napi_enable(fbn); ··· 180 180 } 181 181 182 182 fbnic_rpc_reset_valid_entries(fbd); 183 - __fbnic_set_rx_mode(fbd); 183 + __fbnic_set_rx_mode(fbd, &fbd->netdev->uc, &fbd->netdev->mc); 184 184 185 185 return 0; 186 186 }

+1 -1

drivers/net/ethernet/meta/fbnic/fbnic_rpc.c

··· 244 244 245 245 if (fbd->fw_cap.need_bmc_tcam_reinit) { 246 246 fbnic_bmc_rpc_init(fbd); 247 - __fbnic_set_rx_mode(fbd); 247 + __fbnic_set_rx_mode(fbd, &fbd->netdev->uc, &fbd->netdev->mc); 248 248 fbd->fw_cap.need_bmc_tcam_reinit = false; 249 249 } 250 250

+2 -4

drivers/net/ethernet/micrel/ks8851.h

··· 408 408 struct gpio_desc *gpio; 409 409 struct mii_bus *mii_bus; 410 410 411 - void (*lock)(struct ks8851_net *ks, 412 - unsigned long *flags); 413 - void (*unlock)(struct ks8851_net *ks, 414 - unsigned long *flags); 411 + void (*lock)(struct ks8851_net *ks); 412 + void (*unlock)(struct ks8851_net *ks); 415 413 unsigned int (*rdreg16)(struct ks8851_net *ks, 416 414 unsigned int reg); 417 415 void (*wrreg16)(struct ks8851_net *ks,

+30 -39

drivers/net/ethernet/micrel/ks8851_common.c

··· 28 28 /** 29 29 * ks8851_lock - register access lock 30 30 * @ks: The chip state 31 - * @flags: Spinlock flags 32 31 * 33 32 * Claim chip register access lock 34 33 */ 35 - static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags) 34 + static void ks8851_lock(struct ks8851_net *ks) 36 35 { 37 - ks->lock(ks, flags); 36 + ks->lock(ks); 38 37 } 39 38 40 39 /** 41 40 * ks8851_unlock - register access unlock 42 41 * @ks: The chip state 43 - * @flags: Spinlock flags 44 42 * 45 43 * Release chip register access lock 46 44 */ 47 - static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags) 45 + static void ks8851_unlock(struct ks8851_net *ks) 48 46 { 49 - ks->unlock(ks, flags); 47 + ks->unlock(ks); 50 48 } 51 49 52 50 /** ··· 127 129 static int ks8851_write_mac_addr(struct net_device *dev) 128 130 { 129 131 struct ks8851_net *ks = netdev_priv(dev); 130 - unsigned long flags; 131 132 u16 val; 132 133 int i; 133 134 134 - ks8851_lock(ks, &flags); 135 + ks8851_lock(ks); 135 136 136 137 /* 137 138 * Wake up chip in case it was powered off when stopped; otherwise, ··· 146 149 if (!netif_running(dev)) 147 150 ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN); 148 151 149 - ks8851_unlock(ks, &flags); 152 + ks8851_unlock(ks); 150 153 151 154 return 0; 152 155 } ··· 160 163 static void ks8851_read_mac_addr(struct net_device *dev) 161 164 { 162 165 struct ks8851_net *ks = netdev_priv(dev); 163 - unsigned long flags; 164 166 u8 addr[ETH_ALEN]; 165 167 u16 reg; 166 168 int i; 167 169 168 - ks8851_lock(ks, &flags); 170 + ks8851_lock(ks); 169 171 170 172 for (i = 0; i < ETH_ALEN; i += 2) { 171 173 reg = ks8851_rdreg16(ks, KS_MAR(i)); ··· 173 177 } 174 178 eth_hw_addr_set(dev, addr); 175 179 176 - ks8851_unlock(ks, &flags); 180 + ks8851_unlock(ks); 177 181 } 178 182 179 183 /** ··· 308 312 { 309 313 struct ks8851_net *ks = _ks; 310 314 struct sk_buff_head rxq; 311 - unsigned long flags; 312 315 unsigned int status; 313 316 struct sk_buff *skb; 314 317 315 - 
ks8851_lock(ks, &flags); 318 + ks8851_lock(ks); 316 319 317 320 status = ks8851_rdreg16(ks, KS_ISR); 318 321 ks8851_wrreg16(ks, KS_ISR, status); ··· 368 373 ks8851_wrreg16(ks, KS_RXCR1, rxc->rxcr1); 369 374 } 370 375 371 - ks8851_unlock(ks, &flags); 376 + ks8851_unlock(ks); 372 377 373 378 if (status & IRQ_LCI) 374 379 mii_check_link(&ks->mii); 375 380 376 - if (status & IRQ_RXI) 381 + if (status & IRQ_RXI) { 382 + local_bh_disable(); 377 383 while ((skb = __skb_dequeue(&rxq))) 378 384 netif_rx(skb); 385 + local_bh_enable(); 386 + } 379 387 380 388 return IRQ_HANDLED; 381 389 } ··· 403 405 static int ks8851_net_open(struct net_device *dev) 404 406 { 405 407 struct ks8851_net *ks = netdev_priv(dev); 406 - unsigned long flags; 407 408 int ret; 408 409 409 410 ret = request_threaded_irq(dev->irq, NULL, ks8851_irq, ··· 415 418 416 419 /* lock the card, even if we may not actually be doing anything 417 420 * else at the moment */ 418 - ks8851_lock(ks, &flags); 421 + ks8851_lock(ks); 419 422 420 423 netif_dbg(ks, ifup, ks->netdev, "opening\n"); 421 424 ··· 468 471 469 472 netif_dbg(ks, ifup, ks->netdev, "network device up\n"); 470 473 471 - ks8851_unlock(ks, &flags); 474 + ks8851_unlock(ks); 472 475 mii_check_link(&ks->mii); 473 476 return 0; 474 477 } ··· 484 487 static int ks8851_net_stop(struct net_device *dev) 485 488 { 486 489 struct ks8851_net *ks = netdev_priv(dev); 487 - unsigned long flags; 488 490 489 491 netif_info(ks, ifdown, dev, "shutting down\n"); 490 492 491 493 netif_stop_queue(dev); 492 494 493 - ks8851_lock(ks, &flags); 495 + ks8851_lock(ks); 494 496 /* turn off the IRQs and ack any outstanding */ 495 497 ks8851_wrreg16(ks, KS_IER, 0x0000); 496 498 ks8851_wrreg16(ks, KS_ISR, 0xffff); 497 - ks8851_unlock(ks, &flags); 499 + ks8851_unlock(ks); 498 500 499 501 /* stop any outstanding work */ 500 502 ks8851_flush_tx_work(ks); 501 503 flush_work(&ks->rxctrl_work); 502 504 503 - ks8851_lock(ks, &flags); 505 + ks8851_lock(ks); 504 506 /* shutdown RX process */ 
505 507 ks8851_wrreg16(ks, KS_RXCR1, 0x0000); 506 508 ··· 508 512 509 513 /* set powermode to soft power down to save power */ 510 514 ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN); 511 - ks8851_unlock(ks, &flags); 515 + ks8851_unlock(ks); 512 516 513 517 /* ensure any queued tx buffers are dumped */ 514 518 while (!skb_queue_empty(&ks->txq)) { ··· 562 566 static void ks8851_rxctrl_work(struct work_struct *work) 563 567 { 564 568 struct ks8851_net *ks = container_of(work, struct ks8851_net, rxctrl_work); 565 - unsigned long flags; 566 569 567 - ks8851_lock(ks, &flags); 570 + ks8851_lock(ks); 568 571 569 572 /* need to shutdown RXQ before modifying filter parameters */ 570 573 ks8851_wrreg16(ks, KS_RXCR1, 0x00); 571 574 572 - ks8851_unlock(ks, &flags); 575 + ks8851_unlock(ks); 573 576 } 574 577 575 578 static void ks8851_set_rx_mode(struct net_device *dev) ··· 775 780 { 776 781 struct ks8851_net *ks = netdev_priv(dev); 777 782 int offset = ee->offset; 778 - unsigned long flags; 779 783 int len = ee->len; 780 784 u16 tmp; 781 785 ··· 788 794 if (!(ks->rc_ccr & CCR_EEPROM)) 789 795 return -ENOENT; 790 796 791 - ks8851_lock(ks, &flags); 797 + ks8851_lock(ks); 792 798 793 799 ks8851_eeprom_claim(ks); 794 800 ··· 811 817 eeprom_93cx6_wren(&ks->eeprom, false); 812 818 813 819 ks8851_eeprom_release(ks); 814 - ks8851_unlock(ks, &flags); 820 + ks8851_unlock(ks); 815 821 816 822 return 0; 817 823 } ··· 821 827 { 822 828 struct ks8851_net *ks = netdev_priv(dev); 823 829 int offset = ee->offset; 824 - unsigned long flags; 825 830 int len = ee->len; 826 831 827 832 /* must be 2 byte aligned */ ··· 830 837 if (!(ks->rc_ccr & CCR_EEPROM)) 831 838 return -ENOENT; 832 839 833 - ks8851_lock(ks, &flags); 840 + ks8851_lock(ks); 834 841 835 842 ks8851_eeprom_claim(ks); 836 843 ··· 838 845 839 846 eeprom_93cx6_multiread(&ks->eeprom, offset/2, (__le16 *)data, len/2); 840 847 ks8851_eeprom_release(ks); 841 - ks8851_unlock(ks, &flags); 848 + ks8851_unlock(ks); 842 849 843 850 return 0; 844 
851 } ··· 897 904 static int ks8851_phy_read_common(struct net_device *dev, int phy_addr, int reg) 898 905 { 899 906 struct ks8851_net *ks = netdev_priv(dev); 900 - unsigned long flags; 901 907 int result; 902 908 int ksreg; 903 909 ··· 904 912 if (ksreg < 0) 905 913 return ksreg; 906 914 907 - ks8851_lock(ks, &flags); 915 + ks8851_lock(ks); 908 916 result = ks8851_rdreg16(ks, ksreg); 909 - ks8851_unlock(ks, &flags); 917 + ks8851_unlock(ks); 910 918 911 919 return result; 912 920 } ··· 941 949 int phy, int reg, int value) 942 950 { 943 951 struct ks8851_net *ks = netdev_priv(dev); 944 - unsigned long flags; 945 952 int ksreg; 946 953 947 954 ksreg = ks8851_phy_reg(reg); 948 955 if (ksreg >= 0) { 949 - ks8851_lock(ks, &flags); 956 + ks8851_lock(ks); 950 957 ks8851_wrreg16(ks, ksreg, value); 951 - ks8851_unlock(ks, &flags); 958 + ks8851_unlock(ks); 952 959 } 953 960 } 954 961

+6 -9

drivers/net/ethernet/micrel/ks8851_par.c

··· 55 55 /** 56 56 * ks8851_lock_par - register access lock 57 57 * @ks: The chip state 58 - * @flags: Spinlock flags 59 58 * 60 59 * Claim chip register access lock 61 60 */ 62 - static void ks8851_lock_par(struct ks8851_net *ks, unsigned long *flags) 61 + static void ks8851_lock_par(struct ks8851_net *ks) 63 62 { 64 63 struct ks8851_net_par *ksp = to_ks8851_par(ks); 65 64 66 - spin_lock_irqsave(&ksp->lock, *flags); 65 + spin_lock_bh(&ksp->lock); 67 66 } 68 67 69 68 /** 70 69 * ks8851_unlock_par - register access unlock 71 70 * @ks: The chip state 72 - * @flags: Spinlock flags 73 71 * 74 72 * Release chip register access lock 75 73 */ 76 - static void ks8851_unlock_par(struct ks8851_net *ks, unsigned long *flags) 74 + static void ks8851_unlock_par(struct ks8851_net *ks) 77 75 { 78 76 struct ks8851_net_par *ksp = to_ks8851_par(ks); 79 77 80 - spin_unlock_irqrestore(&ksp->lock, *flags); 78 + spin_unlock_bh(&ksp->lock); 81 79 } 82 80 83 81 /** ··· 231 233 { 232 234 struct ks8851_net *ks = netdev_priv(dev); 233 235 netdev_tx_t ret = NETDEV_TX_OK; 234 - unsigned long flags; 235 236 unsigned int txqcr; 236 237 u16 txmir; 237 238 int err; ··· 238 241 netif_dbg(ks, tx_queued, ks->netdev, 239 242 "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data); 240 243 241 - ks8851_lock_par(ks, &flags); 244 + ks8851_lock_par(ks); 242 245 243 246 txmir = ks8851_rdreg16_par(ks, KS_TXMIR) & 0x1fff; 244 247 ··· 259 262 ret = NETDEV_TX_BUSY; 260 263 } 261 264 262 - ks8851_unlock_par(ks, &flags); 265 + ks8851_unlock_par(ks); 263 266 264 267 return ret; 265 268 }

+4 -7

drivers/net/ethernet/micrel/ks8851_spi.c

··· 71 71 /** 72 72 * ks8851_lock_spi - register access lock 73 73 * @ks: The chip state 74 - * @flags: Spinlock flags 75 74 * 76 75 * Claim chip register access lock 77 76 */ 78 - static void ks8851_lock_spi(struct ks8851_net *ks, unsigned long *flags) 77 + static void ks8851_lock_spi(struct ks8851_net *ks) 79 78 { 80 79 struct ks8851_net_spi *kss = to_ks8851_spi(ks); 81 80 ··· 84 85 /** 85 86 * ks8851_unlock_spi - register access unlock 86 87 * @ks: The chip state 87 - * @flags: Spinlock flags 88 88 * 89 89 * Release chip register access lock 90 90 */ 91 - static void ks8851_unlock_spi(struct ks8851_net *ks, unsigned long *flags) 91 + static void ks8851_unlock_spi(struct ks8851_net *ks) 92 92 { 93 93 struct ks8851_net_spi *kss = to_ks8851_spi(ks); 94 94 ··· 307 309 struct ks8851_net_spi *kss; 308 310 unsigned short tx_space; 309 311 struct ks8851_net *ks; 310 - unsigned long flags; 311 312 struct sk_buff *txb; 312 313 bool last; 313 314 ··· 314 317 ks = &kss->ks8851; 315 318 last = skb_queue_empty(&ks->txq); 316 319 317 - ks8851_lock_spi(ks, &flags); 320 + ks8851_lock_spi(ks); 318 321 319 322 while (!last) { 320 323 txb = skb_dequeue(&ks->txq); ··· 340 343 ks->tx_space = tx_space; 341 344 spin_unlock_bh(&ks->statelock); 342 345 343 - ks8851_unlock_spi(ks, &flags); 346 + ks8851_unlock_spi(ks); 344 347 } 345 348 346 349 /**

+20 -15

drivers/net/ethernet/microsoft/mana/mana_en.c

··· 3640 3640 3641 3641 ac->gdma_dev = gd; 3642 3642 gd->driver_data = ac; 3643 + 3644 + INIT_WORK(&ac->link_change_work, mana_link_state_handle); 3643 3645 } 3646 + 3647 + INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler); 3644 3648 3645 3649 err = mana_create_eq(ac); 3646 3650 if (err) { ··· 3661 3657 3662 3658 if (!resuming) { 3663 3659 ac->num_ports = num_ports; 3664 - 3665 - INIT_WORK(&ac->link_change_work, mana_link_state_handle); 3666 3660 } else { 3667 3661 if (ac->num_ports != num_ports) { 3668 3662 dev_err(dev, "The number of vPorts changed: %d->%d\n", ··· 3689 3687 if (!resuming) { 3690 3688 for (i = 0; i < ac->num_ports; i++) { 3691 3689 err = mana_probe_port(ac, i, &ac->ports[i]); 3692 - /* we log the port for which the probe failed and stop 3693 - * probes for subsequent ports. 3694 - * Note that we keep running ports, for which the probes 3695 - * were successful, unless add_adev fails too 3690 + /* Log the port for which the probe failed, stop probing 3691 + * subsequent ports, and skip add_adev. 3692 + * mana_remove() will clean up already-probed ports. 3696 3693 */ 3697 3694 if (err) { 3698 3695 dev_err(dev, "Probe Failed for port %d\n", i); ··· 3705 3704 enable_work(&apc->queue_reset_work); 3706 3705 err = mana_attach(ac->ports[i]); 3707 3706 rtnl_unlock(); 3708 - /* we log the port for which the attach failed and stop 3709 - * attach for subsequent ports 3710 - * Note that we keep running ports, for which the attach 3711 - * were successful, unless add_adev fails too 3707 + /* Log the port for which the attach failed, stop 3708 + * attaching subsequent ports, and skip add_adev. 3709 + * mana_remove() will clean up already-attached ports. 
3712 3710 */ 3713 3711 if (err) { 3714 3712 dev_err(dev, "Attach Failed for port %d\n", i); ··· 3716 3716 } 3717 3717 } 3718 3718 3719 - err = add_adev(gd, "eth"); 3719 + if (!err) 3720 + err = add_adev(gd, "eth"); 3720 3721 3721 - INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler); 3722 3722 schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD); 3723 3723 3724 3724 out: ··· 3739 3739 struct gdma_context *gc = gd->gdma_context; 3740 3740 struct mana_context *ac = gd->driver_data; 3741 3741 struct mana_port_context *apc; 3742 - struct device *dev = gc->dev; 3742 + struct device *dev; 3743 3743 struct net_device *ndev; 3744 3744 int err; 3745 3745 int i; 3746 + 3747 + if (!gc || !ac) 3748 + return; 3749 + 3750 + dev = gc->dev; 3746 3751 3747 3752 disable_work_sync(&ac->link_change_work); 3748 3753 cancel_delayed_work_sync(&ac->gf_stats_work); ··· 3761 3756 if (!ndev) { 3762 3757 if (i == 0) 3763 3758 dev_err(dev, "No net device to remove\n"); 3764 - goto out; 3759 + break; 3765 3760 } 3766 3761 3767 3762 apc = netdev_priv(ndev); ··· 3792 3787 } 3793 3788 3794 3789 mana_destroy_eq(ac); 3795 - out: 3790 + 3796 3791 if (ac->per_port_queue_reset_wq) { 3797 3792 destroy_workqueue(ac->per_port_queue_reset_wq); 3798 3793 ac->per_port_queue_reset_wq = NULL;

+11 -6

drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c

··· 435 435 436 436 /* Full Island ID and channel bits overlap? */ 437 437 ret = nfp_decode_basic(addr, &v, cpp_tgt, mode, addr40, isld1, isld0); 438 - if (ret) 438 + if (ret) { 439 + pr_warn("%s: decode dest_island failed: %d\n", __func__, ret); 439 440 return ret; 441 + } 440 442 441 443 /* The current address won't go where expected? */ 442 - if (dest_island != -1 && dest_island != v) 444 + if (dest_island != -1 && dest_island != v) { 445 + pr_warn("%s: dest_island mismatch: current (%d) != decoded (%d)\n", 446 + __func__, dest_island, v); 443 447 return -EINVAL; 448 + } 444 449 445 450 /* If dest_island was -1, we don't care where it goes. */ 446 451 return 0; ··· 498 493 * the address but we can verify if the existing 499 494 * contents will point to a valid island. 500 495 */ 501 - return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, 496 + return nfp_encode_basic_qdr(*addr, dest_island, cpp_tgt, 502 497 mode, addr40, isld1, isld0); 503 498 504 499 iid_lsb = addr40 ? 34 : 26; ··· 509 504 return 0; 510 505 case 1: 511 506 if (cpp_tgt == NFP_CPP_TARGET_QDR && !addr40) 512 - return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, 507 + return nfp_encode_basic_qdr(*addr, dest_island, cpp_tgt, 513 508 mode, addr40, isld1, isld0); 514 509 515 510 idx_lsb = addr40 ? 39 : 31; ··· 535 530 * be set before hand and with them select an island. 536 531 * So we need to confirm that it's at least plausible. 537 532 */ 538 - return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, 533 + return nfp_encode_basic_qdr(*addr, dest_island, cpp_tgt, 539 534 mode, addr40, isld1, isld0); 540 535 541 536 /* Make sure we compare against isldN values ··· 556 551 * iid<1> = addr<30> = channel<0> 557 552 * channel<1> = addr<31> = Index 558 553 */ 559 - return nfp_encode_basic_qdr(*addr, cpp_tgt, dest_island, 554 + return nfp_encode_basic_qdr(*addr, dest_island, cpp_tgt, 560 555 mode, addr40, isld1, isld0); 561 556 562 557 isld[0] &= ~3;

+2 -2

drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

··· 1410 1410 priv->tx_lpi_clk_stop = priv->plat->flags & 1411 1411 STMMAC_FLAG_EN_TX_LPI_CLOCKGATING; 1412 1412 1413 - config->default_an_inband = priv->plat->default_an_inband; 1414 - 1415 1413 /* Get the PHY interface modes (at the PHY end of the link) that 1416 1414 * are supported by the platform. 1417 1415 */ 1418 1416 if (priv->plat->get_interfaces) 1419 1417 priv->plat->get_interfaces(priv, priv->plat->bsp_priv, 1420 1418 config->supported_interfaces); 1419 + 1420 + config->default_an_inband = priv->plat->default_an_inband; 1421 1421 1422 1422 /* Set the platform/firmware specified interface mode if the 1423 1423 * supported interfaces have not already been provided using

+2 -1

drivers/net/ethernet/wangxun/txgbe/txgbe_main.c

··· 864 864 "0x%08x", etrack_id); 865 865 } 866 866 867 - if (etrack_id < 0x20010) 867 + if (wx->mac.type == wx_mac_sp && 868 + ((etrack_id & 0xfffff) < 0x20010)) 868 869 dev_warn(&pdev->dev, "Please upgrade the firmware to 0x20010 or above.\n"); 869 870 870 871 err = txgbe_test_hostif(wx);

+2

drivers/net/gtp.c

··· 2400 2400 return -ENODEV; 2401 2401 } 2402 2402 2403 + local_bh_disable(); 2403 2404 udp_tunnel_xmit_skb(rt, sk, skb_to_send, 2404 2405 fl4.saddr, fl4.daddr, 2405 2406 inet_dscp_to_dsfield(fl4.flowi4_dscp), ··· 2410 2409 !net_eq(sock_net(sk), 2411 2410 dev_net(gtp->dev)), 2412 2411 false, 0); 2412 + local_bh_enable(); 2413 2413 return 0; 2414 2414 } 2415 2415

+1

drivers/net/macvlan.c

··· 1689 1689 + macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */ 1690 1690 + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN */ 1691 1691 + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN_USED */ 1692 + + nla_total_size(4) /* IFLA_MACVLAN_BC_CUTOFF */ 1692 1693 ); 1693 1694 } 1694 1695

+1

drivers/net/mdio/Kconfig

··· 147 147 148 148 config MDIO_PIC64HPSC 149 149 tristate "PIC64-HPSC/HX MDIO interface support" 150 + depends on ARCH_MICROCHIP || COMPILE_TEST 150 151 depends on HAS_IOMEM && OF_MDIO 151 152 help 152 153 This driver supports the MDIO interface found on the PIC64-HPSC/HX

+2

drivers/net/netconsole.c

··· 497 497 size_t len; 498 498 499 499 len = strnlen(s, maxlen); 500 + if (!len) 501 + return; 500 502 if (s[len - 1] == '\n') 501 503 s[len - 1] = '\0'; 502 504 }

+5 -3

drivers/net/netdevsim/netdev.c

··· 185 185 return NETDEV_TX_OK; 186 186 } 187 187 188 - static void nsim_set_rx_mode(struct net_device *dev) 188 + static void nsim_set_rx_mode(struct net_device *dev, 189 + struct netdev_hw_addr_list *uc, 190 + struct netdev_hw_addr_list *mc) 189 191 { 190 192 } 191 193 ··· 625 623 626 624 static const struct net_device_ops nsim_netdev_ops = { 627 625 .ndo_start_xmit = nsim_start_xmit, 628 - .ndo_set_rx_mode = nsim_set_rx_mode, 626 + .ndo_set_rx_mode_async = nsim_set_rx_mode, 629 627 .ndo_set_mac_address = eth_mac_addr, 630 628 .ndo_validate_addr = eth_validate_addr, 631 629 .ndo_change_mtu = nsim_change_mtu, ··· 650 648 651 649 static const struct net_device_ops nsim_vf_netdev_ops = { 652 650 .ndo_start_xmit = nsim_start_xmit, 653 - .ndo_set_rx_mode = nsim_set_rx_mode, 651 + .ndo_set_rx_mode_async = nsim_set_rx_mode, 654 652 .ndo_set_mac_address = eth_mac_addr, 655 653 .ndo_validate_addr = eth_validate_addr, 656 654 .ndo_change_mtu = nsim_change_mtu,

+4 -2

drivers/net/netkit.c

··· 186 186 return iflink; 187 187 } 188 188 189 - static void netkit_set_multicast(struct net_device *dev) 189 + static void netkit_set_multicast(struct net_device *dev, 190 + struct netdev_hw_addr_list *uc, 191 + struct netdev_hw_addr_list *mc) 190 192 { 191 193 /* Nothing to do, we receive whatever gets pushed to us! */ 192 194 } ··· 332 330 .ndo_open = netkit_open, 333 331 .ndo_stop = netkit_close, 334 332 .ndo_start_xmit = netkit_xmit, 335 - .ndo_set_rx_mode = netkit_set_multicast, 333 + .ndo_set_rx_mode_async = netkit_set_multicast, 336 334 .ndo_set_rx_headroom = netkit_set_headroom, 337 335 .ndo_set_mac_address = netkit_set_macaddr, 338 336 .ndo_get_iflink = netkit_get_iflink,

+1 -1

drivers/net/ppp/ppp_generic.c

··· 2245 2245 */ 2246 2246 static void __ppp_decompress_proto(struct sk_buff *skb) 2247 2247 { 2248 - if (skb->data[0] & 0x01) 2248 + if (ppp_skb_is_compressed_proto(skb)) 2249 2249 *(u8 *)skb_push(skb, 1) = 0x00; 2250 2250 } 2251 2251

+7 -1

drivers/net/ppp/pppoe.c

··· 393 393 if (skb_mac_header_len(skb) < ETH_HLEN) 394 394 goto drop; 395 395 396 - if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) 396 + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) 397 397 goto drop; 398 398 399 399 ph = pppoe_hdr(skb); ··· 401 401 402 402 skb_pull_rcsum(skb, sizeof(*ph)); 403 403 if (skb->len < len) 404 + goto drop; 405 + 406 + /* skb->data points to the PPP protocol header after skb_pull_rcsum. 407 + * Drop PFC frames. 408 + */ 409 + if (ppp_skb_is_compressed_proto(skb)) 404 410 goto drop; 405 411 406 412 if (pskb_trim_rcsum(skb, len))

+9 -4

drivers/net/pse-pd/pse_core.c

··· 1170 1170 struct pse_controller_dev *pcdev; 1171 1171 struct pse_irq_desc desc; 1172 1172 unsigned long *notifs; 1173 + unsigned long *notifs_mask; 1173 1174 }; 1174 1175 1175 1176 /** ··· 1248 1247 static irqreturn_t pse_isr(int irq, void *data) 1249 1248 { 1250 1249 struct pse_controller_dev *pcdev; 1251 - unsigned long notifs_mask = 0; 1252 1250 struct pse_irq_desc *desc; 1253 1251 struct pse_irq *h = data; 1254 1252 int ret, i; ··· 1257 1257 1258 1258 /* Clear notifs mask */ 1259 1259 memset(h->notifs, 0, pcdev->nr_lines * sizeof(*h->notifs)); 1260 + bitmap_zero(h->notifs_mask, pcdev->nr_lines); 1260 1261 mutex_lock(&pcdev->lock); 1261 - ret = desc->map_event(irq, pcdev, h->notifs, &notifs_mask); 1262 - if (ret || !notifs_mask) { 1262 + ret = desc->map_event(irq, pcdev, h->notifs, h->notifs_mask); 1263 + if (ret || bitmap_empty(h->notifs_mask, pcdev->nr_lines)) { 1263 1264 mutex_unlock(&pcdev->lock); 1264 1265 return IRQ_NONE; 1265 1266 } 1266 1267 1267 - for_each_set_bit(i, &notifs_mask, pcdev->nr_lines) { 1268 + for_each_set_bit(i, h->notifs_mask, pcdev->nr_lines) { 1268 1269 unsigned long notifs, rnotifs; 1269 1270 struct pse_ntf ntf = {}; 1270 1271 ··· 1339 1338 h->notifs = devm_kcalloc(dev, pcdev->nr_lines, 1340 1339 sizeof(*h->notifs), GFP_KERNEL); 1341 1340 if (!h->notifs) 1341 + return -ENOMEM; 1342 + 1343 + h->notifs_mask = devm_bitmap_zalloc(dev, pcdev->nr_lines, GFP_KERNEL); 1344 + if (!h->notifs_mask) 1342 1345 return -ENOMEM; 1343 1346 1344 1347 ret = devm_request_threaded_irq(dev, irq, NULL, pse_isr,

+31 -18

drivers/net/slip/slhc.c

··· 80 80 #include <linux/unaligned.h> 81 81 82 82 static unsigned char *encode(unsigned char *cp, unsigned short n); 83 - static long decode(unsigned char **cpp); 83 + static long decode(unsigned char **cpp, const unsigned char *end); 84 84 static unsigned char * put16(unsigned char *cp, unsigned short x); 85 - static unsigned short pull16(unsigned char **cpp); 85 + static long pull16(unsigned char **cpp, const unsigned char *end); 86 86 87 87 /* Allocate compression data structure 88 88 * slots must be in range 0 to 255 (zero meaning no compression) ··· 190 190 return cp; 191 191 } 192 192 193 - /* Pull a 16-bit integer in host order from buffer in network byte order */ 194 - static unsigned short 195 - pull16(unsigned char **cpp) 193 + /* Pull a 16-bit integer in host order from buffer in network byte order. 194 + * Returns -1 if the buffer is exhausted, otherwise the 16-bit value. 195 + */ 196 + static long 197 + pull16(unsigned char **cpp, const unsigned char *end) 196 198 { 197 - short rval; 199 + long rval; 198 200 201 + if (*cpp + 2 > end) 202 + return -1; 199 203 rval = *(*cpp)++; 200 204 rval <<= 8; 201 205 rval |= *(*cpp)++; 202 206 return rval; 203 207 } 204 208 205 - /* Decode a number */ 209 + /* Decode a number. Returns -1 if the buffer is exhausted. 
*/ 206 210 static long 207 - decode(unsigned char **cpp) 211 + decode(unsigned char **cpp, const unsigned char *end) 208 212 { 209 213 int x; 210 214 215 + if (*cpp >= end) 216 + return -1; 211 217 x = *(*cpp)++; 212 - if(x == 0){ 213 - return pull16(cpp) & 0xffff; /* pull16 returns -1 on error */ 214 - } else { 215 - return x & 0xff; /* -1 if PULLCHAR returned error */ 216 - } 218 + if (x == 0) 219 + return pull16(cpp, end); 220 + return x & 0xff; 217 221 } 218 222 219 223 /* ··· 503 499 struct cstate *cs; 504 500 int len, hdrlen; 505 501 unsigned char *cp = icp; 502 + const unsigned char *end = icp + isize; 506 503 507 504 /* We've got a compressed packet; read the change byte */ 508 505 comp->sls_i_compressed++; ··· 511 506 comp->sls_i_error++; 512 507 return 0; 513 508 } 509 + if (!comp->rstate) 510 + goto bad; 514 511 changes = *cp++; 515 512 if(changes & NEW_C){ 516 513 /* Make sure the state index is in range, then grab the state. ··· 541 534 thp = &cs->cs_tcp; 542 535 ip = &cs->cs_ip; 543 536 537 + if (cp + 2 > end) 538 + goto bad; 544 539 thp->check = *(__sum16 *)cp; 545 540 cp += 2; 546 541 ··· 573 564 default: 574 565 if(changes & NEW_U){ 575 566 thp->urg = 1; 576 - if((x = decode(&cp)) == -1) { 567 + if((x = decode(&cp, end)) == -1) { 577 568 goto bad; 578 569 } 579 570 thp->urg_ptr = htons(x); 580 571 } else 581 572 thp->urg = 0; 582 573 if(changes & NEW_W){ 583 - if((x = decode(&cp)) == -1) { 574 + if((x = decode(&cp, end)) == -1) { 584 575 goto bad; 585 576 } 586 577 thp->window = htons( ntohs(thp->window) + x); 587 578 } 588 579 if(changes & NEW_A){ 589 - if((x = decode(&cp)) == -1) { 580 + if((x = decode(&cp, end)) == -1) { 590 581 goto bad; 591 582 } 592 583 thp->ack_seq = htonl( ntohl(thp->ack_seq) + x); 593 584 } 594 585 if(changes & NEW_S){ 595 - if((x = decode(&cp)) == -1) { 586 + if((x = decode(&cp, end)) == -1) { 596 587 goto bad; 597 588 } 598 589 thp->seq = htonl( ntohl(thp->seq) + x); ··· 600 591 break; 601 592 } 602 593 if(changes & 
NEW_I){ 603 - if((x = decode(&cp)) == -1) { 594 + if((x = decode(&cp, end)) == -1) { 604 595 goto bad; 605 596 } 606 597 ip->id = htons (ntohs (ip->id) + x); ··· 658 649 struct cstate *cs; 659 650 unsigned int ihl; 660 651 652 + if (!comp->rstate) { 653 + comp->sls_i_error++; 654 + return slhc_toss(comp); 655 + } 661 656 /* The packet is shorter than a legal IP header. 662 657 * Also make sure isize is positive. 663 658 */

+6

drivers/net/virtio_net.c

··· 3759 3759 queue_pairs); 3760 3760 return -EINVAL; 3761 3761 } 3762 + 3763 + /* Keep max_tx_vq in sync so that a later RSS command does not 3764 + * revert queue_pairs to a stale value. 3765 + */ 3766 + if (vi->has_rss) 3767 + vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs); 3762 3768 succ: 3763 3769 vi->curr_queue_pairs = queue_pairs; 3764 3770 if (dev->flags & IFF_UP) {

+2 -2

drivers/vhost/net.c

··· 560 560 busyloop_timeout = poll_rx ? rvq->busyloop_timeout: 561 561 tvq->busyloop_timeout; 562 562 563 - preempt_disable(); 563 + migrate_disable(); 564 564 endtime = busy_clock() + busyloop_timeout; 565 565 566 566 while (vhost_can_busy_poll(endtime)) { ··· 577 577 cpu_relax(); 578 578 } 579 579 580 - preempt_enable(); 580 + migrate_enable(); 581 581 582 582 if (poll_rx || sock_has_rx_data(sock)) 583 583 vhost_net_busy_poll_try_queue(net, vq);

+8 -1

include/linux/fsl/ntmp.h

··· 31 31 u8 rsst_ver; 32 32 }; 33 33 34 + struct netc_swcbd { 35 + void *buf; 36 + dma_addr_t dma; 37 + size_t size; 38 + }; 39 + 34 40 struct netc_cbdr { 35 41 struct device *dev; 36 42 struct netc_cbdr_regs regs; ··· 50 44 void *addr_base_align; 51 45 dma_addr_t dma_base; 52 46 dma_addr_t dma_base_align; 47 + struct netc_swcbd *swcbd; 53 48 54 49 /* Serialize the order of command BD ring */ 55 - spinlock_t ring_lock; 50 + struct mutex ring_lock; 56 51 }; 57 52 58 53 struct ntmp_user {

+16 -9

include/linux/if_vlan.h

··· 147 147 * @priority: skb priority 148 148 * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 149 149 * @next: pointer to next struct 150 + * @rcu: used for deferred freeing of mapping nodes 150 151 */ 151 152 struct vlan_priority_tci_mapping { 152 153 u32 priority; 153 154 u16 vlan_qos; 154 - struct vlan_priority_tci_mapping *next; 155 + struct vlan_priority_tci_mapping __rcu *next; 156 + struct rcu_head rcu; 155 157 }; 156 158 157 159 struct proc_dir_entry; ··· 179 177 unsigned int nr_ingress_mappings; 180 178 u32 ingress_priority_map[8]; 181 179 unsigned int nr_egress_mappings; 182 - struct vlan_priority_tci_mapping *egress_priority_map[16]; 180 + struct vlan_priority_tci_mapping __rcu *egress_priority_map[16]; 183 181 184 182 __be16 vlan_proto; 185 183 u16 vlan_id; ··· 211 209 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) 212 210 { 213 211 struct vlan_priority_tci_mapping *mp; 212 + u16 vlan_qos = 0; 214 213 215 - smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ 214 + rcu_read_lock(); 216 215 217 - mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)]; 216 + mp = rcu_dereference(vlan_dev_priv(dev)->egress_priority_map[skprio & 0xF]); 218 217 while (mp) { 219 218 if (mp->priority == skprio) { 220 - return mp->vlan_qos; /* This should already be shifted 221 - * to mask correctly with the 222 - * VLAN's TCI */ 219 + vlan_qos = READ_ONCE(mp->vlan_qos); 220 + break; 223 221 } 224 - mp = mp->next; 222 + mp = rcu_dereference(mp->next); 225 223 } 226 - return 0; 224 + rcu_read_unlock(); 225 + 226 + /* This should already be shifted to mask correctly with 227 + * the VLAN's TCI. 228 + */ 229 + return vlan_qos; 227 230 } 228 231 229 232 extern bool vlan_do_receive(struct sk_buff **skb);

+28

include/linux/netdevice.h

··· 1119 1119 * This function is called device changes address list filtering. 1120 1120 * If driver handles unicast address filtering, it should set 1121 1121 * IFF_UNICAST_FLT in its priv_flags. 1122 + * Cannot sleep, called with netif_addr_lock_bh held. 1123 + * Deprecated in favor of ndo_set_rx_mode_async. 1124 + * 1125 + * void (*ndo_set_rx_mode_async)(struct net_device *dev, 1126 + * struct netdev_hw_addr_list *uc, 1127 + * struct netdev_hw_addr_list *mc); 1128 + * Async version of ndo_set_rx_mode which runs in process context 1129 + * with rtnl_lock and netdev_lock_ops(dev) held. The uc/mc parameters 1130 + * are snapshots of the address lists - iterate with 1131 + * netdev_hw_addr_list_for_each(ha, uc). 1122 1132 * 1123 1133 * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); 1124 1134 * This function is called when the Media Access Control address ··· 1449 1439 void (*ndo_change_rx_flags)(struct net_device *dev, 1450 1440 int flags); 1451 1441 void (*ndo_set_rx_mode)(struct net_device *dev); 1442 + void (*ndo_set_rx_mode_async)( 1443 + struct net_device *dev, 1444 + struct netdev_hw_addr_list *uc, 1445 + struct netdev_hw_addr_list *mc); 1452 1446 int (*ndo_set_mac_address)(struct net_device *dev, 1453 1447 void *addr); 1454 1448 int (*ndo_validate_addr)(struct net_device *dev); ··· 1917 1903 * has been enabled due to the need to listen to 1918 1904 * additional unicast addresses in a device that 1919 1905 * does not implement ndo_set_rx_mode() 1906 + * @rx_mode_node: List entry for rx_mode work processing 1907 + * @rx_mode_tracker: Refcount tracker for rx_mode work 1908 + * @rx_mode_addr_cache: Recycled snapshot entries for rx_mode work 1920 1909 * @uc: unicast mac addresses 1921 1910 * @mc: multicast mac addresses 1922 1911 * @dev_addrs: list of device hw addresses ··· 2311 2294 unsigned int promiscuity; 2312 2295 unsigned int allmulti; 2313 2296 bool uc_promisc; 2297 + struct list_head rx_mode_node; 2298 + netdevice_tracker rx_mode_tracker; 
2299 + struct netdev_hw_addr_list rx_mode_addr_cache; 2314 2300 #ifdef CONFIG_LOCKDEP 2315 2301 unsigned char nested_level; 2316 2302 #endif ··· 5024 5004 int (*unsync)(struct net_device *, 5025 5005 const unsigned char *)); 5026 5006 void __hw_addr_init(struct netdev_hw_addr_list *list); 5007 + void __hw_addr_flush(struct netdev_hw_addr_list *list); 5008 + int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap, 5009 + const struct netdev_hw_addr_list *list, 5010 + int addr_len, struct netdev_hw_addr_list *cache); 5011 + void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list, 5012 + struct netdev_hw_addr_list *work, 5013 + struct netdev_hw_addr_list *ref, int addr_len, 5014 + struct netdev_hw_addr_list *cache); 5027 5015 5028 5016 /* Functions used for device addresses handling */ 5029 5017 void dev_addr_mod(struct net_device *dev, unsigned int offset,

+16

include/linux/ppp_defs.h

··· 8 8 #define _PPP_DEFS_H_ 9 9 10 10 #include <linux/crc-ccitt.h> 11 + #include <linux/skbuff.h> 11 12 #include <uapi/linux/ppp_defs.h> 12 13 13 14 #define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c) ··· 24 23 static inline bool ppp_proto_is_valid(u16 proto) 25 24 { 26 25 return !!((proto & 0x0101) == 0x0001); 26 + } 27 + 28 + /** 29 + * ppp_skb_is_compressed_proto - checks if PPP protocol in a skb is compressed 30 + * @skb: skb to check 31 + * 32 + * Check if the PPP protocol field is compressed (the least significant 33 + * bit of the most significant octet is 1). skb->data must point to the PPP 34 + * protocol header. 35 + * 36 + * Return: Whether the PPP protocol field is compressed. 37 + */ 38 + static inline bool ppp_skb_is_compressed_proto(const struct sk_buff *skb) 39 + { 40 + return unlikely(skb->data[0] & 0x01); 27 41 } 28 42 29 43 #endif /* _PPP_DEFS_H_ */

+3

include/net/mctp.h

··· 26 26 #define MCTP_VER_MIN 1 27 27 #define MCTP_VER_MAX 1 28 28 29 + /* Definitions for ver field */ 30 + #define MCTP_HDR_VER_MASK GENMASK(3, 0) 31 + 29 32 /* Definitions for flags_seq_tag field */ 30 33 #define MCTP_HDR_FLAG_SOM BIT(7) 31 34 #define MCTP_HDR_FLAG_EOM BIT(6)

+1 -1

include/net/pie.h

··· 104 104 vars->dq_tstamp = DTIME_INVALID; 105 105 vars->accu_prob = 0; 106 106 vars->dq_count = DQCOUNT_INVALID; 107 - vars->avg_dq_rate = 0; 107 + WRITE_ONCE(vars->avg_dq_rate, 0); 108 108 } 109 109 110 110 static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)

+8 -4

include/net/tcp.h

··· 1513 1513 static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) 1514 1514 { 1515 1515 WARN_ON_ONCE((int)val <= 0); 1516 - tp->snd_cwnd = val; 1516 + WRITE_ONCE(tp->snd_cwnd, val); 1517 1517 } 1518 1518 1519 1519 static inline bool tcp_in_slow_start(const struct tcp_sock *tp) ··· 2208 2208 const u32 now = tcp_jiffies32; 2209 2209 enum tcp_chrono old = tp->chrono_type; 2210 2210 2211 + /* Following WRITE_ONCE()s pair with READ_ONCE()s in 2212 + * tcp_get_info_chrono_stats(). 2213 + */ 2211 2214 if (old > TCP_CHRONO_UNSPEC) 2212 - tp->chrono_stat[old - 1] += now - tp->chrono_start; 2213 - tp->chrono_start = now; 2214 - tp->chrono_type = new; 2215 + WRITE_ONCE(tp->chrono_stat[old - 1], 2216 + tp->chrono_stat[old - 1] + now - tp->chrono_start); 2217 + WRITE_ONCE(tp->chrono_start, now); 2218 + WRITE_ONCE(tp->chrono_type, new); 2215 2219 } 2216 2220 2217 2221 static inline void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)

+1 -1

include/net/tcp_ecn.h

··· 181 181 tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) { 182 182 if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) && 183 183 !tp->delivered_ce) 184 - tp->delivered_ce++; 184 + WRITE_ONCE(tp->delivered_ce, 1); 185 185 } 186 186 break; 187 187 }

+3 -1

include/trace/events/net.h

··· 10 10 #include <linux/if_vlan.h> 11 11 #include <linux/ip.h> 12 12 #include <linux/tracepoint.h> 13 + #include <net/busy_poll.h> 13 14 14 15 TRACE_EVENT(net_dev_start_xmit, 15 16 ··· 209 208 TP_fast_assign( 210 209 __assign_str(name); 211 210 #ifdef CONFIG_NET_RX_BUSY_POLL 212 - __entry->napi_id = skb->napi_id; 211 + __entry->napi_id = napi_id_valid(skb->napi_id) ? 212 + skb->napi_id : 0; 213 213 #else 214 214 __entry->napi_id = 0; 215 215 #endif

+3 -3

include/trace/events/rxrpc.h

··· 37 37 EM(rxkad_abort_1_short_encdata, "rxkad1-short-encdata") \ 38 38 EM(rxkad_abort_1_short_header, "rxkad1-short-hdr") \ 39 39 EM(rxkad_abort_2_short_check, "rxkad2-short-check") \ 40 + EM(rxkad_abort_2_crypto_unaligned, "rxkad2-crypto-unaligned") \ 40 41 EM(rxkad_abort_2_short_data, "rxkad2-short-data") \ 41 42 EM(rxkad_abort_2_short_header, "rxkad2-short-hdr") \ 42 43 EM(rxkad_abort_2_short_len, "rxkad2-short-len") \ ··· 162 161 E_(rxrpc_call_poke_timer_now, "Timer-now") 163 162 164 163 #define rxrpc_skb_traces \ 165 - EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ 166 - EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ 167 164 EM(rxrpc_skb_get_call_rx, "GET call-rx ") \ 168 165 EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ 169 166 EM(rxrpc_skb_get_conn_work, "GET conn-work") \ ··· 188 189 EM(rxrpc_skb_put_purge, "PUT purge ") \ 189 190 EM(rxrpc_skb_put_purge_oob, "PUT purge-oob") \ 190 191 EM(rxrpc_skb_put_response, "PUT response ") \ 192 + EM(rxrpc_skb_put_response_copy, "PUT resp-cpy ") \ 191 193 EM(rxrpc_skb_put_rotate, "PUT rotate ") \ 192 194 EM(rxrpc_skb_put_unknown, "PUT unknown ") \ 193 195 EM(rxrpc_skb_see_conn_work, "SEE conn-work") \ ··· 197 197 EM(rxrpc_skb_see_recvmsg_oob, "SEE recvm-oob") \ 198 198 EM(rxrpc_skb_see_reject, "SEE reject ") \ 199 199 EM(rxrpc_skb_see_rotate, "SEE rotate ") \ 200 + EM(rxrpc_skb_see_unshare_nomem, "SEE unshar-nm") \ 200 201 E_(rxrpc_skb_see_version, "SEE version ") 201 202 202 203 #define rxrpc_local_traces \ ··· 285 284 EM(rxrpc_conn_put_unidle, "PUT unidle ") \ 286 285 EM(rxrpc_conn_put_work, "PUT work ") \ 287 286 EM(rxrpc_conn_queue_challenge, "QUE chall ") \ 288 - EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \ 289 287 EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \ 290 288 EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \ 291 289 EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \

+27 -18

net/8021q/vlan_dev.c

··· 172 172 u32 skb_prio, u16 vlan_prio) 173 173 { 174 174 struct vlan_dev_priv *vlan = vlan_dev_priv(dev); 175 - struct vlan_priority_tci_mapping *mp = NULL; 175 + struct vlan_priority_tci_mapping __rcu **mpp; 176 + struct vlan_priority_tci_mapping *mp; 176 177 struct vlan_priority_tci_mapping *np; 178 + u32 bucket = skb_prio & 0xF; 177 179 u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; 178 180 179 181 /* See if a priority mapping exists.. */ 180 - mp = vlan->egress_priority_map[skb_prio & 0xF]; 182 + mpp = &vlan->egress_priority_map[bucket]; 183 + mp = rtnl_dereference(*mpp); 181 184 while (mp) { 182 185 if (mp->priority == skb_prio) { 183 - if (mp->vlan_qos && !vlan_qos) 186 + if (!vlan_qos) { 187 + rcu_assign_pointer(*mpp, rtnl_dereference(mp->next)); 184 188 vlan->nr_egress_mappings--; 185 - else if (!mp->vlan_qos && vlan_qos) 186 - vlan->nr_egress_mappings++; 187 - mp->vlan_qos = vlan_qos; 189 + kfree_rcu(mp, rcu); 190 + } else { 191 + WRITE_ONCE(mp->vlan_qos, vlan_qos); 192 + } 188 193 return 0; 189 194 } 190 - mp = mp->next; 195 + mpp = &mp->next; 196 + mp = rtnl_dereference(*mpp); 191 197 } 192 198 193 199 /* Create a new mapping then. */ 194 - mp = vlan->egress_priority_map[skb_prio & 0xF]; 200 + if (!vlan_qos) 201 + return 0; 202 + 195 203 np = kmalloc_obj(struct vlan_priority_tci_mapping); 196 204 if (!np) 197 205 return -ENOBUFS; 198 206 199 - np->next = mp; 200 207 np->priority = skb_prio; 201 208 np->vlan_qos = vlan_qos; 202 - /* Before inserting this element in hash table, make sure all its fields 203 - * are committed to memory. 
204 - * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() 205 - */ 206 - smp_wmb(); 207 - vlan->egress_priority_map[skb_prio & 0xF] = np; 209 + RCU_INIT_POINTER(np->next, rtnl_dereference(vlan->egress_priority_map[bucket])); 210 + rcu_assign_pointer(vlan->egress_priority_map[bucket], np); 208 211 if (vlan_qos) 209 212 vlan->nr_egress_mappings++; 210 213 return 0; ··· 607 604 int i; 608 605 609 606 for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { 610 - while ((pm = vlan->egress_priority_map[i]) != NULL) { 611 - vlan->egress_priority_map[i] = pm->next; 612 - kfree(pm); 607 + pm = rtnl_dereference(vlan->egress_priority_map[i]); 608 + RCU_INIT_POINTER(vlan->egress_priority_map[i], NULL); 609 + while (pm) { 610 + struct vlan_priority_tci_mapping *next; 611 + 612 + next = rtnl_dereference(pm->next); 613 + kfree_rcu(pm, rcu); 614 + pm = next; 613 615 } 614 616 } 617 + vlan->nr_egress_mappings = 0; 615 618 } 616 619 617 620 static void vlan_dev_uninit(struct net_device *dev)

+4 -6

net/8021q/vlan_netlink.c

··· 260 260 goto nla_put_failure; 261 261 262 262 for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { 263 - for (pm = vlan->egress_priority_map[i]; pm; 264 - pm = pm->next) { 265 - if (!pm->vlan_qos) 266 - continue; 267 - 263 + for (pm = rcu_dereference_rtnl(vlan->egress_priority_map[i]); pm; 264 + pm = rcu_dereference_rtnl(pm->next)) { 265 + u16 vlan_qos = READ_ONCE(pm->vlan_qos); 268 266 m.from = pm->priority; 269 - m.to = (pm->vlan_qos >> 13) & 0x7; 267 + m.to = (vlan_qos >> 13) & 0x7; 270 268 if (nla_put(skb, IFLA_VLAN_QOS_MAPPING, 271 269 sizeof(m), &m)) 272 270 goto nla_put_failure;

+8 -4

net/8021q/vlanproc.c

··· 262 262 vlan->ingress_priority_map[7]); 263 263 264 264 seq_printf(seq, " EGRESS priority mappings: "); 265 + rcu_read_lock(); 265 266 for (i = 0; i < 16; i++) { 266 - const struct vlan_priority_tci_mapping *mp 267 - = vlan->egress_priority_map[i]; 267 + const struct vlan_priority_tci_mapping *mp = 268 + rcu_dereference(vlan->egress_priority_map[i]); 268 269 while (mp) { 270 + u16 vlan_qos = READ_ONCE(mp->vlan_qos); 271 + 269 272 seq_printf(seq, "%u:%d ", 270 - mp->priority, ((mp->vlan_qos >> 13) & 0x7)); 271 - mp = mp->next; 273 + mp->priority, ((vlan_qos >> 13) & 0x7)); 274 + mp = rcu_dereference(mp->next); 272 275 } 273 276 } 277 + rcu_read_unlock(); 274 278 seq_puts(seq, "\n"); 275 279 276 280 return 0;

+5 -3

net/bridge/br_arp_nd_proxy.c

··· 201 201 202 202 f = br_fdb_find_rcu(br, n->ha, vid); 203 203 if (f) { 204 + const struct net_bridge_port *dst = READ_ONCE(f->dst); 204 205 bool replied = false; 205 206 206 207 if ((p && (p->flags & BR_PROXYARP)) || 207 - (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)) || 208 - br_is_neigh_suppress_enabled(f->dst, vid)) { 208 + (dst && (dst->flags & BR_PROXYARP_WIFI)) || 209 + br_is_neigh_suppress_enabled(dst, vid)) { 209 210 if (!vid) 210 211 br_arp_send(br, p, skb->dev, sip, tip, 211 212 sha, n->ha, sha, 0, 0); ··· 470 469 471 470 f = br_fdb_find_rcu(br, n->ha, vid); 472 471 if (f) { 472 + const struct net_bridge_port *dst = READ_ONCE(f->dst); 473 473 bool replied = false; 474 474 475 - if (br_is_neigh_suppress_enabled(f->dst, vid)) { 475 + if (br_is_neigh_suppress_enabled(dst, vid)) { 476 476 if (vid != 0) 477 477 br_nd_send(br, p, skb, n, 478 478 skb->vlan_proto,

+18 -10

net/bridge/br_fdb.c

··· 236 236 const unsigned char *addr, 237 237 __u16 vid) 238 238 { 239 + const struct net_bridge_port *dst; 239 240 struct net_bridge_fdb_entry *f; 240 241 struct net_device *dev = NULL; 241 242 struct net_bridge *br; ··· 249 248 br = netdev_priv(br_dev); 250 249 rcu_read_lock(); 251 250 f = br_fdb_find_rcu(br, addr, vid); 252 - if (f && f->dst) 253 - dev = f->dst->dev; 251 + if (f) { 252 + dst = READ_ONCE(f->dst); 253 + if (dst) 254 + dev = dst->dev; 255 + } 254 256 rcu_read_unlock(); 255 257 256 258 return dev; ··· 350 346 vg = nbp_vlan_group(op); 351 347 if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && 352 348 (!vid || br_vlan_find(vg, vid))) { 353 - f->dst = op; 349 + WRITE_ONCE(f->dst, op); 354 350 clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); 355 351 return; 356 352 } ··· 361 357 /* Maybe bridge device has same hw addr? */ 362 358 if (p && ether_addr_equal(br->dev->dev_addr, addr) && 363 359 (!vid || (v && br_vlan_should_use(v)))) { 364 - f->dst = NULL; 360 + WRITE_ONCE(f->dst, NULL); 365 361 clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); 366 362 return; 367 363 } ··· 932 928 int br_fdb_fillbuf(struct net_bridge *br, void *buf, 933 929 unsigned long maxnum, unsigned long skip) 934 930 { 931 + const struct net_bridge_port *dst; 935 932 struct net_bridge_fdb_entry *f; 936 933 struct __fdb_entry *fe = buf; 937 934 unsigned long delta; ··· 949 944 continue; 950 945 951 946 /* ignore pseudo entry for local MAC address */ 952 - if (!f->dst) 947 + dst = READ_ONCE(f->dst); 948 + if (!dst) 953 949 continue; 954 950 955 951 if (skip) { ··· 962 956 memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN); 963 957 964 958 /* due to ABI compat need to split into hi/lo */ 965 - fe->port_no = f->dst->port_no; 966 - fe->port_hi = f->dst->port_no >> 8; 959 + fe->port_no = dst->port_no; 960 + fe->port_hi = dst->port_no >> 8; 967 961 968 962 fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags); 969 963 if (!test_bit(BR_FDB_STATIC, &f->flags)) { ··· 1089 1083 1090 1084 
rcu_read_lock(); 1091 1085 hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { 1086 + const struct net_bridge_port *dst = READ_ONCE(f->dst); 1087 + 1092 1088 if (*idx < ctx->fdb_idx) 1093 1089 goto skip; 1094 - if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { 1090 + if (filter_dev && (!dst || dst->dev != filter_dev)) { 1095 1091 if (filter_dev != dev) 1096 1092 goto skip; 1097 1093 /* !f->dst is a special case for bridge ··· 1101 1093 * Therefore need a little more filtering 1102 1094 * we only want to dump the !f->dst case 1103 1095 */ 1104 - if (f->dst) 1096 + if (dst) 1105 1097 goto skip; 1106 1098 } 1107 - if (!filter_dev && f->dst) 1099 + if (!filter_dev && dst) 1108 1100 goto skip; 1109 1101 1110 1102 err = fdb_fill_info(skb, br, f,

+14 -53

net/core/dev.c

··· 9593 9593 ops->ndo_change_rx_flags(dev, flags); 9594 9594 } 9595 9595 9596 - static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) 9596 + int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) 9597 9597 { 9598 9598 unsigned int old_flags = dev->flags; 9599 9599 unsigned int promiscuity, flags; 9600 9600 kuid_t uid; 9601 9601 kgid_t gid; 9602 9602 9603 - ASSERT_RTNL(); 9603 + netdev_ops_assert_locked(dev); 9604 9604 9605 9605 promiscuity = dev->promiscuity + inc; 9606 9606 if (promiscuity == 0) { ··· 9636 9636 9637 9637 dev_change_rx_flags(dev, IFF_PROMISC); 9638 9638 } 9639 - if (notify) { 9640 - /* The ops lock is only required to ensure consistent locking 9641 - * for `NETDEV_CHANGE` notifiers. This function is sometimes 9642 - * called without the lock, even for devices that are ops 9643 - * locked, such as in `dev_uc_sync_multiple` when using 9644 - * bonding or teaming. 9645 - */ 9646 - netdev_ops_assert_locked(dev); 9639 + if (notify) 9647 9640 __dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL); 9648 - } 9649 9641 return 0; 9650 9642 } 9651 9643 ··· 9659 9667 unsigned int old_flags = dev->flags, old_gflags = dev->gflags; 9660 9668 unsigned int allmulti, flags; 9661 9669 9662 - ASSERT_RTNL(); 9670 + netdev_ops_assert_locked(dev); 9663 9671 9664 9672 allmulti = dev->allmulti + inc; 9665 9673 if (allmulti == 0) { ··· 9689 9697 return 0; 9690 9698 } 9691 9699 9692 - /* 9693 - * Upload unicast and multicast address lists to device and 9694 - * configure RX filtering. When the device doesn't support unicast 9695 - * filtering it is put in promiscuous mode while unicast addresses 9696 - * are present. 9697 - */ 9698 - void __dev_set_rx_mode(struct net_device *dev) 9699 - { 9700 - const struct net_device_ops *ops = dev->netdev_ops; 9701 - 9702 - /* dev_open will call this function so the list will stay sane. 
*/ 9703 - if (!(dev->flags&IFF_UP)) 9704 - return; 9705 - 9706 - if (!netif_device_present(dev)) 9707 - return; 9708 - 9709 - if (!(dev->priv_flags & IFF_UNICAST_FLT)) { 9710 - /* Unicast addresses changes may only happen under the rtnl, 9711 - * therefore calling __dev_set_promiscuity here is safe. 9712 - */ 9713 - if (!netdev_uc_empty(dev) && !dev->uc_promisc) { 9714 - __dev_set_promiscuity(dev, 1, false); 9715 - dev->uc_promisc = true; 9716 - } else if (netdev_uc_empty(dev) && dev->uc_promisc) { 9717 - __dev_set_promiscuity(dev, -1, false); 9718 - dev->uc_promisc = false; 9719 - } 9720 - } 9721 - 9722 - if (ops->ndo_set_rx_mode) 9723 - ops->ndo_set_rx_mode(dev); 9724 - } 9725 - 9726 - void dev_set_rx_mode(struct net_device *dev) 9727 - { 9728 - netif_addr_lock_bh(dev); 9729 - __dev_set_rx_mode(dev); 9730 - netif_addr_unlock_bh(dev); 9731 - } 9732 9700 9733 9701 /** 9734 9702 * netif_get_flags() - get flags reported to userspace ··· 9727 9775 unsigned int old_flags = dev->flags; 9728 9776 int ret; 9729 9777 9730 - ASSERT_RTNL(); 9778 + netdev_ops_assert_locked(dev); 9731 9779 9732 9780 /* 9733 9781 * Set the flags on our device. 
··· 11360 11408 goto err_uninit; 11361 11409 } 11362 11410 11411 + if (netdev_need_ops_lock(dev) && 11412 + dev->netdev_ops->ndo_set_rx_mode && 11413 + !dev->netdev_ops->ndo_set_rx_mode_async) 11414 + netdev_WARN(dev, "ops-locked drivers should use ndo_set_rx_mode_async\n"); 11415 + 11363 11416 ret = netdev_do_alloc_pcpu_stats(dev); 11364 11417 if (ret) 11365 11418 goto err_uninit; ··· 12084 12127 #endif 12085 12128 12086 12129 mutex_init(&dev->lock); 12130 + INIT_LIST_HEAD(&dev->rx_mode_node); 12131 + __hw_addr_init(&dev->rx_mode_addr_cache); 12087 12132 12088 12133 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; 12089 12134 setup(dev); ··· 12189 12230 netif_free_rx_queues(dev); 12190 12231 12191 12232 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 12233 + 12234 + __hw_addr_flush(&dev->rx_mode_addr_cache); 12192 12235 12193 12236 /* Flush device addresses */ 12194 12237 dev_addr_flush(dev);

+4

net/core/dev.h

··· 78 78 void dev_addr_flush(struct net_device *dev); 79 79 int dev_addr_init(struct net_device *dev); 80 80 void dev_addr_check(struct net_device *dev); 81 + void __hw_addr_flush(struct netdev_hw_addr_list *list); 81 82 82 83 #if IS_ENABLED(CONFIG_NET_SHAPER) 83 84 void net_shaper_flush_netdev(struct net_device *dev); ··· 165 164 int dev_change_carrier(struct net_device *dev, bool new_carrier); 166 165 167 166 void __dev_set_rx_mode(struct net_device *dev); 167 + int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify); 168 + bool netif_rx_mode_clean(struct net_device *dev); 169 + void netif_rx_mode_sync(struct net_device *dev); 168 170 169 171 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, 170 172 unsigned int gchanges, u32 portid,

+384 -1

net/core/dev_addr_lists.c

··· 11 11 #include <linux/rtnetlink.h> 12 12 #include <linux/export.h> 13 13 #include <linux/list.h> 14 + #include <linux/spinlock.h> 15 + #include <linux/workqueue.h> 16 + #include <kunit/visibility.h> 14 17 15 18 #include "dev.h" 19 + 20 + static void netdev_rx_mode_work(struct work_struct *work); 21 + 22 + static LIST_HEAD(rx_mode_list); 23 + static DEFINE_SPINLOCK(rx_mode_lock); 24 + static DECLARE_WORK(rx_mode_work, netdev_rx_mode_work); 16 25 17 26 /* 18 27 * General list handling functions ··· 490 481 } 491 482 EXPORT_SYMBOL(__hw_addr_unsync_dev); 492 483 493 - static void __hw_addr_flush(struct netdev_hw_addr_list *list) 484 + void __hw_addr_flush(struct netdev_hw_addr_list *list) 494 485 { 495 486 struct netdev_hw_addr *ha, *tmp; 496 487 ··· 501 492 } 502 493 list->count = 0; 503 494 } 495 + EXPORT_SYMBOL_IF_KUNIT(__hw_addr_flush); 504 496 505 497 void __hw_addr_init(struct netdev_hw_addr_list *list) 506 498 { ··· 510 500 list->tree = RB_ROOT; 511 501 } 512 502 EXPORT_SYMBOL(__hw_addr_init); 503 + 504 + static void __hw_addr_splice(struct netdev_hw_addr_list *dst, 505 + struct netdev_hw_addr_list *src) 506 + { 507 + src->tree = RB_ROOT; 508 + list_splice_init(&src->list, &dst->list); 509 + dst->count += src->count; 510 + src->count = 0; 511 + } 512 + 513 + /** 514 + * __hw_addr_list_snapshot - create a snapshot copy of an address list 515 + * @snap: destination snapshot list (needs to be __hw_addr_init-initialized) 516 + * @list: source address list to snapshot 517 + * @addr_len: length of addresses 518 + * @cache: entry cache to reuse entries from; falls back to GFP_ATOMIC 519 + * 520 + * Creates a copy of @list reusing entries from @cache when available. 521 + * Must be called under a spinlock. 522 + * 523 + * Return: 0 on success, -errno on failure. 
524 + */ 525 + int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap, 526 + const struct netdev_hw_addr_list *list, 527 + int addr_len, struct netdev_hw_addr_list *cache) 528 + { 529 + struct netdev_hw_addr *ha, *entry; 530 + 531 + list_for_each_entry(ha, &list->list, list) { 532 + if (cache->count) { 533 + entry = list_first_entry(&cache->list, 534 + struct netdev_hw_addr, list); 535 + list_del(&entry->list); 536 + cache->count--; 537 + memcpy(entry->addr, ha->addr, addr_len); 538 + entry->type = ha->type; 539 + entry->global_use = false; 540 + entry->synced = 0; 541 + } else { 542 + entry = __hw_addr_create(ha->addr, addr_len, ha->type, 543 + false, false); 544 + if (!entry) { 545 + __hw_addr_flush(snap); 546 + return -ENOMEM; 547 + } 548 + } 549 + entry->sync_cnt = ha->sync_cnt; 550 + entry->refcount = ha->refcount; 551 + 552 + list_add_tail(&entry->list, &snap->list); 553 + __hw_addr_insert(snap, entry, addr_len); 554 + snap->count++; 555 + } 556 + 557 + return 0; 558 + } 559 + EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_snapshot); 560 + 561 + /** 562 + * __hw_addr_list_reconcile - sync snapshot changes back and free snapshots 563 + * @real_list: the real address list to update 564 + * @work: the working snapshot (modified by driver via __hw_addr_sync_dev) 565 + * @ref: the reference snapshot (untouched copy of original state) 566 + * @addr_len: length of addresses 567 + * @cache: entry cache to return snapshot entries to for reuse 568 + * 569 + * Walks the reference snapshot and compares each entry against the work 570 + * snapshot to compute sync_cnt deltas. Applies those deltas to @real_list. 571 + * Returns snapshot entries to @cache for reuse; frees both snapshots. 572 + * Caller must hold netif_addr_lock_bh. 
573 + */ 574 + void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list, 575 + struct netdev_hw_addr_list *work, 576 + struct netdev_hw_addr_list *ref, int addr_len, 577 + struct netdev_hw_addr_list *cache) 578 + { 579 + struct netdev_hw_addr *ref_ha, *tmp, *work_ha, *real_ha; 580 + int delta; 581 + 582 + list_for_each_entry_safe(ref_ha, tmp, &ref->list, list) { 583 + work_ha = __hw_addr_lookup(work, ref_ha->addr, addr_len, 584 + ref_ha->type); 585 + if (work_ha) 586 + delta = work_ha->sync_cnt - ref_ha->sync_cnt; 587 + else 588 + delta = -1; 589 + 590 + if (delta == 0) 591 + continue; 592 + 593 + real_ha = __hw_addr_lookup(real_list, ref_ha->addr, addr_len, 594 + ref_ha->type); 595 + if (!real_ha) { 596 + /* The real entry was concurrently removed. If the 597 + * driver synced this addr to hardware (delta > 0), 598 + * re-insert it as a stale entry so the next work 599 + * run unsyncs it from hardware. 600 + */ 601 + if (delta > 0) { 602 + rb_erase(&ref_ha->node, &ref->tree); 603 + list_del(&ref_ha->list); 604 + ref->count--; 605 + ref_ha->sync_cnt = delta; 606 + ref_ha->refcount = delta; 607 + list_add_tail_rcu(&ref_ha->list, 608 + &real_list->list); 609 + __hw_addr_insert(real_list, ref_ha, 610 + addr_len); 611 + real_list->count++; 612 + } 613 + continue; 614 + } 615 + 616 + real_ha->sync_cnt += delta; 617 + real_ha->refcount += delta; 618 + if (!real_ha->refcount) { 619 + rb_erase(&real_ha->node, &real_list->tree); 620 + list_del_rcu(&real_ha->list); 621 + kfree_rcu(real_ha, rcu_head); 622 + real_list->count--; 623 + } 624 + } 625 + 626 + __hw_addr_splice(cache, work); 627 + __hw_addr_splice(cache, ref); 628 + } 629 + EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_reconcile); 513 630 514 631 /* 515 632 * Device addresses handling functions ··· 1186 1049 __hw_addr_init(&dev->mc); 1187 1050 } 1188 1051 EXPORT_SYMBOL(dev_mc_init); 1052 + 1053 + static int netif_addr_lists_snapshot(struct net_device *dev, 1054 + struct netdev_hw_addr_list *uc_snap, 1055 + struct 
netdev_hw_addr_list *mc_snap, 1056 + struct netdev_hw_addr_list *uc_ref, 1057 + struct netdev_hw_addr_list *mc_ref) 1058 + { 1059 + int err; 1060 + 1061 + err = __hw_addr_list_snapshot(uc_snap, &dev->uc, dev->addr_len, 1062 + &dev->rx_mode_addr_cache); 1063 + if (!err) 1064 + err = __hw_addr_list_snapshot(uc_ref, &dev->uc, dev->addr_len, 1065 + &dev->rx_mode_addr_cache); 1066 + if (!err) 1067 + err = __hw_addr_list_snapshot(mc_snap, &dev->mc, 1068 + dev->addr_len, 1069 + &dev->rx_mode_addr_cache); 1070 + if (!err) 1071 + err = __hw_addr_list_snapshot(mc_ref, &dev->mc, dev->addr_len, 1072 + &dev->rx_mode_addr_cache); 1073 + 1074 + if (err) { 1075 + __hw_addr_flush(uc_snap); 1076 + __hw_addr_flush(uc_ref); 1077 + __hw_addr_flush(mc_snap); 1078 + } 1079 + 1080 + return err; 1081 + } 1082 + 1083 + static void netif_addr_lists_reconcile(struct net_device *dev, 1084 + struct netdev_hw_addr_list *uc_snap, 1085 + struct netdev_hw_addr_list *mc_snap, 1086 + struct netdev_hw_addr_list *uc_ref, 1087 + struct netdev_hw_addr_list *mc_ref) 1088 + { 1089 + __hw_addr_list_reconcile(&dev->uc, uc_snap, uc_ref, dev->addr_len, 1090 + &dev->rx_mode_addr_cache); 1091 + __hw_addr_list_reconcile(&dev->mc, mc_snap, mc_ref, dev->addr_len, 1092 + &dev->rx_mode_addr_cache); 1093 + } 1094 + 1095 + /** 1096 + * netif_uc_promisc_update() - evaluate whether uc_promisc should be toggled. 1097 + * @dev: device 1098 + * 1099 + * Must be called under netif_addr_lock_bh. 1100 + * Return: +1 to enter promisc, -1 to leave, 0 for no change. 
1101 + */ 1102 + static int netif_uc_promisc_update(struct net_device *dev) 1103 + { 1104 + if (dev->priv_flags & IFF_UNICAST_FLT) 1105 + return 0; 1106 + 1107 + if (!netdev_uc_empty(dev) && !dev->uc_promisc) { 1108 + dev->uc_promisc = true; 1109 + return 1; 1110 + } 1111 + if (netdev_uc_empty(dev) && dev->uc_promisc) { 1112 + dev->uc_promisc = false; 1113 + return -1; 1114 + } 1115 + return 0; 1116 + } 1117 + 1118 + static void netif_rx_mode_run(struct net_device *dev) 1119 + { 1120 + struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref; 1121 + const struct net_device_ops *ops = dev->netdev_ops; 1122 + int promisc_inc; 1123 + int err; 1124 + 1125 + might_sleep(); 1126 + netdev_ops_assert_locked(dev); 1127 + 1128 + __hw_addr_init(&uc_snap); 1129 + __hw_addr_init(&mc_snap); 1130 + __hw_addr_init(&uc_ref); 1131 + __hw_addr_init(&mc_ref); 1132 + 1133 + if (!(dev->flags & IFF_UP) || !netif_device_present(dev)) 1134 + return; 1135 + 1136 + if (ops->ndo_set_rx_mode_async) { 1137 + netif_addr_lock_bh(dev); 1138 + err = netif_addr_lists_snapshot(dev, &uc_snap, &mc_snap, 1139 + &uc_ref, &mc_ref); 1140 + if (err) { 1141 + netdev_WARN(dev, "failed to sync uc/mc addresses\n"); 1142 + netif_addr_unlock_bh(dev); 1143 + return; 1144 + } 1145 + 1146 + promisc_inc = netif_uc_promisc_update(dev); 1147 + netif_addr_unlock_bh(dev); 1148 + } else { 1149 + netif_addr_lock_bh(dev); 1150 + promisc_inc = netif_uc_promisc_update(dev); 1151 + netif_addr_unlock_bh(dev); 1152 + } 1153 + 1154 + if (promisc_inc) 1155 + __dev_set_promiscuity(dev, promisc_inc, false); 1156 + 1157 + if (ops->ndo_set_rx_mode_async) { 1158 + ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap); 1159 + 1160 + netif_addr_lock_bh(dev); 1161 + netif_addr_lists_reconcile(dev, &uc_snap, &mc_snap, 1162 + &uc_ref, &mc_ref); 1163 + netif_addr_unlock_bh(dev); 1164 + } else if (ops->ndo_set_rx_mode) { 1165 + netif_addr_lock_bh(dev); 1166 + ops->ndo_set_rx_mode(dev); 1167 + netif_addr_unlock_bh(dev); 1168 + } 1169 + } 1170 
+ 1171 + static void netdev_rx_mode_work(struct work_struct *work) 1172 + { 1173 + struct net_device *dev; 1174 + 1175 + rtnl_lock(); 1176 + 1177 + while (true) { 1178 + spin_lock_bh(&rx_mode_lock); 1179 + if (list_empty(&rx_mode_list)) { 1180 + spin_unlock_bh(&rx_mode_lock); 1181 + break; 1182 + } 1183 + dev = list_first_entry(&rx_mode_list, struct net_device, 1184 + rx_mode_node); 1185 + list_del_init(&dev->rx_mode_node); 1186 + /* We must free netdev tracker under 1187 + * the spinlock protection. 1188 + */ 1189 + netdev_tracker_free(dev, &dev->rx_mode_tracker); 1190 + spin_unlock_bh(&rx_mode_lock); 1191 + 1192 + netdev_lock_ops(dev); 1193 + netif_rx_mode_run(dev); 1194 + netdev_unlock_ops(dev); 1195 + /* Use __dev_put() because netdev_tracker_free() was already 1196 + * called above. Must be after netdev_unlock_ops() to prevent 1197 + * netdev_run_todo() from freeing the device while still in use. 1198 + */ 1199 + __dev_put(dev); 1200 + } 1201 + 1202 + rtnl_unlock(); 1203 + } 1204 + 1205 + static void netif_rx_mode_queue(struct net_device *dev) 1206 + { 1207 + spin_lock_bh(&rx_mode_lock); 1208 + if (list_empty(&dev->rx_mode_node)) { 1209 + list_add_tail(&dev->rx_mode_node, &rx_mode_list); 1210 + netdev_hold(dev, &dev->rx_mode_tracker, GFP_ATOMIC); 1211 + } 1212 + spin_unlock_bh(&rx_mode_lock); 1213 + schedule_work(&rx_mode_work); 1214 + } 1215 + 1216 + /** 1217 + * __dev_set_rx_mode() - upload unicast and multicast address lists to device 1218 + * and configure RX filtering. 1219 + * @dev: device 1220 + * 1221 + * When the device doesn't support unicast filtering it is put in promiscuous 1222 + * mode while unicast addresses are present. 1223 + */ 1224 + void __dev_set_rx_mode(struct net_device *dev) 1225 + { 1226 + const struct net_device_ops *ops = dev->netdev_ops; 1227 + int promisc_inc; 1228 + 1229 + /* dev_open will call this function so the list will stay sane. 
*/ 1230 + if (!(dev->flags & IFF_UP)) 1231 + return; 1232 + 1233 + if (!netif_device_present(dev)) 1234 + return; 1235 + 1236 + if (ops->ndo_set_rx_mode_async || ops->ndo_change_rx_flags || 1237 + netdev_need_ops_lock(dev)) { 1238 + netif_rx_mode_queue(dev); 1239 + return; 1240 + } 1241 + 1242 + /* Legacy path for non-ops-locked HW devices. */ 1243 + 1244 + promisc_inc = netif_uc_promisc_update(dev); 1245 + if (promisc_inc) 1246 + __dev_set_promiscuity(dev, promisc_inc, false); 1247 + 1248 + if (ops->ndo_set_rx_mode) 1249 + ops->ndo_set_rx_mode(dev); 1250 + } 1251 + 1252 + void dev_set_rx_mode(struct net_device *dev) 1253 + { 1254 + netif_addr_lock_bh(dev); 1255 + __dev_set_rx_mode(dev); 1256 + netif_addr_unlock_bh(dev); 1257 + } 1258 + 1259 + bool netif_rx_mode_clean(struct net_device *dev) 1260 + { 1261 + bool clean = false; 1262 + 1263 + spin_lock_bh(&rx_mode_lock); 1264 + if (!list_empty(&dev->rx_mode_node)) { 1265 + list_del_init(&dev->rx_mode_node); 1266 + clean = true; 1267 + /* We must release netdev tracker under 1268 + * the spinlock protection. 1269 + */ 1270 + netdev_tracker_free(dev, &dev->rx_mode_tracker); 1271 + } 1272 + spin_unlock_bh(&rx_mode_lock); 1273 + 1274 + return clean; 1275 + } 1276 + 1277 + /** 1278 + * netif_rx_mode_sync() - sync rx mode inline 1279 + * @dev: network device 1280 + * 1281 + * Drivers implementing ndo_set_rx_mode_async() have their rx mode callback 1282 + * executed from a workqueue. This allows the callback to sleep, but means 1283 + * the hardware update is deferred and may not be visible to userspace 1284 + * by the time the initiating syscall returns. netif_rx_mode_sync() steals 1285 + * workqueue update and executes it inline. This preserves the atomicity of 1286 + * operations to the userspace. 
1287 + */ 1288 + void netif_rx_mode_sync(struct net_device *dev) 1289 + { 1290 + if (netif_rx_mode_clean(dev)) { 1291 + netif_rx_mode_run(dev); 1292 + /* Use __dev_put() because netdev_tracker_free() was already 1293 + * called inside netif_rx_mode_clean(). 1294 + */ 1295 + __dev_put(dev); 1296 + } 1297 + }

+385 -2

net/core/dev_addr_lists_test.c

··· 2 2 3 3 #include <kunit/test.h> 4 4 #include <linux/etherdevice.h> 5 + #include <linux/math64.h> 5 6 #include <linux/netdevice.h> 6 7 #include <linux/rtnetlink.h> 7 8 8 9 static const struct net_device_ops dummy_netdev_ops = { 9 10 }; 10 11 12 + #define ADDR_A 1 13 + #define ADDR_B 2 14 + #define ADDR_C 3 15 + 11 16 struct dev_addr_test_priv { 12 17 u32 addr_seen; 18 + u32 addr_synced; 19 + u32 addr_unsynced; 13 20 }; 14 21 15 22 static int dev_addr_test_sync(struct net_device *netdev, const unsigned char *a) 16 23 { 17 24 struct dev_addr_test_priv *datp = netdev_priv(netdev); 18 25 19 - if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) 26 + if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) { 20 27 datp->addr_seen |= 1 << a[0]; 28 + datp->addr_synced |= 1 << a[0]; 29 + } 21 30 return 0; 22 31 } 23 32 ··· 35 26 { 36 27 struct dev_addr_test_priv *datp = netdev_priv(netdev); 37 28 38 - if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) 29 + if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) { 39 30 datp->addr_seen &= ~(1 << a[0]); 31 + datp->addr_unsynced |= 1 << a[0]; 32 + } 40 33 return 0; 34 + } 35 + 36 + static void dev_addr_test_reset(struct net_device *netdev) 37 + { 38 + struct dev_addr_test_priv *datp = netdev_priv(netdev); 39 + 40 + datp->addr_seen = 0; 41 + datp->addr_synced = 0; 42 + datp->addr_unsynced = 0; 41 43 } 42 44 43 45 static int dev_addr_test_init(struct kunit *test) ··· 245 225 rtnl_unlock(); 246 226 } 247 227 228 + /* Snapshot test: basic sync with no concurrent modifications. 229 + * Add one address, snapshot, driver syncs it, reconcile propagates 230 + * sync_cnt delta back to real list. 
231 + */ 232 + static void dev_addr_test_snapshot_sync(struct kunit *test) 233 + { 234 + struct netdev_hw_addr_list snap, ref, cache; 235 + struct net_device *netdev = test->priv; 236 + struct dev_addr_test_priv *datp; 237 + struct netdev_hw_addr *ha; 238 + u8 addr[ETH_ALEN]; 239 + 240 + datp = netdev_priv(netdev); 241 + 242 + rtnl_lock(); 243 + 244 + memset(addr, ADDR_A, sizeof(addr)); 245 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 246 + 247 + /* Snapshot: ADDR_A has sync_cnt=0, refcount=1 (new) */ 248 + netif_addr_lock_bh(netdev); 249 + __hw_addr_init(&snap); 250 + __hw_addr_init(&ref); 251 + __hw_addr_init(&cache); 252 + KUNIT_EXPECT_EQ(test, 0, 253 + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, 254 + &cache)); 255 + KUNIT_EXPECT_EQ(test, 0, 256 + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, 257 + &cache)); 258 + netif_addr_unlock_bh(netdev); 259 + 260 + /* Driver syncs ADDR_A to hardware */ 261 + dev_addr_test_reset(netdev); 262 + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, 263 + dev_addr_test_unsync); 264 + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); 265 + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); 266 + 267 + /* Reconcile: delta=+1 applied to real entry */ 268 + netif_addr_lock_bh(netdev); 269 + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, 270 + &cache); 271 + netif_addr_unlock_bh(netdev); 272 + 273 + /* Real entry should now reflect the sync: sync_cnt=1, refcount=2 */ 274 + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); 275 + ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list); 276 + KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN); 277 + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); 278 + KUNIT_EXPECT_EQ(test, 2, ha->refcount); 279 + 280 + /* Second work run: already synced, nothing to do */ 281 + dev_addr_test_reset(netdev); 282 + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, 283 + dev_addr_test_unsync); 284 + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); 285 + 
KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); 286 + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); 287 + 288 + __hw_addr_flush(&cache); 289 + rtnl_unlock(); 290 + } 291 + 292 + /* Snapshot test: ADDR_A synced to hardware, then concurrently removed 293 + * from the real list before reconcile runs. Reconcile re-inserts ADDR_A as 294 + * a stale entry so the next work run unsyncs it from hardware. 295 + */ 296 + static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test) 297 + { 298 + struct netdev_hw_addr_list snap, ref, cache; 299 + struct net_device *netdev = test->priv; 300 + struct dev_addr_test_priv *datp; 301 + struct netdev_hw_addr *ha; 302 + u8 addr[ETH_ALEN]; 303 + 304 + datp = netdev_priv(netdev); 305 + 306 + rtnl_lock(); 307 + 308 + memset(addr, ADDR_A, sizeof(addr)); 309 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 310 + 311 + /* Snapshot: ADDR_A is new (sync_cnt=0, refcount=1) */ 312 + netif_addr_lock_bh(netdev); 313 + __hw_addr_init(&snap); 314 + __hw_addr_init(&ref); 315 + __hw_addr_init(&cache); 316 + KUNIT_EXPECT_EQ(test, 0, 317 + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, 318 + &cache)); 319 + KUNIT_EXPECT_EQ(test, 0, 320 + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, 321 + &cache)); 322 + netif_addr_unlock_bh(netdev); 323 + 324 + /* Driver syncs ADDR_A to hardware */ 325 + dev_addr_test_reset(netdev); 326 + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, 327 + dev_addr_test_unsync); 328 + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); 329 + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); 330 + 331 + /* Concurrent removal: user deletes ADDR_A while driver was working */ 332 + memset(addr, ADDR_A, sizeof(addr)); 333 + KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr)); 334 + KUNIT_EXPECT_EQ(test, 0, netdev->uc.count); 335 + 336 + /* Reconcile: ADDR_A gone from real list but driver synced it, 337 + * so it gets re-inserted as stale (sync_cnt=1, refcount=1). 
338 + */ 339 + netif_addr_lock_bh(netdev); 340 + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, 341 + &cache); 342 + netif_addr_unlock_bh(netdev); 343 + 344 + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); 345 + ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list); 346 + KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN); 347 + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); 348 + KUNIT_EXPECT_EQ(test, 1, ha->refcount); 349 + 350 + /* Second work run: stale entry gets unsynced from HW and removed */ 351 + dev_addr_test_reset(netdev); 352 + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, 353 + dev_addr_test_unsync); 354 + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); 355 + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced); 356 + KUNIT_EXPECT_EQ(test, 0, netdev->uc.count); 357 + 358 + __hw_addr_flush(&cache); 359 + rtnl_unlock(); 360 + } 361 + 362 + /* Snapshot test: ADDR_A was stale (unsynced from hardware by driver), 363 + * but concurrently re-added by the user. The re-add bumps refcount of 364 + * the existing stale entry. Reconcile applies delta=-1, leaving ADDR_A 365 + * as a fresh entry (sync_cnt=0, refcount=1) for the next work run. 
366 + */ 367 + static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test) 368 + { 369 + struct netdev_hw_addr_list snap, ref, cache; 370 + struct net_device *netdev = test->priv; 371 + struct dev_addr_test_priv *datp; 372 + struct netdev_hw_addr *ha; 373 + u8 addr[ETH_ALEN]; 374 + 375 + datp = netdev_priv(netdev); 376 + 377 + rtnl_lock(); 378 + 379 + memset(addr, ADDR_A, sizeof(addr)); 380 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 381 + 382 + /* Sync ADDR_A to hardware: sync_cnt=1, refcount=2 */ 383 + dev_addr_test_reset(netdev); 384 + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, 385 + dev_addr_test_unsync); 386 + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); 387 + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); 388 + 389 + /* User removes ADDR_A: refcount=1, sync_cnt=1 -> stale */ 390 + KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr)); 391 + 392 + /* Snapshot: ADDR_A is stale (sync_cnt=1, refcount=1) */ 393 + netif_addr_lock_bh(netdev); 394 + __hw_addr_init(&snap); 395 + __hw_addr_init(&ref); 396 + __hw_addr_init(&cache); 397 + KUNIT_EXPECT_EQ(test, 0, 398 + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, 399 + &cache)); 400 + KUNIT_EXPECT_EQ(test, 0, 401 + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, 402 + &cache)); 403 + netif_addr_unlock_bh(netdev); 404 + 405 + /* Driver unsyncs stale ADDR_A from hardware */ 406 + dev_addr_test_reset(netdev); 407 + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, 408 + dev_addr_test_unsync); 409 + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); 410 + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced); 411 + 412 + /* Concurrent: user re-adds ADDR_A. dev_uc_add finds the existing 413 + * stale entry and bumps refcount from 1 -> 2. sync_cnt stays 1. 414 + */ 415 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 416 + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); 417 + 418 + /* Reconcile: ref sync_cnt=1 matches real sync_cnt=1, delta=-1 419 + * applied. 
Result: sync_cnt=0, refcount=1 (fresh). 420 + */ 421 + netif_addr_lock_bh(netdev); 422 + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, 423 + &cache); 424 + netif_addr_unlock_bh(netdev); 425 + 426 + /* Entry survives as fresh: needs re-sync to HW */ 427 + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); 428 + ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list); 429 + KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN); 430 + KUNIT_EXPECT_EQ(test, 0, ha->sync_cnt); 431 + KUNIT_EXPECT_EQ(test, 1, ha->refcount); 432 + 433 + /* Second work run: fresh entry gets synced to HW */ 434 + dev_addr_test_reset(netdev); 435 + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, 436 + dev_addr_test_unsync); 437 + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); 438 + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); 439 + 440 + __hw_addr_flush(&cache); 441 + rtnl_unlock(); 442 + } 443 + 444 + /* Snapshot test: ADDR_A is new (synced by driver), and independent ADDR_B 445 + * is concurrently removed from the real list. A's sync delta propagates 446 + * normally; B's absence doesn't interfere. 
447 + */ 448 + static void dev_addr_test_snapshot_add_and_remove(struct kunit *test) 449 + { 450 + struct netdev_hw_addr_list snap, ref, cache; 451 + struct net_device *netdev = test->priv; 452 + struct dev_addr_test_priv *datp; 453 + struct netdev_hw_addr *ha; 454 + u8 addr[ETH_ALEN]; 455 + 456 + datp = netdev_priv(netdev); 457 + 458 + rtnl_lock(); 459 + 460 + /* Add ADDR_A and ADDR_B (will be synced then removed) */ 461 + memset(addr, ADDR_A, sizeof(addr)); 462 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 463 + memset(addr, ADDR_B, sizeof(addr)); 464 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 465 + 466 + /* Sync both to hardware: sync_cnt=1, refcount=2 */ 467 + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, 468 + dev_addr_test_unsync); 469 + 470 + /* Add ADDR_C (new, will be synced by snapshot) */ 471 + memset(addr, ADDR_C, sizeof(addr)); 472 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 473 + 474 + /* Snapshot: A,B synced (sync_cnt=1,refcount=2); C new (0,1) */ 475 + netif_addr_lock_bh(netdev); 476 + __hw_addr_init(&snap); 477 + __hw_addr_init(&ref); 478 + __hw_addr_init(&cache); 479 + KUNIT_EXPECT_EQ(test, 0, 480 + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, 481 + &cache)); 482 + KUNIT_EXPECT_EQ(test, 0, 483 + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, 484 + &cache)); 485 + netif_addr_unlock_bh(netdev); 486 + 487 + /* Driver syncs snapshot: ADDR_C is new -> synced; A,B already synced */ 488 + dev_addr_test_reset(netdev); 489 + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, 490 + dev_addr_test_unsync); 491 + KUNIT_EXPECT_EQ(test, 1 << ADDR_C, datp->addr_synced); 492 + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); 493 + 494 + /* Concurrent: user removes addr B while driver was working */ 495 + memset(addr, ADDR_B, sizeof(addr)); 496 + KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr)); 497 + 498 + /* Reconcile: ADDR_C's delta=+1 applied to real list. 
499 + * ADDR_B's delta=0 (unchanged in snapshot), 500 + * so nothing to apply to ADDR_B. 501 + */ 502 + netif_addr_lock_bh(netdev); 503 + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, 504 + &cache); 505 + netif_addr_unlock_bh(netdev); 506 + 507 + /* ADDR_A: unchanged (sync_cnt=1, refcount=2) 508 + * ADDR_B: refcount went from 2->1 via dev_uc_del (still present, stale) 509 + * ADDR_C: sync propagated (sync_cnt=1, refcount=2) 510 + */ 511 + KUNIT_EXPECT_EQ(test, 3, netdev->uc.count); 512 + netdev_hw_addr_list_for_each(ha, &netdev->uc) { 513 + u8 id = ha->addr[0]; 514 + 515 + if (!memchr_inv(ha->addr, id, ETH_ALEN)) { 516 + if (id == ADDR_A) { 517 + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); 518 + KUNIT_EXPECT_EQ(test, 2, ha->refcount); 519 + } else if (id == ADDR_B) { 520 + /* B: still present but now stale */ 521 + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); 522 + KUNIT_EXPECT_EQ(test, 1, ha->refcount); 523 + } else if (id == ADDR_C) { 524 + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); 525 + KUNIT_EXPECT_EQ(test, 2, ha->refcount); 526 + } 527 + } 528 + } 529 + 530 + /* Second work run: ADDR_B is stale, gets unsynced and removed */ 531 + dev_addr_test_reset(netdev); 532 + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, 533 + dev_addr_test_unsync); 534 + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); 535 + KUNIT_EXPECT_EQ(test, 1 << ADDR_B, datp->addr_unsynced); 536 + KUNIT_EXPECT_EQ(test, 2, netdev->uc.count); 537 + 538 + __hw_addr_flush(&cache); 539 + rtnl_unlock(); 540 + } 541 + 542 + static void dev_addr_test_snapshot_benchmark(struct kunit *test) 543 + { 544 + struct net_device *netdev = test->priv; 545 + struct netdev_hw_addr_list snap, cache; 546 + u8 addr[ETH_ALEN]; 547 + s64 duration = 0; 548 + ktime_t start; 549 + int i, iter; 550 + 551 + rtnl_lock(); 552 + 553 + for (i = 0; i < 1024; i++) { 554 + memset(addr, 0, sizeof(addr)); 555 + addr[0] = (i >> 8) & 0xff; 556 + addr[1] = i & 0xff; 557 + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); 
558 + } 559 + 560 + __hw_addr_init(&cache); 561 + 562 + for (iter = 0; iter < 1000; iter++) { 563 + netif_addr_lock_bh(netdev); 564 + __hw_addr_init(&snap); 565 + 566 + start = ktime_get(); 567 + KUNIT_EXPECT_EQ(test, 0, 568 + __hw_addr_list_snapshot(&snap, &netdev->uc, 569 + ETH_ALEN, &cache)); 570 + duration += ktime_to_ns(ktime_sub(ktime_get(), start)); 571 + 572 + netif_addr_unlock_bh(netdev); 573 + __hw_addr_flush(&snap); 574 + } 575 + 576 + __hw_addr_flush(&cache); 577 + 578 + kunit_info(test, 579 + "1024 addrs x 1000 snapshots: %lld ns total, %lld ns/iter", 580 + duration, div_s64(duration, 1000)); 581 + 582 + rtnl_unlock(); 583 + } 584 + 248 585 static struct kunit_case dev_addr_test_cases[] = { 249 586 KUNIT_CASE(dev_addr_test_basic), 250 587 KUNIT_CASE(dev_addr_test_sync_one), ··· 609 232 KUNIT_CASE(dev_addr_test_del_main), 610 233 KUNIT_CASE(dev_addr_test_add_set), 611 234 KUNIT_CASE(dev_addr_test_add_excl), 235 + KUNIT_CASE(dev_addr_test_snapshot_sync), 236 + KUNIT_CASE(dev_addr_test_snapshot_remove_during_sync), 237 + KUNIT_CASE(dev_addr_test_snapshot_readd_during_unsync), 238 + KUNIT_CASE(dev_addr_test_snapshot_add_and_remove), 239 + KUNIT_CASE_SLOW(dev_addr_test_snapshot_benchmark), 612 240 {} 613 241 }; 614 242 ··· 625 243 }; 626 244 kunit_test_suite(dev_addr_test_suite); 627 245 246 + MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); 628 247 MODULE_DESCRIPTION("KUnit tests for struct netdev_hw_addr_list"); 629 248 MODULE_LICENSE("GPL");

+3

net/core/dev_api.c

··· 66 66 67 67 netdev_lock_ops(dev); 68 68 ret = netif_change_flags(dev, flags, extack); 69 + netif_rx_mode_sync(dev); 69 70 netdev_unlock_ops(dev); 70 71 71 72 return ret; ··· 286 285 287 286 netdev_lock_ops(dev); 288 287 ret = netif_set_promiscuity(dev, inc); 288 + netif_rx_mode_sync(dev); 289 289 netdev_unlock_ops(dev); 290 290 291 291 return ret; ··· 313 311 314 312 netdev_lock_ops(dev); 315 313 ret = netif_set_allmulti(dev, inc, true); 314 + netif_rx_mode_sync(dev); 316 315 netdev_unlock_ops(dev); 317 316 318 317 return ret;

+4 -2

net/core/dev_ioctl.c

··· 586 586 return err; 587 587 588 588 case SIOCADDMULTI: 589 - if (!ops->ndo_set_rx_mode || 589 + if ((!ops->ndo_set_rx_mode && !ops->ndo_set_rx_mode_async) || 590 590 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 591 591 return -EINVAL; 592 592 if (!netif_device_present(dev)) 593 593 return -ENODEV; 594 594 netdev_lock_ops(dev); 595 595 err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); 596 + netif_rx_mode_sync(dev); 596 597 netdev_unlock_ops(dev); 597 598 return err; 598 599 599 600 case SIOCDELMULTI: 600 - if (!ops->ndo_set_rx_mode || 601 + if ((!ops->ndo_set_rx_mode && !ops->ndo_set_rx_mode_async) || 601 602 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 602 603 return -EINVAL; 603 604 if (!netif_device_present(dev)) 604 605 return -ENODEV; 605 606 netdev_lock_ops(dev); 606 607 err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); 608 + netif_rx_mode_sync(dev); 607 609 netdev_unlock_ops(dev); 608 610 return err; 609 611

+1 -1

net/core/filter.c

··· 5396 5396 if (val <= 0) 5397 5397 return -EINVAL; 5398 5398 tp->snd_cwnd_clamp = val; 5399 - tp->snd_ssthresh = val; 5399 + WRITE_ONCE(tp->snd_ssthresh, val); 5400 5400 break; 5401 5401 case TCP_BPF_DELACK_MAX: 5402 5402 timeout = usecs_to_jiffies(val);

+5 -8

net/core/flow_dissector.c

··· 1374 1374 break; 1375 1375 } 1376 1376 1377 - /* least significant bit of the most significant octet 1378 - * indicates if protocol field was compressed 1377 + /* PFC (compressed 1-byte protocol) frames are not processed. 1378 + * A compressed protocol field has the least significant bit of 1379 + * the most significant octet set, which will fail the following 1380 + * ppp_proto_is_valid(), returning FLOW_DISSECT_RET_OUT_BAD. 1379 1381 */ 1380 1382 ppp_proto = ntohs(hdr->proto); 1381 - if (ppp_proto & 0x0100) { 1382 - ppp_proto = ppp_proto >> 8; 1383 - nhoff += PPPOE_SES_HLEN - 1; 1384 - } else { 1385 - nhoff += PPPOE_SES_HLEN; 1386 - } 1383 + nhoff += PPPOE_SES_HLEN; 1387 1384 1388 1385 if (ppp_proto == PPP_IP) { 1389 1386 proto = htons(ETH_P_IP);

+1

net/core/rtnetlink.c

··· 3431 3431 dev->name); 3432 3432 } 3433 3433 3434 + netif_rx_mode_sync(dev); 3434 3435 netdev_unlock_ops(dev); 3435 3436 3436 3437 return err;

+1 -15

net/dsa/conduit.c

··· 27 27 int len; 28 28 29 29 if (ops && ops->get_regs_len) { 30 - netdev_lock_ops(dev); 31 30 len = ops->get_regs_len(dev); 32 - netdev_unlock_ops(dev); 33 31 if (len < 0) 34 32 return len; 35 33 ret += len; ··· 58 60 int len; 59 61 60 62 if (ops && ops->get_regs_len && ops->get_regs) { 61 - netdev_lock_ops(dev); 62 63 len = ops->get_regs_len(dev); 63 - if (len < 0) { 64 - netdev_unlock_ops(dev); 64 + if (len < 0) 65 65 return; 66 - } 67 66 regs->len = len; 68 67 ops->get_regs(dev, regs, data); 69 - netdev_unlock_ops(dev); 70 68 data += regs->len; 71 69 } 72 70 ··· 109 115 int count, mcount = 0; 110 116 111 117 if (ops && ops->get_sset_count && ops->get_ethtool_stats) { 112 - netdev_lock_ops(dev); 113 118 mcount = ops->get_sset_count(dev, ETH_SS_STATS); 114 119 ops->get_ethtool_stats(dev, stats, data); 115 - netdev_unlock_ops(dev); 116 120 } 117 121 118 122 list_for_each_entry(dp, &dst->ports, list) { ··· 141 149 if (count >= 0) 142 150 phy_ethtool_get_stats(dev->phydev, stats, data); 143 151 } else if (ops && ops->get_sset_count && ops->get_ethtool_phy_stats) { 144 - netdev_lock_ops(dev); 145 152 count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); 146 153 ops->get_ethtool_phy_stats(dev, stats, data); 147 - netdev_unlock_ops(dev); 148 154 } 149 155 150 156 if (count < 0) ··· 166 176 struct dsa_switch_tree *dst = cpu_dp->dst; 167 177 int count = 0; 168 178 169 - netdev_lock_ops(dev); 170 179 if (sset == ETH_SS_PHY_STATS && dev->phydev && 171 180 (!ops || !ops->get_ethtool_phy_stats)) 172 181 count = phy_ethtool_get_sset_count(dev->phydev); 173 182 else if (ops && ops->get_sset_count) 174 183 count = ops->get_sset_count(dev, sset); 175 - netdev_unlock_ops(dev); 176 184 177 185 if (count < 0) 178 186 count = 0; ··· 227 239 struct dsa_switch_tree *dst = cpu_dp->dst; 228 240 int count, mcount = 0; 229 241 230 - netdev_lock_ops(dev); 231 242 if (stringset == ETH_SS_PHY_STATS && dev->phydev && 232 243 !ops->get_ethtool_phy_stats) { 233 244 mcount = 
phy_ethtool_get_sset_count(dev->phydev); ··· 240 253 mcount = 0; 241 254 ops->get_strings(dev, stringset, data); 242 255 } 243 - netdev_unlock_ops(dev); 244 256 245 257 list_for_each_entry(dp, &dst->ports, list) { 246 258 if (!dsa_port_is_dsa(dp) && !dsa_port_is_cpu(dp))

+4 -1

net/ipv4/icmp.c

··· 64 64 #include <linux/jiffies.h> 65 65 #include <linux/kernel.h> 66 66 #include <linux/fcntl.h> 67 + #include <linux/nospec.h> 67 68 #include <linux/socket.h> 68 69 #include <linux/in.h> 69 70 #include <linux/inet.h> ··· 372 371 to, len); 373 372 374 373 skb->csum = csum_block_add(skb->csum, csum, odd); 375 - if (icmp_pointers[icmp_param->data.icmph.type].error) 374 + if (icmp_param->data.icmph.type <= NR_ICMP_TYPES && 375 + icmp_pointers[array_index_nospec(icmp_param->data.icmph.type, 376 + NR_ICMP_TYPES + 1)].error) 376 377 nf_ct_attach(skb, icmp_param->skb); 377 378 return 0; 378 379 }

+3

net/ipv4/inet_connection_sock.c

··· 1479 1479 if (nreq) { 1480 1480 refcount_set(&nreq->rsk_refcnt, 1); 1481 1481 1482 + rcu_read_lock(); 1482 1483 if (inet_csk_reqsk_queue_add(nsk, nreq, child)) { 1483 1484 __NET_INC_STATS(sock_net(nsk), 1484 1485 LINUX_MIB_TCPMIGRATEREQSUCCESS); 1485 1486 reqsk_migrate_reset(req); 1487 + READ_ONCE(nsk->sk_data_ready)(nsk); 1486 1488 } else { 1487 1489 __NET_INC_STATS(sock_net(nsk), 1488 1490 LINUX_MIB_TCPMIGRATEREQFAILURE); 1489 1491 reqsk_migrate_reset(nreq); 1490 1492 __reqsk_free(nreq); 1491 1493 } 1494 + rcu_read_unlock(); 1492 1495 1493 1496 /* inet_csk_reqsk_queue_add() has already 1494 1497 * called inet_child_forget() on failure case.

+2 -2

net/ipv4/netfilter/iptable_nat.c

··· 79 79 while (i) 80 80 nf_nat_ipv4_unregister_fn(net, &ops[--i]); 81 81 82 - kfree(ops); 82 + kfree_rcu(ops, rcu); 83 83 return ret; 84 84 } 85 85 } ··· 100 100 for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) 101 101 nf_nat_ipv4_unregister_fn(net, &ops[i]); 102 102 103 - kfree(ops); 103 + kfree_rcu(ops, rcu); 104 104 } 105 105 106 106 static int iptable_nat_table_init(struct net *net)

+2 -2

net/ipv4/nexthop.c

··· 2469 2469 goto err_notify; 2470 2470 } 2471 2471 2472 - /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially 2472 + /* When replacing a nexthop with one of a different family, potentially 2473 2473 * update IPv4 indication in all the groups using the nexthop. 2474 2474 */ 2475 - if (oldi->family == AF_INET && newi->family == AF_INET6) { 2475 + if (oldi->family != newi->family) { 2476 2476 list_for_each_entry(nhge, &old->grp_list, nh_list) { 2477 2477 struct nexthop *nhp = nhge->nh_parent; 2478 2478 struct nh_group *nhg;

+37 -25

net/ipv4/tcp.c

··· 3424 3424 icsk->icsk_rto = TCP_TIMEOUT_INIT; 3425 3425 WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN); 3426 3426 WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX); 3427 - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 3427 + WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH); 3428 3428 tcp_snd_cwnd_set(tp, TCP_INIT_CWND); 3429 3429 tp->snd_cwnd_cnt = 0; 3430 3430 tp->is_cwnd_limited = 0; ··· 3622 3622 if (delta && sk->sk_state == TCP_ESTABLISHED) { 3623 3623 s64 srtt = (s64)tp->srtt_us + delta; 3624 3624 3625 - tp->srtt_us = clamp_t(s64, srtt, 1, ~0U); 3625 + WRITE_ONCE(tp->srtt_us, 3626 + clamp_t(s64, srtt, 1, ~0U)); 3626 3627 3627 3628 /* Note: does not deal with non zero icsk_backoff */ 3628 3629 tcp_set_rto(sk); ··· 4191 4190 struct tcp_info *info) 4192 4191 { 4193 4192 u64 stats[__TCP_CHRONO_MAX], total = 0; 4194 - enum tcp_chrono i; 4193 + enum tcp_chrono i, cur; 4195 4194 4195 + /* Following READ_ONCE()s pair with WRITE_ONCE()s in tcp_chrono_set(). 4196 + * This is because socket lock might not be owned by us at this point. 4197 + * This is best effort, tcp_get_timestamping_opt_stats() can 4198 + * see wrong values. A real fix would be too costly for TCP fast path. 
4199 + */ 4200 + cur = READ_ONCE(tp->chrono_type); 4196 4201 for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) { 4197 - stats[i] = tp->chrono_stat[i - 1]; 4198 - if (i == tp->chrono_type) 4199 - stats[i] += tcp_jiffies32 - tp->chrono_start; 4202 + stats[i] = READ_ONCE(tp->chrono_stat[i - 1]); 4203 + if (i == cur) 4204 + stats[i] += tcp_jiffies32 - READ_ONCE(tp->chrono_start); 4200 4205 stats[i] *= USEC_PER_SEC / HZ; 4201 4206 total += stats[i]; 4202 4207 } ··· 4434 4427 nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, 4435 4428 info.tcpi_sndbuf_limited, TCP_NLA_PAD); 4436 4429 nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT, 4437 - tp->data_segs_out, TCP_NLA_PAD); 4430 + READ_ONCE(tp->data_segs_out), TCP_NLA_PAD); 4438 4431 nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, 4439 - tp->total_retrans, TCP_NLA_PAD); 4432 + READ_ONCE(tp->total_retrans), TCP_NLA_PAD); 4440 4433 4441 4434 rate = READ_ONCE(sk->sk_pacing_rate); 4442 4435 rate64 = (rate != ~0UL) ? rate : ~0ULL; ··· 4445 4438 rate64 = tcp_compute_delivery_rate(tp); 4446 4439 nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); 4447 4440 4448 - nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp)); 4449 - nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); 4450 - nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); 4441 + nla_put_u32(stats, TCP_NLA_SND_CWND, READ_ONCE(tp->snd_cwnd)); 4442 + nla_put_u32(stats, TCP_NLA_REORDERING, READ_ONCE(tp->reordering)); 4443 + nla_put_u32(stats, TCP_NLA_MIN_RTT, data_race(tcp_min_rtt(tp))); 4451 4444 4452 4445 nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, 4453 4446 READ_ONCE(inet_csk(sk)->icsk_retransmits)); 4454 - nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); 4455 - nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); 4456 - nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); 4457 - nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); 4447 + nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, 
data_race(!!tp->rate_app_limited)); 4448 + nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, READ_ONCE(tp->snd_ssthresh)); 4449 + nla_put_u32(stats, TCP_NLA_DELIVERED, READ_ONCE(tp->delivered)); 4450 + nla_put_u32(stats, TCP_NLA_DELIVERED_CE, READ_ONCE(tp->delivered_ce)); 4458 4451 4459 - nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); 4452 + nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, 4453 + max_t(int, 0, 4454 + READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_una))); 4460 4455 nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); 4461 4456 4462 - nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent, 4457 + nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, READ_ONCE(tp->bytes_sent), 4463 4458 TCP_NLA_PAD); 4464 - nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, 4465 - TCP_NLA_PAD); 4466 - nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); 4467 - nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); 4468 - nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); 4469 - nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); 4459 + nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, 4460 + READ_ONCE(tp->bytes_retrans), TCP_NLA_PAD); 4461 + nla_put_u32(stats, TCP_NLA_DSACK_DUPS, READ_ONCE(tp->dsack_dups)); 4462 + nla_put_u32(stats, TCP_NLA_REORD_SEEN, READ_ONCE(tp->reord_seen)); 4463 + nla_put_u32(stats, TCP_NLA_SRTT, READ_ONCE(tp->srtt_us) >> 3); 4464 + nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, 4465 + READ_ONCE(tp->timeout_rehash)); 4470 4466 nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, 4471 - max_t(int, 0, tp->write_seq - tp->snd_nxt)); 4467 + max_t(int, 0, 4468 + READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt))); 4472 4469 nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, 4473 4470 TCP_NLA_PAD); 4474 4471 if (ack_skb) 4475 4472 nla_put_u8(stats, TCP_NLA_TTL, 4476 4473 tcp_skb_ttl_or_hop_limit(ack_skb)); 4477 4474 4478 - nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash); 4475 + 
nla_put_u32(stats, TCP_NLA_REHASH, 4476 + READ_ONCE(tp->plb_rehash) + READ_ONCE(tp->timeout_rehash)); 4479 4477 return stats; 4480 4478 } 4481 4479

+3 -3

net/ipv4/tcp_bbr.c

··· 897 897 898 898 if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { 899 899 bbr->mode = BBR_DRAIN; /* drain queue we created */ 900 - tcp_sk(sk)->snd_ssthresh = 901 - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); 900 + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, 901 + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)); 902 902 } /* fall through to check if in-flight is already small: */ 903 903 if (bbr->mode == BBR_DRAIN && 904 904 bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= ··· 1043 1043 struct bbr *bbr = inet_csk_ca(sk); 1044 1044 1045 1045 bbr->prior_cwnd = 0; 1046 - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 1046 + WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH); 1047 1047 bbr->rtt_cnt = 0; 1048 1048 bbr->next_rtt_delivered = tp->delivered; 1049 1049 bbr->prev_ca_state = TCP_CA_Open;

+1 -1

net/ipv4/tcp_bic.c

··· 74 74 bictcp_reset(ca); 75 75 76 76 if (initial_ssthresh) 77 - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 77 + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh); 78 78 } 79 79 80 80 /*

+2 -2

net/ipv4/tcp_cdg.c

··· 162 162 NET_ADD_STATS(sock_net(sk), 163 163 LINUX_MIB_TCPHYSTARTTRAINCWND, 164 164 tcp_snd_cwnd(tp)); 165 - tp->snd_ssthresh = tcp_snd_cwnd(tp); 165 + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); 166 166 return; 167 167 } 168 168 } ··· 181 181 NET_ADD_STATS(sock_net(sk), 182 182 LINUX_MIB_TCPHYSTARTDELAYCWND, 183 183 tcp_snd_cwnd(tp)); 184 - tp->snd_ssthresh = tcp_snd_cwnd(tp); 184 + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); 185 185 } 186 186 } 187 187 }

+3 -3

net/ipv4/tcp_cubic.c

··· 136 136 bictcp_hystart_reset(sk); 137 137 138 138 if (!hystart && initial_ssthresh) 139 - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 139 + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh); 140 140 } 141 141 142 142 __bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk) ··· 420 420 NET_ADD_STATS(sock_net(sk), 421 421 LINUX_MIB_TCPHYSTARTTRAINCWND, 422 422 tcp_snd_cwnd(tp)); 423 - tp->snd_ssthresh = tcp_snd_cwnd(tp); 423 + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); 424 424 } 425 425 } 426 426 } ··· 440 440 NET_ADD_STATS(sock_net(sk), 441 441 LINUX_MIB_TCPHYSTARTDELAYCWND, 442 442 tcp_snd_cwnd(tp)); 443 - tp->snd_ssthresh = tcp_snd_cwnd(tp); 443 + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); 444 444 } 445 445 } 446 446 }

+1 -1

net/ipv4/tcp_dctcp.c

··· 177 177 struct tcp_sock *tp = tcp_sk(sk); 178 178 179 179 ca->loss_cwnd = tcp_snd_cwnd(tp); 180 - tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U); 180 + WRITE_ONCE(tp->snd_ssthresh, max(tcp_snd_cwnd(tp) >> 1U, 2U)); 181 181 } 182 182 183 183 __bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state)

+29 -23

net/ipv4/tcp_input.c

··· 476 476 477 477 static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count) 478 478 { 479 - tp->delivered_ce += ecn_count; 479 + WRITE_ONCE(tp->delivered_ce, tp->delivered_ce + ecn_count); 480 480 } 481 481 482 482 /* Updates the delivered and delivered_ce counts */ 483 483 static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, 484 484 bool ece_ack) 485 485 { 486 - tp->delivered += delivered; 486 + WRITE_ONCE(tp->delivered, tp->delivered + delivered); 487 487 if (tcp_ecn_mode_rfc3168(tp) && ece_ack) 488 488 tcp_count_delivered_ce(tp, delivered); 489 489 } ··· 1132 1132 1133 1133 tcp_bpf_rtt(sk, mrtt_us, srtt); 1134 1134 } 1135 - tp->srtt_us = max(1U, srtt); 1135 + WRITE_ONCE(tp->srtt_us, max(1U, srtt)); 1136 1136 } 1137 1137 1138 1138 void tcp_update_pacing_rate(struct sock *sk) ··· 1246 1246 else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq) 1247 1247 state->flag |= FLAG_DSACK_TLP; 1248 1248 1249 - tp->dsack_dups += dup_segs; 1249 + WRITE_ONCE(tp->dsack_dups, tp->dsack_dups + dup_segs); 1250 1250 /* Skip the DSACK if dup segs weren't retransmitted by sender */ 1251 1251 if (tp->dsack_dups > tp->total_retrans) 1252 1252 return 0; ··· 1293 1293 tp->sacked_out, 1294 1294 tp->undo_marker ? tp->undo_retrans : 0); 1295 1295 #endif 1296 - tp->reordering = min_t(u32, (metric + mss - 1) / mss, 1297 - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); 1296 + WRITE_ONCE(tp->reordering, 1297 + min_t(u32, (metric + mss - 1) / mss, 1298 + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering))); 1298 1299 } 1299 1300 1300 1301 /* This exciting event is worth to be remembered. 8) */ 1301 - tp->reord_seen++; 1302 + WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1); 1302 1303 NET_INC_STATS(sock_net(sk), 1303 1304 ts ? 
LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); 1304 1305 } ··· 2440 2439 if (!tcp_limit_reno_sacked(tp)) 2441 2440 return; 2442 2441 2443 - tp->reordering = min_t(u32, tp->packets_out + addend, 2444 - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); 2445 - tp->reord_seen++; 2442 + WRITE_ONCE(tp->reordering, 2443 + min_t(u32, tp->packets_out + addend, 2444 + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering))); 2445 + WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1); 2446 2446 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); 2447 2447 } 2448 2448 ··· 2567 2565 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { 2568 2566 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2569 2567 tp->prior_cwnd = tcp_snd_cwnd(tp); 2570 - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 2568 + WRITE_ONCE(tp->snd_ssthresh, icsk->icsk_ca_ops->ssthresh(sk)); 2571 2569 tcp_ca_event(sk, CA_EVENT_LOSS); 2572 2570 tcp_init_undo(tp); 2573 2571 } ··· 2581 2579 reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); 2582 2580 if (icsk->icsk_ca_state <= TCP_CA_Disorder && 2583 2581 tp->sacked_out >= reordering) 2584 - tp->reordering = min_t(unsigned int, tp->reordering, 2585 - reordering); 2582 + WRITE_ONCE(tp->reordering, 2583 + min_t(unsigned int, tp->reordering, reordering)); 2586 2584 2587 2585 tcp_set_ca_state(sk, TCP_CA_Loss); 2588 2586 tp->high_seq = tp->snd_nxt; ··· 2860 2858 tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk)); 2861 2859 2862 2860 if (tp->prior_ssthresh > tp->snd_ssthresh) { 2863 - tp->snd_ssthresh = tp->prior_ssthresh; 2861 + WRITE_ONCE(tp->snd_ssthresh, tp->prior_ssthresh); 2864 2862 tcp_ecn_withdraw_cwr(tp); 2865 2863 } 2866 2864 } ··· 2978 2976 tp->prior_cwnd = tcp_snd_cwnd(tp); 2979 2977 tp->prr_delivered = 0; 2980 2978 tp->prr_out = 0; 2981 - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); 2979 + WRITE_ONCE(tp->snd_ssthresh, inet_csk(sk)->icsk_ca_ops->ssthresh(sk)); 2982 2980 tcp_ecn_queue_cwr(tp); 2983 2981 } 
2984 2982 ··· 3120 3118 3121 3119 if (icsk->icsk_ca_state != TCP_CA_Loss) { 3122 3120 tp->high_seq = tp->snd_nxt; 3123 - tp->snd_ssthresh = tcp_current_ssthresh(sk); 3121 + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); 3124 3122 tp->prior_ssthresh = 0; 3125 3123 tp->undo_marker = 0; 3126 3124 tcp_set_ca_state(sk, TCP_CA_Loss); ··· 3912 3910 sock_owned_by_me((struct sock *)tp); 3913 3911 tp->bytes_acked += delta; 3914 3912 tcp_snd_sne_update(tp, ack); 3915 - tp->snd_una = ack; 3913 + WRITE_ONCE(tp->snd_una, ack); 3916 3914 } 3917 3915 3918 3916 static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) ··· 4286 4284 goto old_ack; 4287 4285 } 4288 4286 4289 - /* If the ack includes data we haven't sent yet, discard 4290 - * this segment (RFC793 Section 3.9). 4287 + /* If the ack includes data we haven't sent yet, drop the 4288 + * segment. RFC 793 Section 3.9 and RFC 5961 Section 5.2 4289 + * require us to send an ACK back in that case. 4291 4290 */ 4292 - if (after(ack, tp->snd_nxt)) 4291 + if (after(ack, tp->snd_nxt)) { 4292 + if (!(flag & FLAG_NO_CHALLENGE_ACK)) 4293 + tcp_send_challenge_ack(sk, false); 4293 4294 return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA; 4295 + } 4294 4296 4295 4297 if (after(ack, prior_snd_una)) { 4296 4298 flag |= FLAG_SND_UNA_ADVANCED; ··· 6783 6777 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); 6784 6778 /* SYN-data is counted as two separate packets in tcp_ack() */ 6785 6779 if (tp->delivered > 1) 6786 - --tp->delivered; 6780 + WRITE_ONCE(tp->delivered, tp->delivered - 1); 6787 6781 } 6788 6782 6789 6783 tcp_fastopen_add_skb(sk, synack); ··· 7216 7210 SKB_DR_SET(reason, NOT_SPECIFIED); 7217 7211 switch (sk->sk_state) { 7218 7212 case TCP_SYN_RECV: 7219 - tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */ 7213 + WRITE_ONCE(tp->delivered, tp->delivered + 1); /* SYN-ACK delivery isn't tracked in tcp_ack */ 7220 7214 if (!tp->srtt_us) 7221 7215 tcp_synack_rtt_meas(sk, req); 7222 7216 ··· 7244 7238 if 
(sk->sk_socket) 7245 7239 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); 7246 7240 7247 - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; 7241 + WRITE_ONCE(tp->snd_una, TCP_SKB_CB(skb)->ack_seq); 7248 7242 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; 7249 7243 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); 7250 7244

+3 -3

net/ipv4/tcp_metrics.c

··· 490 490 val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 491 491 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); 492 492 if (val) { 493 - tp->snd_ssthresh = val; 493 + WRITE_ONCE(tp->snd_ssthresh, val); 494 494 if (tp->snd_ssthresh > tp->snd_cwnd_clamp) 495 - tp->snd_ssthresh = tp->snd_cwnd_clamp; 495 + WRITE_ONCE(tp->snd_ssthresh, tp->snd_cwnd_clamp); 496 496 } 497 497 val = tcp_metric_get(tm, TCP_METRIC_REORDERING); 498 498 if (val && tp->reordering != val) 499 - tp->reordering = val; 499 + WRITE_ONCE(tp->reordering, val); 500 500 501 501 crtt = tcp_metric_get(tm, TCP_METRIC_RTT); 502 502 rcu_read_unlock();

+2 -2

net/ipv4/tcp_nv.c

··· 396 396 397 397 /* We have enough data to determine we are congested */ 398 398 ca->nv_allow_cwnd_growth = 0; 399 - tp->snd_ssthresh = 400 - (nv_ssthresh_factor * max_win) >> 3; 399 + WRITE_ONCE(tp->snd_ssthresh, 400 + (nv_ssthresh_factor * max_win) >> 3); 401 401 if (tcp_snd_cwnd(tp) - max_win > 2) { 402 402 /* gap > 2, we do exponential cwnd decrease */ 403 403 int dec;

+11 -8

net/ipv4/tcp_output.c

··· 171 171 172 172 tcp_ca_event(sk, CA_EVENT_CWND_RESTART); 173 173 174 - tp->snd_ssthresh = tcp_current_ssthresh(sk); 174 + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); 175 175 restart_cwnd = min(restart_cwnd, cwnd); 176 176 177 177 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) ··· 1688 1688 1689 1689 if (skb->len != tcp_header_size) { 1690 1690 tcp_event_data_sent(tp, sk); 1691 - tp->data_segs_out += tcp_skb_pcount(skb); 1692 - tp->bytes_sent += skb->len - tcp_header_size; 1691 + WRITE_ONCE(tp->data_segs_out, 1692 + tp->data_segs_out + tcp_skb_pcount(skb)); 1693 + WRITE_ONCE(tp->bytes_sent, 1694 + tp->bytes_sent + skb->len - tcp_header_size); 1693 1695 } 1694 1696 1695 1697 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) ··· 2144 2142 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); 2145 2143 u32 win_used = max(tp->snd_cwnd_used, init_win); 2146 2144 if (win_used < tcp_snd_cwnd(tp)) { 2147 - tp->snd_ssthresh = tcp_current_ssthresh(sk); 2145 + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); 2148 2146 tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1); 2149 2147 } 2150 2148 tp->snd_cwnd_used = 0; ··· 3644 3642 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); 3645 3643 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 3646 3644 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); 3647 - tp->total_retrans += segs; 3648 - tp->bytes_retrans += skb->len; 3645 + WRITE_ONCE(tp->total_retrans, tp->total_retrans + segs); 3646 + WRITE_ONCE(tp->bytes_retrans, tp->bytes_retrans + skb->len); 3649 3647 3650 3648 /* make sure skb->data is aligned on arches that require it 3651 3649 * and check if ack-trimming & collapsing extended the headroom ··· 4154 4152 tp->snd_wnd = 0; 4155 4153 tcp_init_wl(tp, 0); 4156 4154 tcp_write_queue_purge(sk); 4157 - tp->snd_una = tp->write_seq; 4155 + WRITE_ONCE(tp->snd_una, tp->write_seq); 4158 4156 tp->snd_sml = tp->write_seq; 4159 4157 tp->snd_up = tp->write_seq; 4160 4158 
WRITE_ONCE(tp->snd_nxt, tp->write_seq); ··· 4648 4646 * However in this case, we are dealing with a passive fastopen 4649 4647 * socket thus we can change total_retrans value. 4650 4648 */ 4651 - tcp_sk_rw(sk)->total_retrans++; 4649 + WRITE_ONCE(tcp_sk_rw(sk)->total_retrans, 4650 + tcp_sk_rw(sk)->total_retrans + 1); 4652 4651 } 4653 4652 trace_tcp_retransmit_synack(sk, req); 4654 4653 WRITE_ONCE(req->num_retrans, req->num_retrans + 1);

+1 -1

net/ipv4/tcp_plb.c

··· 80 80 81 81 sk_rethink_txhash(sk); 82 82 plb->consec_cong_rounds = 0; 83 - tcp_sk(sk)->plb_rehash++; 83 + WRITE_ONCE(tcp_sk(sk)->plb_rehash, tcp_sk(sk)->plb_rehash + 1); 84 84 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH); 85 85 } 86 86 EXPORT_SYMBOL_GPL(tcp_plb_check_rehash);

+1 -1

net/ipv4/tcp_timer.c

··· 297 297 } 298 298 299 299 if (sk_rethink_txhash(sk)) { 300 - tp->timeout_rehash++; 300 + WRITE_ONCE(tp->timeout_rehash, tp->timeout_rehash + 1); 301 301 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); 302 302 } 303 303

+5 -4

net/ipv4/tcp_vegas.c

··· 245 245 */ 246 246 tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), 247 247 (u32)target_cwnd + 1)); 248 - tp->snd_ssthresh = tcp_vegas_ssthresh(tp); 248 + WRITE_ONCE(tp->snd_ssthresh, 249 + tcp_vegas_ssthresh(tp)); 249 250 250 251 } else if (tcp_in_slow_start(tp)) { 251 252 /* Slow start. */ ··· 262 261 * we slow down. 263 262 */ 264 263 tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); 265 - tp->snd_ssthresh 266 - = tcp_vegas_ssthresh(tp); 264 + WRITE_ONCE(tp->snd_ssthresh, 265 + tcp_vegas_ssthresh(tp)); 267 266 } else if (diff < alpha) { 268 267 /* We don't have enough extra packets 269 268 * in the network, so speed up. ··· 281 280 else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) 282 281 tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); 283 282 284 - tp->snd_ssthresh = tcp_current_ssthresh(sk); 283 + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); 285 284 } 286 285 287 286 /* Wipe the slate clean for the next RTT. */

+2 -2

net/ipv4/tcp_westwood.c

··· 244 244 245 245 switch (event) { 246 246 case CA_EVENT_COMPLETE_CWR: 247 - tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 247 + WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk)); 248 248 tcp_snd_cwnd_set(tp, tp->snd_ssthresh); 249 249 break; 250 250 case CA_EVENT_LOSS: 251 - tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 251 + WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk)); 252 252 /* Update RTT_min when next ack arrives */ 253 253 w->reset_rtt_min = 1; 254 254 break;

+2 -1

net/ipv4/tcp_yeah.c

··· 147 147 tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), 148 148 yeah->reno_count)); 149 149 150 - tp->snd_ssthresh = tcp_snd_cwnd(tp); 150 + WRITE_ONCE(tp->snd_ssthresh, 151 + tcp_snd_cwnd(tp)); 151 152 } 152 153 153 154 if (yeah->reno_count <= 2)

+4 -6

net/ipv6/icmp.c

··· 1104 1104 struct net *net = dev_net_rcu(skb->dev); 1105 1105 struct net_device *dev = icmp6_dev(skb); 1106 1106 struct inet6_dev *idev = __in6_dev_get(dev); 1107 - const struct in6_addr *saddr, *daddr; 1108 1107 struct icmp6hdr *hdr; 1109 1108 u8 type; 1110 1109 ··· 1134 1135 1135 1136 __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS); 1136 1137 1137 - saddr = &ipv6_hdr(skb)->saddr; 1138 - daddr = &ipv6_hdr(skb)->daddr; 1139 - 1140 1138 if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { 1141 1139 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", 1142 - saddr, daddr); 1140 + &ipv6_hdr(skb)->saddr, 1141 + &ipv6_hdr(skb)->daddr); 1143 1142 goto csum_error; 1144 1143 } 1145 1144 ··· 1217 1220 break; 1218 1221 1219 1222 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", 1220 - saddr, daddr); 1223 + &ipv6_hdr(skb)->saddr, 1224 + &ipv6_hdr(skb)->daddr); 1221 1225 1222 1226 /* 1223 1227 * error of unknown type.

+6

net/ipv6/ip6_tunnel.c

··· 62 62 MODULE_ALIAS_RTNL_LINK("ip6tnl"); 63 63 MODULE_ALIAS_NETDEV("ip6tnl0"); 64 64 65 + #define IP6_TUNNEL_MAX_DEST_TLVS 8 66 + 65 67 #define IP6_TUNNEL_HASH_SIZE_SHIFT 5 66 68 #define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) 67 69 ··· 427 425 break; 428 426 } 429 427 if (nexthdr == NEXTHDR_DEST) { 428 + int tlv_cnt = 0; 430 429 u16 i = 2; 431 430 432 431 while (1) { 433 432 struct ipv6_tlv_tnl_enc_lim *tel; 433 + 434 + if (unlikely(tlv_cnt++ >= IP6_TUNNEL_MAX_DEST_TLVS)) 435 + break; 434 436 435 437 /* No more room for encapsulation limit */ 436 438 if (i + sizeof(*tel) > optlen)

+2 -2

net/ipv6/netfilter/ip6table_nat.c

··· 81 81 while (i) 82 82 nf_nat_ipv6_unregister_fn(net, &ops[--i]); 83 83 84 - kfree(ops); 84 + kfree_rcu(ops, rcu); 85 85 return ret; 86 86 } 87 87 } ··· 102 102 for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) 103 103 nf_nat_ipv6_unregister_fn(net, &ops[i]); 104 104 105 - kfree(ops); 105 + kfree_rcu(ops, rcu); 106 106 } 107 107 108 108 static int ip6table_nat_table_init(struct net *net)

+2 -1

net/ipv6/seg6_iptunnel.c

··· 746 746 newts->type = LWTUNNEL_ENCAP_SEG6; 747 747 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; 748 748 749 - if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) 749 + if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP && 750 + tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP_RED) 750 751 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; 751 752 752 753 newts->headroom = seg6_lwt_headroom(tuninfo);

+3 -1

net/llc/af_llc.c

··· 520 520 if (sk->sk_state == TCP_SYN_SENT) { 521 521 const long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 522 522 523 - if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) 523 + if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) { 524 + rc = -EINPROGRESS; 524 525 goto out; 526 + } 525 527 526 528 rc = sock_intr_errno(timeo); 527 529 if (signal_pending(current))

+6 -2

net/mctp/route.c

··· 441 441 unsigned long f; 442 442 u8 tag, flags; 443 443 int rc; 444 + u8 ver; 444 445 445 446 msk = NULL; 446 447 rc = -EINVAL; ··· 468 467 netid = mctp_cb(skb)->net; 469 468 skb_pull(skb, sizeof(struct mctp_hdr)); 470 469 471 - if (mh->ver != 1) 470 + ver = mh->ver & MCTP_HDR_VER_MASK; 471 + if (ver < MCTP_VER_MIN || ver > MCTP_VER_MAX) 472 472 goto out; 473 473 474 474 flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); ··· 1319 1317 struct mctp_dst dst; 1320 1318 struct mctp_hdr *mh; 1321 1319 int rc; 1320 + u8 ver; 1322 1321 1323 1322 rcu_read_lock(); 1324 1323 mdev = __mctp_dev_get(dev); ··· 1337 1334 1338 1335 /* We have enough for a header; decode and route */ 1339 1336 mh = mctp_hdr(skb); 1340 - if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) 1337 + ver = mh->ver & MCTP_HDR_VER_MASK; 1338 + if (ver < MCTP_VER_MIN || ver > MCTP_VER_MAX) 1341 1339 goto err_drop; 1342 1340 1343 1341 /* source must be valid unicast or null; drop reserved ranges and

+1 -1

net/mptcp/protocol.c

··· 3594 3594 * uses the correct data 3595 3595 */ 3596 3596 mptcp_copy_inaddrs(nsk, ssk); 3597 - __mptcp_propagate_sndbuf(nsk, ssk); 3598 3597 3599 3598 mptcp_rcv_space_init(msk, ssk); 3600 3599 msk->rcvq_space.time = mptcp_stamp(); ··· 4251 4252 4252 4253 mptcp_graft_subflows(newsk); 4253 4254 mptcp_rps_record_subflows(msk); 4255 + __mptcp_propagate_sndbuf(newsk, mptcp_subflow_tcp_sock(subflow)); 4254 4256 4255 4257 /* Do late cleanup for the first subflow as necessary. Also 4256 4258 * deal with bad peers not doing a complete shutdown.

+15 -4

net/netfilter/ipvs/ip_vs_xmit.c

··· 102 102 return dest_dst; 103 103 } 104 104 105 + /* Based on ip_exceeds_mtu(). */ 106 + static bool ip_vs_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) 107 + { 108 + if (skb->len <= mtu) 109 + return false; 110 + 111 + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) 112 + return false; 113 + 114 + return true; 115 + } 116 + 105 117 static inline bool 106 118 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) 107 119 { ··· 123 111 */ 124 112 if (IP6CB(skb)->frag_max_size > mtu) 125 113 return true; /* largest fragment violate MTU */ 126 - } 127 - else if (skb->len > mtu && !skb_is_gso(skb)) { 114 + } else if (ip_vs_exceeds_mtu(skb, mtu)) 128 115 return true; /* Packet size violate MTU size */ 129 - } 116 + 130 117 return false; 131 118 } 132 119 ··· 243 232 return true; 244 233 245 234 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && 246 - skb->len > mtu && !skb_is_gso(skb) && 235 + ip_vs_exceeds_mtu(skb, mtu) && 247 236 !ip_vs_iph_icmp(ipvsh))) { 248 237 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 249 238 htonl(mtu));

+1 -1

net/netfilter/nf_nat_amanda.c

··· 50 50 return NF_DROP; 51 51 } 52 52 53 - sprintf(buffer, "%u", port); 53 + snprintf(buffer, sizeof(buffer), "%u", port); 54 54 if (!nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, 55 55 protoff, matchoff, matchlen, 56 56 buffer, strlen(buffer))) {

+6 -4

net/netfilter/nf_nat_core.c

··· 1222 1222 ret = nf_register_net_hooks(net, nat_ops, ops_count); 1223 1223 if (ret < 0) { 1224 1224 mutex_unlock(&nf_nat_proto_mutex); 1225 - for (i = 0; i < ops_count; i++) 1226 - kfree(nat_ops[i].priv); 1227 - kfree(nat_ops); 1225 + for (i = 0; i < ops_count; i++) { 1226 + priv = nat_ops[i].priv; 1227 + kfree_rcu(priv, rcu_head); 1228 + } 1229 + kfree_rcu(nat_ops, rcu); 1228 1230 return ret; 1229 1231 } 1230 1232 ··· 1290 1288 } 1291 1289 1292 1290 nat_proto_net->nat_hook_ops = NULL; 1293 - kfree(nat_ops); 1291 + kfree_rcu(nat_ops, rcu); 1294 1292 } 1295 1293 unlock: 1296 1294 mutex_unlock(&nf_nat_proto_mutex);

+18 -15

net/netfilter/nf_nat_sip.c

··· 68 68 } 69 69 70 70 static int sip_sprintf_addr(const struct nf_conn *ct, char *buffer, 71 + size_t size, 71 72 const union nf_inet_addr *addr, bool delim) 72 73 { 73 74 if (nf_ct_l3num(ct) == NFPROTO_IPV4) 74 - return sprintf(buffer, "%pI4", &addr->ip); 75 + return scnprintf(buffer, size, "%pI4", &addr->ip); 75 76 else { 76 77 if (delim) 77 - return sprintf(buffer, "[%pI6c]", &addr->ip6); 78 + return scnprintf(buffer, size, "[%pI6c]", &addr->ip6); 78 79 else 79 - return sprintf(buffer, "%pI6c", &addr->ip6); 80 + return scnprintf(buffer, size, "%pI6c", &addr->ip6); 80 81 } 81 82 } 82 83 83 84 static int sip_sprintf_addr_port(const struct nf_conn *ct, char *buffer, 85 + size_t size, 84 86 const union nf_inet_addr *addr, u16 port) 85 87 { 86 88 if (nf_ct_l3num(ct) == NFPROTO_IPV4) 87 - return sprintf(buffer, "%pI4:%u", &addr->ip, port); 89 + return scnprintf(buffer, size, "%pI4:%u", &addr->ip, port); 88 90 else 89 - return sprintf(buffer, "[%pI6c]:%u", &addr->ip6, port); 91 + return scnprintf(buffer, size, "[%pI6c]:%u", &addr->ip6, port); 90 92 } 91 93 92 94 static int map_addr(struct sk_buff *skb, unsigned int protoff, ··· 121 119 if (nf_inet_addr_cmp(&newaddr, addr) && newport == port) 122 120 return 1; 123 121 124 - buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, ntohs(newport)); 122 + buflen = sip_sprintf_addr_port(ct, buffer, sizeof(buffer), &newaddr, ntohs(newport)); 125 123 return mangle_packet(skb, protoff, dataoff, dptr, datalen, 126 124 matchoff, matchlen, buffer, buflen); 127 125 } ··· 214 212 &addr, true) > 0 && 215 213 nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3) && 216 214 !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3)) { 217 - buflen = sip_sprintf_addr(ct, buffer, 215 + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), 218 216 &ct->tuplehash[!dir].tuple.dst.u3, 219 217 true); 220 218 if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, ··· 231 229 &addr, false) > 0 && 232 230 nf_inet_addr_cmp(&addr, 
&ct->tuplehash[dir].tuple.dst.u3) && 233 231 !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.src.u3)) { 234 - buflen = sip_sprintf_addr(ct, buffer, 232 + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), 235 233 &ct->tuplehash[!dir].tuple.src.u3, 236 234 false); 237 235 if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, ··· 249 247 htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port && 250 248 htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { 251 249 __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; 252 - buflen = sprintf(buffer, "%u", ntohs(p)); 250 + buflen = scnprintf(buffer, sizeof(buffer), "%u", ntohs(p)); 253 251 if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, 254 252 poff, plen, buffer, buflen)) { 255 253 nf_ct_helper_log(skb, ct, "cannot mangle rport"); ··· 420 418 421 419 if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) || 422 420 exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { 423 - buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port); 421 + buflen = sip_sprintf_addr_port(ct, buffer, sizeof(buffer), 422 + &newaddr, port); 424 423 if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, 425 424 matchoff, matchlen, buffer, buflen)) { 426 425 nf_ct_helper_log(skb, ct, "cannot mangle packet"); ··· 441 438 { 442 439 enum ip_conntrack_info ctinfo; 443 440 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 441 + char buffer[sizeof("4294967295")]; 444 442 unsigned int matchoff, matchlen; 445 - char buffer[sizeof("65536")]; 446 443 int buflen, c_len; 447 444 448 445 /* Get actual SDP length */ ··· 457 454 &matchoff, &matchlen) <= 0) 458 455 return 0; 459 456 460 - buflen = sprintf(buffer, "%u", c_len); 457 + buflen = scnprintf(buffer, sizeof(buffer), "%u", c_len); 461 458 return mangle_packet(skb, protoff, dataoff, dptr, datalen, 462 459 matchoff, matchlen, buffer, buflen); 463 460 } ··· 494 491 char buffer[INET6_ADDRSTRLEN]; 495 492 unsigned int buflen; 496 493 497 - buflen = sip_sprintf_addr(ct, buffer, addr, 
false); 494 + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), addr, false); 498 495 if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, 499 496 sdpoff, type, term, buffer, buflen)) 500 497 return 0; ··· 512 509 char buffer[sizeof("nnnnn")]; 513 510 unsigned int buflen; 514 511 515 - buflen = sprintf(buffer, "%u", port); 512 + buflen = scnprintf(buffer, sizeof(buffer), "%u", port); 516 513 if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, 517 514 matchoff, matchlen, buffer, buflen)) 518 515 return 0; ··· 532 529 unsigned int buflen; 533 530 534 531 /* Mangle session description owner and contact addresses */ 535 - buflen = sip_sprintf_addr(ct, buffer, addr, false); 532 + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), addr, false); 536 533 if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, 537 534 SDP_HDR_OWNER, SDP_HDR_MEDIA, buffer, buflen)) 538 535 return 0;

+19 -26

net/netfilter/nfnetlink_osf.c

··· 31 31 static inline int nf_osf_ttl(const struct sk_buff *skb, 32 32 int ttl_check, unsigned char f_ttl) 33 33 { 34 - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); 35 34 const struct iphdr *ip = ip_hdr(skb); 36 - const struct in_ifaddr *ifa; 37 - int ret = 0; 38 35 39 - if (ttl_check == NF_OSF_TTL_TRUE) 36 + switch (ttl_check) { 37 + case NF_OSF_TTL_TRUE: 40 38 return ip->ttl == f_ttl; 41 - if (ttl_check == NF_OSF_TTL_NOCHECK) 39 + break; 40 + case NF_OSF_TTL_NOCHECK: 42 41 return 1; 43 - else if (ip->ttl <= f_ttl) 44 - return 1; 45 - 46 - in_dev_for_each_ifa_rcu(ifa, in_dev) { 47 - if (inet_ifa_match(ip->saddr, ifa)) { 48 - ret = (ip->ttl == f_ttl); 49 - break; 50 - } 42 + case NF_OSF_TTL_LESS: 43 + default: 44 + return ip->ttl <= f_ttl; 51 45 } 52 - 53 - return ret; 54 46 } 55 47 56 48 struct nf_osf_hdr_ctx { ··· 56 64 static bool nf_osf_match_one(const struct sk_buff *skb, 57 65 const struct nf_osf_user_finger *f, 58 66 int ttl_check, 59 - struct nf_osf_hdr_ctx *ctx) 67 + const struct nf_osf_hdr_ctx *ctx) 60 68 { 61 - const __u8 *optpinit = ctx->optp; 69 + const __u8 *optp = ctx->optp; 62 70 unsigned int check_WSS = 0; 63 71 int fmatch = FMATCH_WRONG; 64 72 int foptsize, optnum; ··· 87 95 check_WSS = f->wss.wc; 88 96 89 97 for (optnum = 0; optnum < f->opt_num; ++optnum) { 90 - if (f->opt[optnum].kind == *ctx->optp) { 98 + if (f->opt[optnum].kind == *optp) { 91 99 __u32 len = f->opt[optnum].length; 92 - const __u8 *optend = ctx->optp + len; 100 + const __u8 *optend = optp + len; 93 101 94 102 fmatch = FMATCH_OK; 95 103 96 - switch (*ctx->optp) { 104 + switch (*optp) { 97 105 case OSFOPT_MSS: 98 - mss = ctx->optp[3]; 106 + mss = optp[3]; 99 107 mss <<= 8; 100 - mss |= ctx->optp[2]; 108 + mss |= optp[2]; 101 109 102 110 mss = ntohs((__force __be16)mss); 103 111 break; ··· 105 113 break; 106 114 } 107 115 108 - ctx->optp = optend; 116 + optp = optend; 109 117 } else 110 118 fmatch = FMATCH_OPT_WRONG; 111 119 ··· 147 155 break; 148 156 } 149 157 } 150 - 
151 - if (fmatch != FMATCH_OK) 152 - ctx->optp = optpinit; 153 158 154 159 return fmatch == FMATCH_OK; 155 160 } ··· 307 318 f = nla_data(osf_attrs[OSF_ATTR_FINGER]); 308 319 309 320 if (f->opt_num > ARRAY_SIZE(f->opt)) 321 + return -EINVAL; 322 + 323 + if (f->wss.wc >= OSF_WSS_MAX || 324 + (f->wss.wc == OSF_WSS_MODULO && f->wss.val == 0)) 310 325 return -EINVAL; 311 326 312 327 for (i = 0; i < f->opt_num; i++) {

+5 -1

net/netfilter/nft_osf.c

··· 28 28 struct nf_osf_data data; 29 29 struct tcphdr _tcph; 30 30 31 + if (nft_pf(pkt) != NFPROTO_IPV4) { 32 + regs->verdict.code = NFT_BREAK; 33 + return; 34 + } 35 + 31 36 if (pkt->tprot != IPPROTO_TCP) { 32 37 regs->verdict.code = NFT_BREAK; 33 38 return; ··· 119 114 120 115 switch (ctx->family) { 121 116 case NFPROTO_IPV4: 122 - case NFPROTO_IPV6: 123 117 case NFPROTO_INET: 124 118 hooks = (1 << NF_INET_LOCAL_IN) | 125 119 (1 << NF_INET_PRE_ROUTING) |

+23 -11

net/netfilter/xt_mac.c

··· 36 36 return ret; 37 37 } 38 38 39 - static struct xt_match mac_mt_reg __read_mostly = { 40 - .name = "mac", 41 - .revision = 0, 42 - .family = NFPROTO_UNSPEC, 43 - .match = mac_mt, 44 - .matchsize = sizeof(struct xt_mac_info), 45 - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | 46 - (1 << NF_INET_FORWARD), 47 - .me = THIS_MODULE, 39 + static struct xt_match mac_mt_reg[] __read_mostly = { 40 + { 41 + .name = "mac", 42 + .family = NFPROTO_IPV4, 43 + .match = mac_mt, 44 + .matchsize = sizeof(struct xt_mac_info), 45 + .hooks = (1 << NF_INET_PRE_ROUTING) | 46 + (1 << NF_INET_LOCAL_IN) | 47 + (1 << NF_INET_FORWARD), 48 + .me = THIS_MODULE, 49 + }, 50 + { 51 + .name = "mac", 52 + .family = NFPROTO_IPV6, 53 + .match = mac_mt, 54 + .matchsize = sizeof(struct xt_mac_info), 55 + .hooks = (1 << NF_INET_PRE_ROUTING) | 56 + (1 << NF_INET_LOCAL_IN) | 57 + (1 << NF_INET_FORWARD), 58 + .me = THIS_MODULE, 59 + }, 48 60 }; 49 61 50 62 static int __init mac_mt_init(void) 51 63 { 52 - return xt_register_match(&mac_mt_reg); 64 + return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); 53 65 } 54 66 55 67 static void __exit mac_mt_exit(void) 56 68 { 57 - xt_unregister_match(&mac_mt_reg); 69 + xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); 58 70 } 59 71 60 72 module_init(mac_mt_init);

+25 -12

net/netfilter/xt_owner.c

··· 127 127 return true; 128 128 } 129 129 130 - static struct xt_match owner_mt_reg __read_mostly = { 131 - .name = "owner", 132 - .revision = 1, 133 - .family = NFPROTO_UNSPEC, 134 - .checkentry = owner_check, 135 - .match = owner_mt, 136 - .matchsize = sizeof(struct xt_owner_match_info), 137 - .hooks = (1 << NF_INET_LOCAL_OUT) | 138 - (1 << NF_INET_POST_ROUTING), 139 - .me = THIS_MODULE, 130 + static struct xt_match owner_mt_reg[] __read_mostly = { 131 + { 132 + .name = "owner", 133 + .revision = 1, 134 + .family = NFPROTO_IPV4, 135 + .checkentry = owner_check, 136 + .match = owner_mt, 137 + .matchsize = sizeof(struct xt_owner_match_info), 138 + .hooks = (1 << NF_INET_LOCAL_OUT) | 139 + (1 << NF_INET_POST_ROUTING), 140 + .me = THIS_MODULE, 141 + }, 142 + { 143 + .name = "owner", 144 + .revision = 1, 145 + .family = NFPROTO_IPV6, 146 + .checkentry = owner_check, 147 + .match = owner_mt, 148 + .matchsize = sizeof(struct xt_owner_match_info), 149 + .hooks = (1 << NF_INET_LOCAL_OUT) | 150 + (1 << NF_INET_POST_ROUTING), 151 + .me = THIS_MODULE, 152 + } 140 153 }; 141 154 142 155 static int __init owner_mt_init(void) 143 156 { 144 - return xt_register_match(&owner_mt_reg); 157 + return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); 145 158 } 146 159 147 160 static void __exit owner_mt_exit(void) 148 161 { 149 - xt_unregister_match(&owner_mt_reg); 162 + xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); 150 163 } 151 164 152 165 module_init(owner_mt_init);

+19 -10

net/netfilter/xt_physdev.c

··· 137 137 return 0; 138 138 } 139 139 140 - static struct xt_match physdev_mt_reg __read_mostly = { 141 - .name = "physdev", 142 - .revision = 0, 143 - .family = NFPROTO_UNSPEC, 144 - .checkentry = physdev_mt_check, 145 - .match = physdev_mt, 146 - .matchsize = sizeof(struct xt_physdev_info), 147 - .me = THIS_MODULE, 140 + static struct xt_match physdev_mt_reg[] __read_mostly = { 141 + { 142 + .name = "physdev", 143 + .family = NFPROTO_IPV4, 144 + .checkentry = physdev_mt_check, 145 + .match = physdev_mt, 146 + .matchsize = sizeof(struct xt_physdev_info), 147 + .me = THIS_MODULE, 148 + }, 149 + { 150 + .name = "physdev", 151 + .family = NFPROTO_IPV6, 152 + .checkentry = physdev_mt_check, 153 + .match = physdev_mt, 154 + .matchsize = sizeof(struct xt_physdev_info), 155 + .me = THIS_MODULE, 156 + }, 148 157 }; 149 158 150 159 static int __init physdev_mt_init(void) 151 160 { 152 - return xt_register_match(&physdev_mt_reg); 161 + return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); 153 162 } 154 163 155 164 static void __exit physdev_mt_exit(void) 156 165 { 157 - xt_unregister_match(&physdev_mt_reg); 166 + xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); 158 167 } 159 168 160 169 module_init(physdev_mt_init);

+1 -1

net/netfilter/xt_realm.c

··· 33 33 .matchsize = sizeof(struct xt_realm_info), 34 34 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) | 35 35 (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), 36 - .family = NFPROTO_UNSPEC, 36 + .family = NFPROTO_IPV4, 37 37 .me = THIS_MODULE 38 38 }; 39 39

+33 -2

net/openvswitch/datapath.c

··· 2184 2184 return err; 2185 2185 } 2186 2186 2187 + static size_t ovs_vport_cmd_msg_size(void) 2188 + { 2189 + size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); 2190 + 2191 + msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_PORT_NO */ 2192 + msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_TYPE */ 2193 + msgsize += nla_total_size(IFNAMSIZ); /* OVS_VPORT_ATTR_NAME */ 2194 + msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_IFINDEX */ 2195 + msgsize += nla_total_size(sizeof(s32)); /* OVS_VPORT_ATTR_NETNSID */ 2196 + 2197 + /* OVS_VPORT_ATTR_STATS */ 2198 + msgsize += nla_total_size_64bit(sizeof(struct ovs_vport_stats)); 2199 + 2200 + /* OVS_VPORT_ATTR_UPCALL_STATS(OVS_VPORT_UPCALL_ATTR_SUCCESS + 2201 + * OVS_VPORT_UPCALL_ATTR_FAIL) 2202 + */ 2203 + msgsize += nla_total_size(nla_total_size_64bit(sizeof(u64)) + 2204 + nla_total_size_64bit(sizeof(u64))); 2205 + 2206 + /* OVS_VPORT_ATTR_UPCALL_PID */ 2207 + msgsize += nla_total_size(nr_cpu_ids * sizeof(u32)); 2208 + 2209 + /* OVS_VPORT_ATTR_OPTIONS(OVS_TUNNEL_ATTR_DST_PORT + 2210 + * OVS_TUNNEL_ATTR_EXTENSION(OVS_VXLAN_EXT_GBP)) 2211 + */ 2212 + msgsize += nla_total_size(nla_total_size(sizeof(u16)) + 2213 + nla_total_size(nla_total_size(0))); 2214 + 2215 + return msgsize; 2216 + } 2217 + 2187 2218 static struct sk_buff *ovs_vport_cmd_alloc_info(void) 2188 2219 { 2189 - return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2220 + return genlmsg_new(ovs_vport_cmd_msg_size(), GFP_KERNEL); 2190 2221 } 2191 2222 2192 2223 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ ··· 2227 2196 struct sk_buff *skb; 2228 2197 int retval; 2229 2198 2230 - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2199 + skb = ovs_vport_cmd_alloc_info(); 2231 2200 if (!skb) 2232 2201 return ERR_PTR(-ENOMEM); 2233 2202

+3

net/openvswitch/vport.c

··· 406 406 if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) 407 407 return -EINVAL; 408 408 409 + if (nla_len(ids) / sizeof(u32) > nr_cpu_ids) 410 + return -EINVAL; 411 + 409 412 old = ovsl_dereference(vport->upcall_portids); 410 413 411 414 vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),

+13 -8

net/packet/af_packet.c

··· 2718 2718 { 2719 2719 struct sk_buff *skb = NULL; 2720 2720 struct net_device *dev; 2721 - struct virtio_net_hdr *vnet_hdr = NULL; 2721 + struct virtio_net_hdr vnet_hdr; 2722 + bool has_vnet_hdr = false; 2722 2723 struct sockcm_cookie sockc; 2723 2724 __be16 proto; 2724 2725 int err, reserve = 0; ··· 2820 2819 hlen = LL_RESERVED_SPACE(dev); 2821 2820 tlen = dev->needed_tailroom; 2822 2821 if (vnet_hdr_sz) { 2823 - vnet_hdr = data; 2824 2822 data += vnet_hdr_sz; 2825 2823 tp_len -= vnet_hdr_sz; 2826 - if (tp_len < 0 || 2827 - __packet_snd_vnet_parse(vnet_hdr, tp_len)) { 2824 + if (tp_len < 0) { 2825 + tp_len = -EINVAL; 2826 + goto tpacket_error; 2827 + } 2828 + memcpy(&vnet_hdr, data - vnet_hdr_sz, sizeof(vnet_hdr)); 2829 + if (__packet_snd_vnet_parse(&vnet_hdr, tp_len)) { 2828 2830 tp_len = -EINVAL; 2829 2831 goto tpacket_error; 2830 2832 } 2831 2833 copylen = __virtio16_to_cpu(vio_le(), 2832 - vnet_hdr->hdr_len); 2834 + vnet_hdr.hdr_len); 2835 + has_vnet_hdr = true; 2833 2836 } 2834 2837 copylen = max_t(int, copylen, dev->hard_header_len); 2835 2838 skb = sock_alloc_send_skb(&po->sk, ··· 2870 2865 } 2871 2866 } 2872 2867 2873 - if (vnet_hdr_sz) { 2874 - if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { 2868 + if (has_vnet_hdr) { 2869 + if (virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le())) { 2875 2870 tp_len = -EINVAL; 2876 2871 goto tpacket_error; 2877 2872 } 2878 - virtio_net_hdr_set_proto(skb, vnet_hdr); 2873 + virtio_net_hdr_set_proto(skb, &vnet_hdr); 2879 2874 } 2880 2875 2881 2876 skb->destructor = tpacket_destruct_skb;

+14

net/rds/connection.c

··· 701 701 i++, head++) { 702 702 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 703 703 704 + /* Zero the per-item buffer before handing it to the 705 + * visitor so any field the visitor does not write - 706 + * including implicit alignment padding - cannot leak 707 + * stack contents to user space via rds_info_copy(). 708 + */ 709 + memset(buffer, 0, item_len); 710 + 704 711 /* XXX no c_lock usage.. */ 705 712 if (!visitor(conn, buffer)) 706 713 continue; ··· 756 749 * a bug in the design of MPRDS. 757 750 */ 758 751 cp = conn->c_path; 752 + 753 + /* Zero the per-item buffer for the same reason as 754 + * rds_for_each_conn_info(): any byte the visitor 755 + * does not write (including alignment padding) must 756 + * not leak stack contents via rds_info_copy(). 757 + */ 758 + memset(buffer, 0, item_len); 759 759 760 760 /* XXX no cp_lock usage.. */ 761 761 if (!visitor(cp, buffer))

-4

net/rds/rdma.c

··· 326 326 327 327 if (args->cookie_addr && 328 328 put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) { 329 - if (!need_odp) { 330 - unpin_user_pages(pages, nr_pages); 331 - kfree(sg); 332 - } 333 329 ret = -EFAULT; 334 330 goto out; 335 331 }

-1

net/rxrpc/ar-internal.h

··· 1486 1486 void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); 1487 1487 void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); 1488 1488 void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); 1489 - void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); 1490 1489 void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); 1491 1490 void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); 1492 1491 void rxrpc_purge_queue(struct sk_buff_head *);

+19 -1

net/rxrpc/call_event.c

··· 332 332 333 333 saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; 334 334 335 - rxrpc_input_call_packet(call, skb); 335 + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && 336 + sp->hdr.securityIndex != 0 && 337 + skb_cloned(skb)) { 338 + /* Unshare the packet so that it can be 339 + * modified by in-place decryption. 340 + */ 341 + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 342 + 343 + if (nskb) { 344 + rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); 345 + rxrpc_input_call_packet(call, nskb); 346 + rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx); 347 + } else { 348 + /* OOM - Drop the packet. */ 349 + rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); 350 + } 351 + } else { 352 + rxrpc_input_call_packet(call, skb); 353 + } 336 354 rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); 337 355 did_receive = true; 338 356 }

+30 -13

net/rxrpc/conn_event.c

··· 240 240 rxrpc_notify_socket(call); 241 241 } 242 242 243 + static int rxrpc_verify_response(struct rxrpc_connection *conn, 244 + struct sk_buff *skb) 245 + { 246 + int ret; 247 + 248 + if (skb_cloned(skb)) { 249 + /* Copy the packet if shared so that we can do in-place 250 + * decryption. 251 + */ 252 + struct sk_buff *nskb = skb_copy(skb, GFP_NOFS); 253 + 254 + if (nskb) { 255 + rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); 256 + ret = conn->security->verify_response(conn, nskb); 257 + rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy); 258 + } else { 259 + /* OOM - Drop the packet. */ 260 + rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); 261 + ret = -ENOMEM; 262 + } 263 + } else { 264 + ret = conn->security->verify_response(conn, skb); 265 + } 266 + 267 + return ret; 268 + } 269 + 243 270 /* 244 271 * connection-level Rx packet processor 245 272 */ ··· 297 270 } 298 271 spin_unlock_irq(&conn->state_lock); 299 272 300 - ret = conn->security->verify_response(conn, skb); 273 + ret = rxrpc_verify_response(conn, skb); 301 274 if (ret < 0) 302 275 return ret; 303 276 ··· 389 362 static void rxrpc_do_process_connection(struct rxrpc_connection *conn) 390 363 { 391 364 struct sk_buff *skb; 392 - int ret; 393 365 394 366 if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) 395 367 rxrpc_secure_connection(conn); ··· 397 371 * connection that each one has when we've finished with it */ 398 372 while ((skb = skb_dequeue(&conn->rx_queue))) { 399 373 rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); 400 - ret = rxrpc_process_event(conn, skb); 401 - switch (ret) { 402 - case -ENOMEM: 403 - case -EAGAIN: 404 - skb_queue_head(&conn->rx_queue, skb); 405 - rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work); 406 - break; 407 - default: 408 - rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); 409 - break; 410 - } 374 + rxrpc_process_event(conn, skb); 375 + rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); 411 376 } 412 377 } 413 378

+2 -22

net/rxrpc/io_thread.c

··· 192 192 /* 193 193 * Process packets received on the local endpoint 194 194 */ 195 - static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) 195 + static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff *skb) 196 196 { 197 197 struct rxrpc_connection *conn; 198 198 struct sockaddr_rxrpc peer_srx; 199 199 struct rxrpc_skb_priv *sp; 200 200 struct rxrpc_peer *peer = NULL; 201 - struct sk_buff *skb = *_skb; 202 201 bool ret = false; 203 202 204 203 skb_pull(skb, sizeof(struct udphdr)); ··· 243 244 return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); 244 245 if (sp->hdr.seq == 0) 245 246 return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); 246 - 247 - /* Unshare the packet so that it can be modified for in-place 248 - * decryption. 249 - */ 250 - if (sp->hdr.securityIndex != 0) { 251 - skb = skb_unshare(skb, GFP_ATOMIC); 252 - if (!skb) { 253 - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); 254 - *_skb = NULL; 255 - return just_discard; 256 - } 257 - 258 - if (skb != *_skb) { 259 - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare); 260 - *_skb = skb; 261 - rxrpc_new_skb(skb, rxrpc_skb_new_unshared); 262 - sp = rxrpc_skb(skb); 263 - } 264 - } 265 247 break; 266 248 267 249 case RXRPC_PACKET_TYPE_CHALLENGE: ··· 474 494 switch (skb->mark) { 475 495 case RXRPC_SKB_MARK_PACKET: 476 496 skb->priority = 0; 477 - if (!rxrpc_input_packet(local, &skb)) 497 + if (!rxrpc_input_packet(local, skb)) 478 498 rxrpc_reject_packet(local, skb); 479 499 trace_rxrpc_rx_done(skb->mark, skb->priority); 480 500 rxrpc_free_skb(skb, rxrpc_skb_put_input);

+4

net/rxrpc/key.c

··· 502 502 if (v1->security_index != RXRPC_SECURITY_RXKAD) 503 503 goto error; 504 504 505 + ret = -EKEYREJECTED; 506 + if (v1->ticket_length > AFSTOKEN_RK_TIX_MAX) 507 + goto error; 508 + 505 509 plen = sizeof(*token->kad) + v1->ticket_length; 506 510 prep->quotalen += plen + sizeof(*token); 507 511

+2 -1

net/rxrpc/rxgk_app.c

··· 214 214 ticket_len = ntohl(container.token_len); 215 215 ticket_offset = token_offset + sizeof(container); 216 216 217 - if (xdr_round_up(ticket_len) > token_len - sizeof(container)) 217 + if (ticket_len > xdr_round_down(token_len - sizeof(container))) 218 218 goto short_packet; 219 219 220 220 _debug("KVNO %u", kvno); ··· 245 245 if (ret != -ENOMEM) 246 246 return rxrpc_abort_conn(conn, skb, ec, ret, 247 247 rxgk_abort_resp_tok_dec); 248 + return ret; 248 249 } 249 250 250 251 ret = conn->security->default_decode_ticket(conn, skb, ticket_offset,

+1

net/rxrpc/rxgk_common.h

··· 34 34 }; 35 35 36 36 #define xdr_round_up(x) (round_up((x), sizeof(__be32))) 37 + #define xdr_round_down(x) (round_down((x), sizeof(__be32))) 37 38 #define xdr_object_len(x) (4 + xdr_round_up(x)) 38 39 39 40 /*

+49 -63

net/rxrpc/rxkad.c

··· 510 510 return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, 511 511 rxkad_abort_2_short_header); 512 512 513 + /* Don't let the crypto algo see a misaligned length. */ 514 + sp->len = round_down(sp->len, 8); 515 + 513 516 /* Decrypt the skbuff in-place. TODO: We really want to decrypt 514 517 * directly into the target buffer. 515 518 */ ··· 546 543 if (sg != _sg) 547 544 kfree(sg); 548 545 if (ret < 0) { 549 - WARN_ON_ONCE(ret != -ENOMEM); 550 - return ret; 546 + if (ret == -ENOMEM) 547 + return ret; 548 + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, 549 + rxkad_abort_2_crypto_unaligned); 551 550 } 552 551 553 552 /* Extract the decrypted packet length */ ··· 1141 1136 struct rxrpc_crypt session_key; 1142 1137 struct key *server_key; 1143 1138 time64_t expiry; 1144 - void *ticket; 1139 + void *ticket = NULL; 1145 1140 u32 version, kvno, ticket_len, level; 1146 1141 __be32 csum; 1147 1142 int ret, i; ··· 1167 1162 ret = -ENOMEM; 1168 1163 response = kzalloc_obj(struct rxkad_response, GFP_NOFS); 1169 1164 if (!response) 1170 - goto temporary_error; 1165 + goto error; 1171 1166 1172 1167 if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), 1173 1168 response, sizeof(*response)) < 0) { 1174 - rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, 1175 - rxkad_abort_resp_short); 1176 - goto protocol_error; 1169 + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, 1170 + rxkad_abort_resp_short); 1171 + goto error; 1177 1172 } 1178 1173 1179 1174 version = ntohl(response->version); ··· 1183 1178 trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); 1184 1179 1185 1180 if (version != RXKAD_VERSION) { 1186 - rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, 1187 - rxkad_abort_resp_version); 1188 - goto protocol_error; 1181 + ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, 1182 + rxkad_abort_resp_version); 1183 + goto error; 1189 1184 } 1190 1185 1191 1186 if (ticket_len < 4 || ticket_len > 
MAXKRB5TICKETLEN) { 1192 - rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, 1193 - rxkad_abort_resp_tkt_len); 1194 - goto protocol_error; 1187 + ret = rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, 1188 + rxkad_abort_resp_tkt_len); 1189 + goto error; 1195 1190 } 1196 1191 1197 1192 if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) { 1198 - rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, 1199 - rxkad_abort_resp_unknown_tkt); 1200 - goto protocol_error; 1193 + ret = rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, 1194 + rxkad_abort_resp_unknown_tkt); 1195 + goto error; 1201 1196 } 1202 1197 1203 1198 /* extract the kerberos ticket and decrypt and decode it */ 1204 1199 ret = -ENOMEM; 1205 1200 ticket = kmalloc(ticket_len, GFP_NOFS); 1206 1201 if (!ticket) 1207 - goto temporary_error_free_resp; 1202 + goto error; 1208 1203 1209 1204 if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), 1210 1205 ticket, ticket_len) < 0) { 1211 - rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, 1212 - rxkad_abort_resp_short_tkt); 1213 - goto protocol_error; 1206 + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, 1207 + rxkad_abort_resp_short_tkt); 1208 + goto error; 1214 1209 } 1215 1210 1216 1211 ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, 1217 1212 &session_key, &expiry); 1218 1213 if (ret < 0) 1219 - goto temporary_error_free_ticket; 1214 + goto error; 1220 1215 1221 1216 /* use the session key from inside the ticket to decrypt the 1222 1217 * response */ 1223 1218 ret = rxkad_decrypt_response(conn, response, &session_key); 1224 1219 if (ret < 0) 1225 - goto temporary_error_free_ticket; 1220 + goto error; 1226 1221 1227 1222 if (ntohl(response->encrypted.epoch) != conn->proto.epoch || 1228 1223 ntohl(response->encrypted.cid) != conn->proto.cid || 1229 1224 ntohl(response->encrypted.securityIndex) != conn->security_ix) { 1230 - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1231 - 
rxkad_abort_resp_bad_param); 1232 - goto protocol_error_free; 1225 + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1226 + rxkad_abort_resp_bad_param); 1227 + goto error; 1233 1228 } 1234 1229 1235 1230 csum = response->encrypted.checksum; 1236 1231 response->encrypted.checksum = 0; 1237 1232 rxkad_calc_response_checksum(response); 1238 1233 if (response->encrypted.checksum != csum) { 1239 - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1240 - rxkad_abort_resp_bad_checksum); 1241 - goto protocol_error_free; 1234 + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1235 + rxkad_abort_resp_bad_checksum); 1236 + goto error; 1242 1237 } 1243 1238 1244 1239 for (i = 0; i < RXRPC_MAXCALLS; i++) { ··· 1246 1241 u32 counter = READ_ONCE(conn->channels[i].call_counter); 1247 1242 1248 1243 if (call_id > INT_MAX) { 1249 - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1250 - rxkad_abort_resp_bad_callid); 1251 - goto protocol_error_free; 1244 + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1245 + rxkad_abort_resp_bad_callid); 1246 + goto error; 1252 1247 } 1253 1248 1254 1249 if (call_id < counter) { 1255 - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1256 - rxkad_abort_resp_call_ctr); 1257 - goto protocol_error_free; 1250 + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1251 + rxkad_abort_resp_call_ctr); 1252 + goto error; 1258 1253 } 1259 1254 1260 1255 if (call_id > counter) { 1261 1256 if (conn->channels[i].call) { 1262 - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1257 + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, 1263 1258 rxkad_abort_resp_call_state); 1264 - goto protocol_error_free; 1259 + goto error; 1265 1260 } 1266 1261 conn->channels[i].call_counter = call_id; 1267 1262 } 1268 1263 } 1269 1264 1270 1265 if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) { 1271 - rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, 1272 - 
rxkad_abort_resp_ooseq); 1273 - goto protocol_error_free; 1266 + ret = rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, 1267 + rxkad_abort_resp_ooseq); 1268 + goto error; 1274 1269 } 1275 1270 1276 1271 level = ntohl(response->encrypted.level); 1277 1272 if (level > RXRPC_SECURITY_ENCRYPT) { 1278 - rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, 1279 - rxkad_abort_resp_level); 1280 - goto protocol_error_free; 1273 + ret = rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, 1274 + rxkad_abort_resp_level); 1275 + goto error; 1281 1276 } 1282 1277 conn->security_level = level; 1283 1278 ··· 1285 1280 * this the connection security can be handled in exactly the same way 1286 1281 * as for a client connection */ 1287 1282 ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); 1288 - if (ret < 0) 1289 - goto temporary_error_free_ticket; 1290 1283 1284 + error: 1291 1285 kfree(ticket); 1292 - kfree(response); 1293 - _leave(" = 0"); 1294 - return 0; 1295 - 1296 - protocol_error_free: 1297 - kfree(ticket); 1298 - protocol_error: 1299 1286 kfree(response); 1300 1287 key_put(server_key); 1301 - return -EPROTO; 1302 - 1303 - temporary_error_free_ticket: 1304 - kfree(ticket); 1305 - temporary_error_free_resp: 1306 - kfree(response); 1307 - temporary_error: 1308 - /* Ignore the response packet if we got a temporary error such as 1309 - * ENOMEM. We just want to send the challenge again. Note that we 1310 - * also come out this way if the ticket decryption fails. 1311 - */ 1312 - key_put(server_key); 1288 + _leave(" = %d", ret); 1313 1289 return ret; 1314 1290 } 1315 1291

-9

net/rxrpc/skbuff.c

··· 47 47 } 48 48 49 49 /* 50 - * Note the dropping of a ref on a socket buffer by the core. 51 - */ 52 - void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) 53 - { 54 - int n = atomic_inc_return(&rxrpc_n_rx_skbs); 55 - trace_rxrpc_skb(skb, 0, n, why); 56 - } 57 - 58 - /* 59 50 * Note the destruction of a socket buffer. 60 51 */ 61 52 void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)

+1 -1

net/sched/act_mirred.c

··· 354 354 goto assign_prev; 355 355 356 356 tcf_mirred_to_dev(skb, m, dev_prev, 357 - dev_is_mac_header_xmit(dev), 357 + dev_is_mac_header_xmit(dev_prev), 358 358 mirred_eaction, retval); 359 359 assign_prev: 360 360 dev_prev = dev;

+1 -1

net/sched/sch_cake.c

··· 619 619 } 620 620 port = rev ? tuple.src.u.all : tuple.dst.u.all; 621 621 if (port != keys->ports.dst) { 622 - port = keys->ports.dst; 622 + keys->ports.dst = port; 623 623 upd = true; 624 624 } 625 625 }

+28 -4

net/sched/sch_dualpi2.c

··· 868 868 old_backlog = sch->qstats.backlog; 869 869 while (qdisc_qlen(sch) > sch->limit || 870 870 q->memory_used > q->memory_limit) { 871 - struct sk_buff *skb = qdisc_dequeue_internal(sch, true); 871 + struct sk_buff *skb = NULL; 872 872 873 - q->memory_used -= skb->truesize; 874 - qdisc_qstats_backlog_dec(sch, skb); 875 - rtnl_qdisc_drop(skb, sch); 873 + if (qdisc_qlen(sch) > qdisc_qlen(q->l_queue)) { 874 + skb = qdisc_dequeue_internal(sch, true); 875 + if (unlikely(!skb)) { 876 + WARN_ON_ONCE(1); 877 + break; 878 + } 879 + q->memory_used -= skb->truesize; 880 + rtnl_qdisc_drop(skb, sch); 881 + } else if (qdisc_qlen(q->l_queue)) { 882 + skb = qdisc_dequeue_internal(q->l_queue, true); 883 + if (unlikely(!skb)) { 884 + WARN_ON_ONCE(1); 885 + break; 886 + } 887 + /* L-queue packets are counted in both sch and 888 + * l_queue on enqueue; qdisc_dequeue_internal() 889 + * handled l_queue, so we further account for sch. 890 + */ 891 + --sch->q.qlen; 892 + qdisc_qstats_backlog_dec(sch, skb); 893 + q->memory_used -= skb->truesize; 894 + rtnl_qdisc_drop(skb, q->l_queue); 895 + qdisc_qstats_drop(sch); 896 + } else { 897 + WARN_ON_ONCE(1); 898 + break; 899 + } 876 900 } 877 901 qdisc_tree_reduce_backlog(sch, old_qlen - qdisc_qlen(sch), 878 902 old_backlog - sch->qstats.backlog);

+2 -1

net/sched/sch_fq_codel.c

··· 585 585 }; 586 586 struct list_head *pos; 587 587 588 + sch_tree_lock(sch); 589 + 588 590 st.qdisc_stats.maxpacket = q->cstats.maxpacket; 589 591 st.qdisc_stats.drop_overlimit = q->drop_overlimit; 590 592 st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; ··· 595 593 st.qdisc_stats.memory_usage = q->memory_usage; 596 594 st.qdisc_stats.drop_overmemory = q->drop_overmemory; 597 595 598 - sch_tree_lock(sch); 599 596 list_for_each(pos, &q->new_flows) 600 597 st.qdisc_stats.new_flows_len++; 601 598

+10 -9

net/sched/sch_hhf.c

··· 198 198 return NULL; 199 199 list_del(&flow->flowchain); 200 200 kfree(flow); 201 - q->hh_flows_current_cnt--; 201 + WRITE_ONCE(q->hh_flows_current_cnt, 202 + q->hh_flows_current_cnt - 1); 202 203 } else if (flow->hash_id == hash) { 203 204 return flow; 204 205 } ··· 227 226 } 228 227 229 228 if (q->hh_flows_current_cnt >= q->hh_flows_limit) { 230 - q->hh_flows_overlimit++; 229 + WRITE_ONCE(q->hh_flows_overlimit, q->hh_flows_overlimit + 1); 231 230 return NULL; 232 231 } 233 232 /* Create new entry. */ ··· 235 234 if (!flow) 236 235 return NULL; 237 236 238 - q->hh_flows_current_cnt++; 237 + WRITE_ONCE(q->hh_flows_current_cnt, q->hh_flows_current_cnt + 1); 239 238 INIT_LIST_HEAD(&flow->flowchain); 240 239 list_add_tail(&flow->flowchain, head); 241 240 ··· 310 309 return WDRR_BUCKET_FOR_NON_HH; 311 310 flow->hash_id = hash; 312 311 flow->hit_timestamp = now; 313 - q->hh_flows_total_cnt++; 312 + WRITE_ONCE(q->hh_flows_total_cnt, q->hh_flows_total_cnt + 1); 314 313 315 314 /* By returning without updating counters in q->hhf_arrays, 316 315 * we implicitly implement "shielding" (see Optimization O1). ··· 404 403 return NET_XMIT_SUCCESS; 405 404 406 405 prev_backlog = sch->qstats.backlog; 407 - q->drop_overlimit++; 406 + WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1); 408 407 /* Return Congestion Notification only if we dropped a packet from this 409 408 * bucket. 410 409 */ ··· 687 686 { 688 687 struct hhf_sched_data *q = qdisc_priv(sch); 689 688 struct tc_hhf_xstats st = { 690 - .drop_overlimit = q->drop_overlimit, 691 - .hh_overlimit = q->hh_flows_overlimit, 692 - .hh_tot_count = q->hh_flows_total_cnt, 693 - .hh_cur_count = q->hh_flows_current_cnt, 689 + .drop_overlimit = READ_ONCE(q->drop_overlimit), 690 + .hh_overlimit = READ_ONCE(q->hh_flows_overlimit), 691 + .hh_tot_count = READ_ONCE(q->hh_flows_total_cnt), 692 + .hh_cur_count = READ_ONCE(q->hh_flows_current_cnt), 694 693 }; 695 694 696 695 return gnet_stats_copy_app(d, &st, sizeof(st));

+19 -19

net/sched/sch_pie.c

··· 90 90 bool enqueue = false; 91 91 92 92 if (unlikely(qdisc_qlen(sch) >= sch->limit)) { 93 - q->stats.overlimit++; 93 + WRITE_ONCE(q->stats.overlimit, q->stats.overlimit + 1); 94 94 goto out; 95 95 } 96 96 ··· 104 104 /* If packet is ecn capable, mark it if drop probability 105 105 * is lower than 10%, else drop it. 106 106 */ 107 - q->stats.ecn_mark++; 107 + WRITE_ONCE(q->stats.ecn_mark, q->stats.ecn_mark + 1); 108 108 enqueue = true; 109 109 } 110 110 ··· 114 114 if (!q->params.dq_rate_estimator) 115 115 pie_set_enqueue_time(skb); 116 116 117 - q->stats.packets_in++; 117 + WRITE_ONCE(q->stats.packets_in, q->stats.packets_in + 1); 118 118 if (qdisc_qlen(sch) > q->stats.maxq) 119 - q->stats.maxq = qdisc_qlen(sch); 119 + WRITE_ONCE(q->stats.maxq, qdisc_qlen(sch)); 120 120 121 121 return qdisc_enqueue_tail(skb, sch); 122 122 } 123 123 124 124 out: 125 - q->stats.dropped++; 125 + WRITE_ONCE(q->stats.dropped, q->stats.dropped + 1); 126 126 q->vars.accu_prob = 0; 127 127 return qdisc_drop_reason(skb, sch, to_free, reason); 128 128 } ··· 267 267 count = count / dtime; 268 268 269 269 if (vars->avg_dq_rate == 0) 270 - vars->avg_dq_rate = count; 270 + WRITE_ONCE(vars->avg_dq_rate, count); 271 271 else 272 - vars->avg_dq_rate = 272 + WRITE_ONCE(vars->avg_dq_rate, 273 273 (vars->avg_dq_rate - 274 - (vars->avg_dq_rate >> 3)) + (count >> 3); 274 + (vars->avg_dq_rate >> 3)) + (count >> 3)); 275 275 276 276 /* If the queue has receded below the threshold, we hold 277 277 * on to the last drain rate calculated, else we reset ··· 381 381 if (delta > 0) { 382 382 /* prevent overflow */ 383 383 if (vars->prob < oldprob) { 384 - vars->prob = MAX_PROB; 384 + WRITE_ONCE(vars->prob, MAX_PROB); 385 385 /* Prevent normalization error. 
If probability is at 386 386 * maximum value already, we normalize it here, and 387 387 * skip the check to do a non-linear drop in the next ··· 392 392 } else { 393 393 /* prevent underflow */ 394 394 if (vars->prob > oldprob) 395 - vars->prob = 0; 395 + WRITE_ONCE(vars->prob, 0); 396 396 } 397 397 398 398 /* Non-linear drop in probability: Reduce drop probability quickly if ··· 403 403 /* Reduce drop probability to 98.4% */ 404 404 vars->prob -= vars->prob / 64; 405 405 406 - vars->qdelay = qdelay; 406 + WRITE_ONCE(vars->qdelay, qdelay); 407 407 vars->backlog_old = backlog; 408 408 409 409 /* We restart the measurement cycle if the following conditions are met ··· 502 502 struct pie_sched_data *q = qdisc_priv(sch); 503 503 struct tc_pie_xstats st = { 504 504 .prob = q->vars.prob << BITS_PER_BYTE, 505 - .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) / 505 + .delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) / 506 506 NSEC_PER_USEC, 507 - .packets_in = q->stats.packets_in, 508 - .overlimit = q->stats.overlimit, 509 - .maxq = q->stats.maxq, 510 - .dropped = q->stats.dropped, 511 - .ecn_mark = q->stats.ecn_mark, 507 + .packets_in = READ_ONCE(q->stats.packets_in), 508 + .overlimit = READ_ONCE(q->stats.overlimit), 509 + .maxq = READ_ONCE(q->stats.maxq), 510 + .dropped = READ_ONCE(q->stats.dropped), 511 + .ecn_mark = READ_ONCE(q->stats.ecn_mark), 512 512 }; 513 513 514 514 /* avg_dq_rate is only valid if dq_rate_estimator is enabled */ 515 515 st.dq_rate_estimating = q->params.dq_rate_estimator; 516 516 517 517 /* unscale and return dq_rate in bytes per sec */ 518 - if (q->params.dq_rate_estimator) 519 - st.avg_dq_rate = q->vars.avg_dq_rate * 518 + if (st.dq_rate_estimating) 519 + st.avg_dq_rate = READ_ONCE(q->vars.avg_dq_rate) * 520 520 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE; 521 521 522 522 return gnet_stats_copy_app(d, &st, sizeof(st));

+21 -10

net/sched/sch_red.c

··· 90 90 case RED_PROB_MARK: 91 91 qdisc_qstats_overlimit(sch); 92 92 if (!red_use_ecn(q)) { 93 - q->stats.prob_drop++; 93 + WRITE_ONCE(q->stats.prob_drop, 94 + q->stats.prob_drop + 1); 94 95 goto congestion_drop; 95 96 } 96 97 97 98 if (INET_ECN_set_ce(skb)) { 98 - q->stats.prob_mark++; 99 + WRITE_ONCE(q->stats.prob_mark, 100 + q->stats.prob_mark + 1); 99 101 skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret); 100 102 if (!skb) 101 103 return NET_XMIT_CN | ret; 102 104 } else if (!red_use_nodrop(q)) { 103 - q->stats.prob_drop++; 105 + WRITE_ONCE(q->stats.prob_drop, 106 + q->stats.prob_drop + 1); 104 107 goto congestion_drop; 105 108 } 106 109 ··· 114 111 reason = QDISC_DROP_OVERLIMIT; 115 112 qdisc_qstats_overlimit(sch); 116 113 if (red_use_harddrop(q) || !red_use_ecn(q)) { 117 - q->stats.forced_drop++; 114 + WRITE_ONCE(q->stats.forced_drop, 115 + q->stats.forced_drop + 1); 118 116 goto congestion_drop; 119 117 } 120 118 121 119 if (INET_ECN_set_ce(skb)) { 122 - q->stats.forced_mark++; 120 + WRITE_ONCE(q->stats.forced_mark, 121 + q->stats.forced_mark + 1); 123 122 skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret); 124 123 if (!skb) 125 124 return NET_XMIT_CN | ret; 126 125 } else if (!red_use_nodrop(q)) { 127 - q->stats.forced_drop++; 126 + WRITE_ONCE(q->stats.forced_drop, 127 + q->stats.forced_drop + 1); 128 128 goto congestion_drop; 129 129 } 130 130 ··· 141 135 sch->qstats.backlog += len; 142 136 sch->q.qlen++; 143 137 } else if (net_xmit_drop_count(ret)) { 144 - q->stats.pdrop++; 138 + WRITE_ONCE(q->stats.pdrop, 139 + q->stats.pdrop + 1); 145 140 qdisc_qstats_drop(sch); 146 141 } 147 142 return ret; ··· 470 463 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, 471 464 &hw_stats_request); 472 465 } 473 - st.early = q->stats.prob_drop + q->stats.forced_drop; 474 - st.pdrop = q->stats.pdrop; 475 - st.marked = q->stats.prob_mark + q->stats.forced_mark; 466 + st.early = READ_ONCE(q->stats.prob_drop) + 467 + 
READ_ONCE(q->stats.forced_drop); 468 + 469 + st.pdrop = READ_ONCE(q->stats.pdrop); 470 + 471 + st.marked = READ_ONCE(q->stats.prob_mark) + 472 + READ_ONCE(q->stats.forced_mark); 476 473 477 474 return gnet_stats_copy_app(d, &st, sizeof(st)); 478 475 }

+32 -22

net/sched/sch_sfb.c

··· 130 130 131 131 sfbhash >>= SFB_BUCKET_SHIFT; 132 132 if (b[hash].qlen < 0xFFFF) 133 - b[hash].qlen++; 133 + WRITE_ONCE(b[hash].qlen, b[hash].qlen + 1); 134 134 b += SFB_NUMBUCKETS; /* next level */ 135 135 } 136 136 } ··· 159 159 160 160 sfbhash >>= SFB_BUCKET_SHIFT; 161 161 if (b[hash].qlen > 0) 162 - b[hash].qlen--; 162 + WRITE_ONCE(b[hash].qlen, b[hash].qlen - 1); 163 163 b += SFB_NUMBUCKETS; /* next level */ 164 164 } 165 165 } ··· 179 179 180 180 static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) 181 181 { 182 - b->p_mark = prob_minus(b->p_mark, q->decrement); 182 + WRITE_ONCE(b->p_mark, prob_minus(b->p_mark, q->decrement)); 183 183 } 184 184 185 185 static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) 186 186 { 187 - b->p_mark = prob_plus(b->p_mark, q->increment); 187 + WRITE_ONCE(b->p_mark, prob_plus(b->p_mark, q->increment)); 188 188 } 189 189 190 190 static void sfb_zero_all_buckets(struct sfb_sched_data *q) ··· 202 202 const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0]; 203 203 204 204 for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { 205 - if (qlen < b->qlen) 206 - qlen = b->qlen; 207 - totalpm += b->p_mark; 208 - if (prob < b->p_mark) 209 - prob = b->p_mark; 205 + u32 b_qlen = READ_ONCE(b->qlen); 206 + u32 b_mark = READ_ONCE(b->p_mark); 207 + 208 + if (qlen < b_qlen) 209 + qlen = b_qlen; 210 + totalpm += b_mark; 211 + if (prob < b_mark) 212 + prob = b_mark; 210 213 b++; 211 214 } 212 215 *prob_r = prob; ··· 298 295 299 296 if (unlikely(sch->q.qlen >= q->limit)) { 300 297 qdisc_qstats_overlimit(sch); 301 - q->stats.queuedrop++; 298 + WRITE_ONCE(q->stats.queuedrop, 299 + q->stats.queuedrop + 1); 302 300 goto drop; 303 301 } 304 302 ··· 352 348 353 349 if (unlikely(minqlen >= q->max)) { 354 350 qdisc_qstats_overlimit(sch); 355 - q->stats.bucketdrop++; 351 + WRITE_ONCE(q->stats.bucketdrop, 352 + q->stats.bucketdrop + 1); 356 353 goto drop; 357 354 } 358 355 ··· 379 374 } 380 375 if 
(sfb_rate_limit(skb, q)) { 381 376 qdisc_qstats_overlimit(sch); 382 - q->stats.penaltydrop++; 377 + WRITE_ONCE(q->stats.penaltydrop, 378 + q->stats.penaltydrop + 1); 383 379 goto drop; 384 380 } 385 381 goto enqueue; ··· 396 390 * In either case, we want to start dropping packets. 397 391 */ 398 392 if (r < (p_min - SFB_MAX_PROB / 2) * 2) { 399 - q->stats.earlydrop++; 393 + WRITE_ONCE(q->stats.earlydrop, 394 + q->stats.earlydrop + 1); 400 395 goto drop; 401 396 } 402 397 } 403 398 if (INET_ECN_set_ce(skb)) { 404 - q->stats.marked++; 399 + WRITE_ONCE(q->stats.marked, 400 + q->stats.marked + 1); 405 401 } else { 406 - q->stats.earlydrop++; 402 + WRITE_ONCE(q->stats.earlydrop, 403 + q->stats.earlydrop + 1); 407 404 goto drop; 408 405 } 409 406 } ··· 419 410 sch->q.qlen++; 420 411 increment_qlen(&cb, q); 421 412 } else if (net_xmit_drop_count(ret)) { 422 - q->stats.childdrop++; 413 + WRITE_ONCE(q->stats.childdrop, 414 + q->stats.childdrop + 1); 423 415 qdisc_qstats_drop(sch); 424 416 } 425 417 return ret; ··· 609 599 { 610 600 struct sfb_sched_data *q = qdisc_priv(sch); 611 601 struct tc_sfb_xstats st = { 612 - .earlydrop = q->stats.earlydrop, 613 - .penaltydrop = q->stats.penaltydrop, 614 - .bucketdrop = q->stats.bucketdrop, 615 - .queuedrop = q->stats.queuedrop, 616 - .childdrop = q->stats.childdrop, 617 - .marked = q->stats.marked, 602 + .earlydrop = READ_ONCE(q->stats.earlydrop), 603 + .penaltydrop = READ_ONCE(q->stats.penaltydrop), 604 + .bucketdrop = READ_ONCE(q->stats.bucketdrop), 605 + .queuedrop = READ_ONCE(q->stats.queuedrop), 606 + .childdrop = READ_ONCE(q->stats.childdrop), 607 + .marked = READ_ONCE(q->stats.marked), 618 608 }; 619 609 620 610 st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q);

+5 -4

net/sched/sch_taprio.c

··· 972 972 } 973 973 974 974 if (should_change_schedules(admin, oper, end_time)) { 975 - /* Set things so the next time this runs, the new 976 - * schedule runs. 977 - */ 978 - end_time = sched_base_time(admin); 979 975 switch_schedules(q, &admin, &oper); 976 + /* After changing schedules, the next entry is the first one 977 + * in the new schedule, with a pre-calculated end_time. 978 + */ 979 + next = list_first_entry(&oper->entries, struct sched_entry, list); 980 + end_time = next->end_time; 980 981 } 981 982 982 983 next->end_time = end_time;

+3 -2

net/sctp/socket.c

··· 4855 4855 if (!newsk) 4856 4856 return ERR_PTR(err); 4857 4857 4858 - /* sk_clone() sets refcnt to 2 */ 4858 + /* sk_clone() sets refcnt to 2 and increments sockets_allocated */ 4859 4859 sock_put(newsk); 4860 + sk_sockets_allocated_dec(newsk); 4860 4861 4861 4862 newinet = inet_sk(newsk); 4862 4863 newsp = sctp_sk(newsk); ··· 7034 7033 7035 7034 /* See if the user provided enough room for all the data */ 7036 7035 num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); 7037 - if (len < num_chunks) 7036 + if (len < sizeof(struct sctp_authchunks) + num_chunks) 7038 7037 return -EINVAL; 7039 7038 7040 7039 if (copy_to_user(to, ch->chunks, num_chunks))

+2 -2

net/smc/smc_clc.c

··· 788 788 dclc = (struct smc_clc_msg_decline *)clcm; 789 789 reason_code = SMC_CLC_DECL_PEERDECL; 790 790 smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); 791 - if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & 792 - SMC_FIRST_CONTACT_MASK) { 791 + if ((dclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK) && 792 + smc->conn.lgr) { 793 793 smc->conn.lgr->sync_err = 1; 794 794 smc_lgr_terminate_sched(smc->conn.lgr); 795 795 }

+13 -1

net/tipc/msg.c

··· 177 177 178 178 if (fragid == LAST_FRAGMENT) { 179 179 TIPC_SKB_CB(head)->validated = 0; 180 - if (unlikely(!tipc_msg_validate(&head))) 180 + 181 + /* If the reassembled skb has been freed in 182 + * tipc_msg_validate() because of an invalid truesize, 183 + * then head will point to a newly allocated reassembled 184 + * skb, while *headbuf points to freed reassembled skb. 185 + * In such cases, correct *headbuf for freeing the newly 186 + * allocated reassembled skb later. 187 + */ 188 + if (unlikely(!tipc_msg_validate(&head))) { 189 + if (head != *headbuf) 190 + *headbuf = head; 181 191 goto err; 192 + } 193 + 182 194 *buf = head; 183 195 TIPC_SKB_CB(head)->tail = NULL; 184 196 *headbuf = NULL;

+27 -8

net/unix/af_unix.c

··· 1968 1968 1969 1969 static void unix_destruct_scm(struct sk_buff *skb) 1970 1970 { 1971 - struct scm_cookie scm; 1971 + struct scm_cookie scm = {}; 1972 1972 1973 - memset(&scm, 0, sizeof(scm)); 1974 - scm.pid = UNIXCB(skb).pid; 1973 + swap(scm.pid, UNIXCB(skb).pid); 1974 + 1975 1975 if (UNIXCB(skb).fp) 1976 1976 unix_detach_fds(&scm, skb); 1977 1977 1978 - /* Alas, it calls VFS */ 1979 - /* So fscking what? fput() had been SMP-safe since the last Summer */ 1980 1978 scm_destroy(&scm); 1979 + } 1980 + 1981 + static void unix_wfree(struct sk_buff *skb) 1982 + { 1983 + unix_destruct_scm(skb); 1981 1984 sock_wfree(skb); 1982 1985 } 1983 1986 ··· 1996 1993 if (scm->fp && send_fds) 1997 1994 err = unix_attach_fds(scm, skb); 1998 1995 1999 - skb->destructor = unix_destruct_scm; 1996 + skb->destructor = unix_wfree; 2000 1997 return err; 2001 1998 } 2002 1999 ··· 2071 2068 atomic_sub(fp->count, &u->scm_stat.nr_fds); 2072 2069 unix_del_edges(fp); 2073 2070 } 2071 + } 2072 + 2073 + static void unix_orphan_scm(struct sock *sk, struct sk_buff *skb) 2074 + { 2075 + scm_stat_del(sk, skb); 2076 + unix_destruct_scm(skb); 2077 + skb->destructor = sock_wfree; 2074 2078 } 2075 2079 2076 2080 /* ··· 2693 2683 int err; 2694 2684 2695 2685 mutex_lock(&u->iolock); 2686 + 2696 2687 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2697 - mutex_unlock(&u->iolock); 2698 - if (!skb) 2688 + if (!skb) { 2689 + mutex_unlock(&u->iolock); 2699 2690 return err; 2691 + } 2692 + 2693 + unix_orphan_scm(sk, skb); 2694 + 2695 + mutex_unlock(&u->iolock); 2700 2696 2701 2697 return recv_actor(sk, skb); 2702 2698 } ··· 2902 2886 #endif 2903 2887 2904 2888 spin_unlock(&queue->lock); 2889 + 2890 + unix_orphan_scm(sk, skb); 2891 + 2905 2892 mutex_unlock(&u->iolock); 2906 2893 2907 2894 return recv_actor(sk, skb);

+25 -4

net/vmw_vsock/hyperv_transport.c

··· 694 694 static s64 hvs_stream_has_data(struct vsock_sock *vsk) 695 695 { 696 696 struct hvsock *hvs = vsk->trans; 697 - bool need_refill; 698 697 s64 ret; 699 698 700 699 if (hvs->recv_data_len > 0) ··· 701 702 702 703 switch (hvs_channel_readable_payload(hvs->chan)) { 703 704 case 1: 704 - need_refill = !hvs->recv_desc; 705 - if (!need_refill) 706 - return -EIO; 705 + if (hvs->recv_desc) { 706 + /* Here hvs->recv_data_len is 0, so hvs->recv_desc must 707 + * be NULL unless it points to the 0-byte-payload FIN 708 + * packet or a malformed/short packet: see 709 + * hvs_update_recv_data(). 710 + * 711 + * If hvs->recv_desc points to the FIN packet, here all 712 + * the payload has been dequeued and the peer_shutdown 713 + * flag is set, but hvs_channel_readable_payload() still 714 + * returns 1, because the VMBus ringbuffer's read_index 715 + * is not updated for the FIN packet: 716 + * hvs_stream_dequeue() -> hv_pkt_iter_next() updates 717 + * the cached priv_read_index but has no opportunity to 718 + * update the read_index in hv_pkt_iter_close() as 719 + * hvs_stream_has_data() returns 0 for the FIN packet, 720 + * so it won't get dequeued. 721 + * 722 + * In case hvs->recv_desc points to a malformed/short 723 + * packet, return -EIO. 724 + */ 725 + if (!(vsk->peer_shutdown & SEND_SHUTDOWN)) 726 + return -EIO; 727 + 728 + return 0; 729 + } 707 730 708 731 hvs->recv_desc = hv_pkt_iter_first(hvs->chan); 709 732 if (!hvs->recv_desc)

+11 -8

net/vmw_vsock/virtio_transport_common.c

··· 73 73 static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, 74 74 struct sk_buff *skb, 75 75 struct msghdr *msg, 76 + size_t pkt_len, 76 77 bool zerocopy) 77 78 { 78 79 struct ubuf_info *uarg; ··· 82 81 uarg = msg->msg_ubuf; 83 82 net_zcopy_get(uarg); 84 83 } else { 85 - struct iov_iter *iter = &msg->msg_iter; 86 84 struct ubuf_info_msgzc *uarg_zc; 87 85 88 86 uarg = msg_zerocopy_realloc(sk_vsock(vsk), 89 - iter->count, 90 - NULL, false); 87 + pkt_len, NULL, false); 91 88 if (!uarg) 92 89 return -1; 93 90 ··· 397 398 * each iteration. If this is last skb for this buffer 398 399 * and MSG_ZEROCOPY mode is in use - we must allocate 399 400 * completion for the current syscall. 401 + * 402 + * Pass pkt_len because msg iter is already consumed 403 + * by virtio_transport_fill_skb(), so iter->count 404 + * can not be used for RLIMIT_MEMLOCK pinned-pages 405 + * accounting done by msg_zerocopy_realloc(). 400 406 */ 401 407 if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && 402 408 skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { 403 409 if (virtio_transport_init_zcopy_skb(vsk, skb, 404 410 info->msg, 411 + pkt_len, 405 412 can_zcopy)) { 406 413 kfree_skb(skb); 407 414 ret = -ENOMEM; ··· 550 545 skb_queue_walk(&vvs->rx_queue, skb) { 551 546 size_t bytes; 552 547 553 - bytes = len - total; 554 - if (bytes > skb->len) 555 - bytes = skb->len; 548 + bytes = min_t(size_t, len - total, 549 + skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); 556 550 557 551 spin_unlock_bh(&vvs->rx_lock); 558 552 ··· 1562 1558 return -ENOMEM; 1563 1559 } 1564 1560 1565 - sk_acceptq_added(sk); 1566 - 1567 1561 lock_sock_nested(child, SINGLE_DEPTH_NESTING); 1568 1562 1569 1563 child->sk_state = TCP_ESTABLISHED; ··· 1583 1581 return ret; 1584 1582 } 1585 1583 1584 + sk_acceptq_added(sk); 1586 1585 if (virtio_transport_space_update(child, skb)) 1587 1586 child->sk_write_space(child); 1588 1587

+42 -7

tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c

··· 7 7 * 3. call listen() for 1 server socket. (migration target) 8 8 * 4. update a map to migrate all child sockets 9 9 * to the last server socket (migrate_map[cookie] = 4) 10 - * 5. call shutdown() for first 4 server sockets 10 + * 5. for TCP_ESTABLISHED and TCP_SYN_RECV cases, verify via epoll 11 + * that the last server socket is not ready before migration. 12 + * 6. call shutdown() for first 4 server sockets 11 13 * and migrate the requests in the accept queue 12 14 * to the last server socket. 13 - * 6. call listen() for the second server socket. 14 - * 7. call shutdown() for the last server 15 + * 7. for TCP_ESTABLISHED and TCP_SYN_RECV cases, verify via epoll 16 + * that the last server socket is ready after migration. 17 + * 8. call listen() for the second server socket. 18 + * 9. call shutdown() for the last server 15 19 * and migrate the requests in the accept queue 16 20 * to the second server socket. 17 - * 8. call listen() for the last server. 18 - * 9. call shutdown() for the second server 21 + * 10. call listen() for the last server. 22 + * 11. call shutdown() for the second server 19 23 * and migrate the requests in the accept queue 20 24 * to the last server socket. 21 - * 10. call accept() for the last server socket. 25 + * 12. call accept() for the last server socket. 
22 26 * 23 27 * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> 24 28 */ 25 29 26 30 #include <bpf/bpf.h> 27 31 #include <bpf/libbpf.h> 32 + #include <sys/epoll.h> 28 33 29 34 #include "test_progs.h" 30 35 #include "test_migrate_reuseport.skel.h" ··· 355 350 356 351 static int migrate_dance(struct migrate_reuseport_test_case *test_case) 357 352 { 353 + struct epoll_event ev = { 354 + .events = EPOLLIN, 355 + }; 356 + int epoll = -1, nfds; 358 357 int i, err; 358 + 359 + if (test_case->state != BPF_TCP_NEW_SYN_RECV) { 360 + epoll = epoll_create1(0); 361 + if (!ASSERT_NEQ(epoll, -1, "epoll_create1")) 362 + return -1; 363 + 364 + ev.data.fd = test_case->servers[MIGRATED_TO]; 365 + if (!ASSERT_OK(epoll_ctl(epoll, EPOLL_CTL_ADD, 366 + test_case->servers[MIGRATED_TO], &ev), 367 + "epoll_ctl")) 368 + goto close_epoll; 369 + 370 + nfds = epoll_wait(epoll, &ev, 1, 0); 371 + if (!ASSERT_EQ(nfds, 0, "epoll_wait 1")) 372 + goto close_epoll; 373 + } 359 374 360 375 /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests 361 376 * to the last listener based on eBPF. ··· 383 358 for (i = 0; i < MIGRATED_TO; i++) { 384 359 err = shutdown(test_case->servers[i], SHUT_RDWR); 385 360 if (!ASSERT_OK(err, "shutdown")) 386 - return -1; 361 + goto close_epoll; 387 362 } 388 363 389 364 /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */ 390 365 if (test_case->state == BPF_TCP_NEW_SYN_RECV) 391 366 return 0; 367 + 368 + nfds = epoll_wait(epoll, &ev, 1, 0); 369 + if (!ASSERT_EQ(nfds, 1, "epoll_wait 2")) { 370 + close_epoll: 371 + if (epoll >= 0) 372 + close(epoll); 373 + return -1; 374 + } 375 + 376 + close(epoll); 392 377 393 378 /* Note that we use the second listener instead of the 394 379 * first one here.

+3 -14

tools/testing/selftests/drivers/net/bonding/lag_lib.sh

··· 23 23 ip link set dev dummy2 master "$name" 24 24 elif [ "$driver" = "team" ]; then 25 25 name="team0" 26 - teamd -d -c ' 27 - { 28 - "device": "'"$name"'", 29 - "runner": { 30 - "name": "'"$mode"'" 31 - }, 32 - "ports": { 33 - "dummy1": 34 - {}, 35 - "dummy2": 36 - {} 37 - } 38 - } 39 - ' 26 + ip link add "$name" type team 27 + ip link set dev dummy1 master "$name" 28 + ip link set dev dummy2 master "$name" 40 29 ip link set dev "$name" up 41 30 else 42 31 check_err 1

-2

tools/testing/selftests/drivers/net/team/dev_addr_lists.sh

··· 42 42 } 43 43 44 44 45 - require_command teamd 46 - 47 45 trap cleanup EXIT 48 46 49 47 tests_run

+1

tools/testing/selftests/net/config

··· 101 101 CONFIG_NET_SCH_INGRESS=m 102 102 CONFIG_NET_SCH_NETEM=y 103 103 CONFIG_NET_SCH_PRIO=m 104 + CONFIG_NET_TEAM=y 104 105 CONFIG_NET_VRF=y 105 106 CONFIG_NF_CONNTRACK=m 106 107 CONFIG_NF_CONNTRACK_OVS=y

+22

tools/testing/selftests/net/fib_nexthops.sh

··· 1209 1209 run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" 1210 1210 log_test $? 0 "IPv6 route using a group after replacing v4 gateways" 1211 1211 1212 + # Replacing an IPv6 nexthop with an IPv4 nexthop should update has_v4 1213 + # for all groups using it, preventing IPv6 routes from referencing the 1214 + # group after the replace. 1215 + run_cmd "$IP nexthop add id 89 via 2001:db8:91::2 dev veth1" 1216 + run_cmd "$IP nexthop add id 125 group 89" 1217 + run_cmd "$IP nexthop replace id 89 via 172.16.1.1 dev veth1" 1218 + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 125" 1219 + log_test $? 2 "IPv6 route can not use group after v6 nexthop replaced by v4" 1220 + 1221 + # Same scenario but with a blackhole nexthop: the group has no IPv6 1222 + # routes yet when the replace happens, so fib6_check_nh_list returns 1223 + # early without checking. has_v4 must still be updated to block 1224 + # subsequent IPv6 route additions. 1225 + run_cmd "$IP nexthop flush >/dev/null 2>&1" 1226 + run_cmd "$IP -6 nexthop add id 90 blackhole" 1227 + run_cmd "$IP nexthop add id 125 group 90" 1228 + run_cmd "$IP nexthop replace id 90 blackhole" 1229 + run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 125" 1230 + log_test $? 2 "IPv6 route reject v6 blackhole replaced by v4 blackhole" 1231 + run_cmd "ip netns exec $me ping -6 2001:db8:101::1 -c1 -w$PING_TIMEOUT" 1232 + log_test $? 2 "Ping unreachable after rejected route" 1233 + 1212 1234 $IP nexthop flush >/dev/null 2>&1 1213 1235 1214 1236 #

+28

tools/testing/selftests/net/mptcp/diag.sh

··· 322 322 done 323 323 } 324 324 325 + chk_sndbuf() 326 + { 327 + local server_sndbuf client_sndbuf msg 328 + local port=${1} 329 + 330 + msg="....chk sndbuf server/client" 331 + server_sndbuf=$(ss -N "${ns}" -inmHM "sport" "${port}" | \ 332 + sed -n 's/.*tb\([0-9]\+\).*/\1/p') 333 + client_sndbuf=$(ss -N "${ns}" -inmHM "dport" "${port}" | \ 334 + sed -n 's/.*tb\([0-9]\+\).*/\1/p') 335 + 336 + mptcp_lib_print_title "${msg}" 337 + if [ -z "${server_sndbuf}" ] || [ -z "${client_sndbuf}" ]; then 338 + mptcp_lib_pr_fail "sndbuf S=${server_sndbuf} C=${client_sndbuf}" 339 + mptcp_lib_result_fail "${msg}" 340 + ret=${KSFT_FAIL} 341 + elif [ "${server_sndbuf}" != "${client_sndbuf}" ]; then 342 + mptcp_lib_pr_fail "sndbuf S=${server_sndbuf} != C=${client_sndbuf}" 343 + mptcp_lib_result_fail "${msg}" 344 + ret=${KSFT_FAIL} 345 + else 346 + mptcp_lib_pr_ok 347 + mptcp_lib_result_pass "${msg}" 348 + fi 349 + } 350 + 351 + 325 352 trap cleanup EXIT 326 353 mptcp_lib_ns_init ns 327 354 ··· 368 341 127.0.0.1 >/dev/null & 369 342 wait_connected $ns 10000 370 343 chk_msk_nr 2 "after MPC handshake" 344 + chk_sndbuf 10000 371 345 chk_last_time_info 10000 372 346 chk_msk_remote_key_nr 2 "....chk remote_key" 373 347 chk_msk_fallback_nr 0 "....chk no fallback"

+272 -91

tools/testing/selftests/net/ovpn/common.sh

··· 4 4 # 5 5 # Author: Antonio Quartulli <antonio@openvpn.net> 6 6 7 - UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} 8 - TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} 9 - OVPN_CLI=${OVPN_CLI:-./ovpn-cli} 10 - YNL_CLI=${YNL_CLI:-../../../../net/ynl/pyynl/cli.py} 11 - ALG=${ALG:-aes} 12 - PROTO=${PROTO:-UDP} 13 - FLOAT=${FLOAT:-0} 14 - SYMMETRIC_ID=${SYMMETRIC_ID:-0} 7 + OVPN_COMMON_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") 8 + source "$OVPN_COMMON_DIR/../../kselftest/ktap_helpers.sh" 15 9 16 - export ID_OFFSET=$(( 9 * (SYMMETRIC_ID == 0) )) 10 + OVPN_UDP_PEERS_FILE=${OVPN_UDP_PEERS_FILE:-udp_peers.txt} 11 + OVPN_TCP_PEERS_FILE=${OVPN_TCP_PEERS_FILE:-tcp_peers.txt} 12 + OVPN_CLI=${OVPN_CLI:-${OVPN_COMMON_DIR}/ovpn-cli} 13 + OVPN_YNL=${OVPN_YNL:-${OVPN_COMMON_DIR}/../../../../net/ynl/pyynl/cli.py} 14 + OVPN_ALG=${OVPN_ALG:-aes} 15 + OVPN_PROTO=${OVPN_PROTO:-UDP} 16 + OVPN_FLOAT=${OVPN_FLOAT:-0} 17 + OVPN_SYMMETRIC_ID=${OVPN_SYMMETRIC_ID:-0} 18 + OVPN_VERBOSE=${OVPN_VERBOSE:-0} 17 19 18 - JQ_FILTER='map(select(.msg.peer | has("remote-ipv6") | not)) | 20 + export OVPN_ID_OFFSET=$(( 9 * (OVPN_SYMMETRIC_ID == 0) )) 21 + 22 + OVPN_JQ_FILTER='map(if type == "array" then .[] else . 
end) | 23 + map(select(.msg.peer | has("remote-ipv6") | not)) | 19 24 map(del(.msg.ifindex)) | sort_by(.msg.peer.id)[]' 20 - LAN_IP="11.11.11.11" 25 + OVPN_LAN_IP="11.11.11.11" 21 26 22 - declare -A tmp_jsons=() 23 - declare -A listener_pids=() 27 + declare -A OVPN_TMP_JSONS=() 28 + declare -A OVPN_LISTENER_PIDS=() 29 + OVPN_CURRENT_STAGE="" 24 30 25 - create_ns() { 26 - ip netns add peer${1} 31 + ovpn_is_verbose() { 32 + [[ "${OVPN_VERBOSE}" == "1" ]] 27 33 } 28 34 29 - setup_ns() { 35 + ovpn_log() { 36 + ovpn_is_verbose || return 0 37 + printf '%s\n' "$*" 38 + } 39 + 40 + ovpn_print_cmd_output() { 41 + local output_file="$1" 42 + local line 43 + 44 + [[ -s "${output_file}" ]] || return 0 45 + 46 + while IFS= read -r line; do 47 + ovpn_log "${line}" 48 + done < "${output_file}" 49 + } 50 + 51 + ovpn_cmd_run() { 52 + local mode="$1" 53 + local label="$2" 54 + local output_file 55 + local rc 56 + local ret=0 57 + 58 + shift 2 59 + 60 + output_file=$(mktemp) 61 + if "$@" >"${output_file}" 2>&1; then 62 + rc=0 63 + else 64 + rc=$? 65 + fi 66 + 67 + case "${mode}" in 68 + ok) 69 + if [[ "${rc}" -ne 0 ]]; then 70 + cat "${output_file}" 71 + printf '%s\n' \ 72 + "${label}: command failed with rc=${rc}: $*" 73 + ret="${rc}" 74 + fi 75 + ;; 76 + mayfail) 77 + ;; 78 + fail) 79 + [[ "${rc}" -eq 0 ]] && ret=1 80 + ;; 81 + esac 82 + 83 + if ovpn_is_verbose && [[ "${rc}" -eq 0 || "${mode}" != "ok" ]]; then 84 + ovpn_print_cmd_output "${output_file}" 85 + fi 86 + 87 + rm -f "${output_file}" 88 + return "${ret}" 89 + } 90 + 91 + ovpn_cmd_ok() { 92 + ovpn_cmd_run ok "$@" 93 + } 94 + 95 + ovpn_cmd_mayfail() { 96 + ovpn_cmd_run mayfail "$@" 97 + } 98 + 99 + ovpn_cmd_fail() { 100 + ovpn_cmd_run fail "$@" 101 + } 102 + 103 + ovpn_run_bg() { 104 + local pid_var="$1" 105 + 106 + shift 107 + if ovpn_is_verbose; then 108 + "$@" & 109 + else 110 + "$@" >/dev/null 2>&1 & 111 + fi 112 + 113 + printf -v "${pid_var}" '%s' "$!" 
114 + } 115 + 116 + ovpn_run_stage() { 117 + local label="$1" 118 + 119 + shift 120 + OVPN_CURRENT_STAGE="${label}" 121 + "$@" 122 + OVPN_CURRENT_STAGE="" 123 + ktap_test_pass "${label}" 124 + } 125 + 126 + ovpn_stage_err() { 127 + # ERR trap is global under set -eE: only report failures that happen 128 + # while ovpn_run_stage() is actively executing a stage body. 129 + if [[ -n "${OVPN_CURRENT_STAGE}" ]]; then 130 + ktap_test_fail "${OVPN_CURRENT_STAGE}" 131 + OVPN_CURRENT_STAGE="" 132 + fi 133 + } 134 + 135 + ovpn_create_ns() { 136 + ip netns add "ovpn_peer${1}" 137 + } 138 + 139 + ovpn_setup_ns() { 140 + local peer="ovpn_peer${1}" 141 + local server_ns="ovpn_peer0" 142 + local peer_ns 30 143 MODE="P2P" 31 144 32 145 if [ ${1} -eq 0 ]; then 33 146 MODE="MP" 34 - for p in $(seq 1 ${NUM_PEERS}); do 35 - ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p} 147 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 148 + peer_ns="ovpn_peer${p}" 149 + ip link add veth${p} netns "${server_ns}" type veth \ 150 + peer name veth${p} netns "${peer_ns}" 36 151 37 - ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p} 38 - ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p} 39 - ip -n peer0 link set veth${p} up 152 + ip -n "${server_ns}" addr add 10.10.${p}.1/24 dev \ 153 + veth${p} 154 + ip -n "${server_ns}" addr add fd00:0:0:${p}::1/64 dev \ 155 + veth${p} 156 + ip -n "${server_ns}" link set veth${p} up 40 157 41 - ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p} 42 - ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p} 43 - ip -n peer${p} link set veth${p} up 158 + ip -n "${peer_ns}" addr add 10.10.${p}.2/24 dev veth${p} 159 + ip -n "${peer_ns}" addr add fd00:0:0:${p}::2/64 dev \ 160 + veth${p} 161 + ip -n "${peer_ns}" link set veth${p} up 44 162 done 45 163 fi 46 164 47 - ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE 48 - ip -n peer${1} addr add ${2} dev tun${1} 165 + ip netns exec "${peer}" ${OVPN_CLI} new_iface tun${1} $MODE 166 + ip -n 
"${peer}" addr add ${2} dev tun${1} 49 167 # add a secondary IP to peer 1, to test a LAN behind a client 50 - if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then 51 - ip -n peer${1} addr add ${LAN_IP} dev tun${1} 52 - ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0 168 + if [ ${1} -eq 1 -a -n "${OVPN_LAN_IP}" ]; then 169 + ip -n "${peer}" addr add ${OVPN_LAN_IP} dev tun${1} 170 + ip -n "${server_ns}" route add ${OVPN_LAN_IP} via \ 171 + $(echo ${2} |sed -e s'!/.*!!') dev tun0 53 172 fi 54 173 if [ -n "${3}" ]; then 55 - ip -n peer${1} link set mtu ${3} dev tun${1} 174 + ip -n "${peer}" link set mtu ${3} dev tun${1} 56 175 fi 57 - ip -n peer${1} link set tun${1} up 176 + ip -n "${peer}" link set tun${1} up 58 177 } 59 178 60 - build_capture_filter() { 179 + ovpn_build_capture_filter() { 61 180 # match the first four bytes of the openvpn data payload 62 - if [ "${PROTO}" == "UDP" ]; then 181 + if [ "${OVPN_PROTO}" == "UDP" ]; then 63 182 # For UDP, libpcap transport indexing only works for IPv4, so 64 183 # use an explicit IPv4 or IPv6 expression based on the peer 65 184 # address. The IPv6 branch assumes there are no extension ··· 195 76 fi 196 77 } 197 78 198 - setup_listener() { 79 + ovpn_setup_listener() { 80 + local peer="$1" 81 + local file 82 + local peer_ns="ovpn_peer${peer}" 83 + 199 84 file=$(mktemp) 200 - PYTHONUNBUFFERED=1 ip netns exec peer${p} ${YNL_CLI} --family ovpn \ 201 - --subscribe peers --output-json --duration 40 > ${file} & 202 - listener_pids[$1]=$! 203 - tmp_jsons[$1]="${file}" 85 + PYTHONUNBUFFERED=1 ip netns exec "${peer_ns}" "${OVPN_YNL}" --family \ 86 + ovpn --subscribe peers --output-json > "${file}" \ 87 + 2>/dev/null & 88 + OVPN_LISTENER_PIDS["${peer}"]=$! 
89 + OVPN_TMP_JSONS["${peer}"]="${file}" 204 90 } 205 91 206 - add_peer() { 92 + ovpn_add_peer() { 207 93 labels=("ASYMM" "SYMM") 208 - M_ID=${labels[SYMMETRIC_ID]} 94 + local peer_ns 95 + local server_ns="ovpn_peer0" 96 + M_ID=${labels[OVPN_SYMMETRIC_ID]} 209 97 210 - if [ "${PROTO}" == "UDP" ]; then 98 + if [ "${OVPN_PROTO}" == "UDP" ]; then 211 99 if [ ${1} -eq 0 ]; then 212 - ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 \ 213 - ${M_ID} ${UDP_PEERS_FILE} 100 + ip netns exec "${server_ns}" ${OVPN_CLI} \ 101 + new_multi_peer tun0 1 ${M_ID} \ 102 + ${OVPN_UDP_PEERS_FILE} 214 103 215 - for p in $(seq 1 ${NUM_PEERS}); do 216 - ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \ 104 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 105 + ip netns exec "${server_ns}" ${OVPN_CLI} \ 106 + new_key tun0 ${p} 1 0 ${OVPN_ALG} 0 \ 217 107 data64.key 218 108 done 219 109 else 220 - if [ "${SYMMETRIC_ID}" -eq 1 ]; then 110 + peer_ns="ovpn_peer${1}" 111 + if [ "${OVPN_SYMMETRIC_ID}" -eq 1 ]; then 221 112 PEER_ID=${1} 222 113 TX_ID="none" 223 114 else 224 115 PEER_ID=$(awk "NR == ${1} {print \$2}" \ 225 - ${UDP_PEERS_FILE}) 116 + ${OVPN_UDP_PEERS_FILE}) 226 117 TX_ID=${1} 227 118 fi 228 - RADDR=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE}) 229 - RPORT=$(awk "NR == ${1} {print \$4}" ${UDP_PEERS_FILE}) 230 - LPORT=$(awk "NR == ${1} {print \$6}" ${UDP_PEERS_FILE}) 231 - ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} \ 232 - ${PEER_ID} ${TX_ID} ${LPORT} ${RADDR} ${RPORT} 233 - ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} \ 234 - ${PEER_ID} 1 0 ${ALG} 1 data64.key 119 + RADDR=$(awk "NR == ${1} {print \$3}" \ 120 + ${OVPN_UDP_PEERS_FILE}) 121 + RPORT=$(awk "NR == ${1} {print \$4}" \ 122 + ${OVPN_UDP_PEERS_FILE}) 123 + LPORT=$(awk "NR == ${1} {print \$6}" \ 124 + ${OVPN_UDP_PEERS_FILE}) 125 + ip netns exec "${peer_ns}" ${OVPN_CLI} new_peer \ 126 + tun${1} ${PEER_ID} ${TX_ID} ${LPORT} ${RADDR} \ 127 + ${RPORT} 128 + ip netns exec "${peer_ns}" ${OVPN_CLI} 
new_key tun${1} \ 129 + ${PEER_ID} 1 0 ${OVPN_ALG} 1 data64.key 235 130 fi 236 131 else 237 132 if [ ${1} -eq 0 ]; then 238 - (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${M_ID} \ 239 - ${TCP_PEERS_FILE} && { 240 - for p in $(seq 1 ${NUM_PEERS}); do 241 - ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \ 242 - ${ALG} 0 data64.key 133 + (ip netns exec "${server_ns}" ${OVPN_CLI} listen tun0 \ 134 + 1 ${M_ID} ${OVPN_TCP_PEERS_FILE} && { 135 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 136 + ip netns exec "${server_ns}" \ 137 + ${OVPN_CLI} new_key tun0 ${p} \ 138 + 1 0 ${OVPN_ALG} 0 data64.key 243 139 done 244 140 }) & 245 141 sleep 5 246 142 else 247 - if [ "${SYMMETRIC_ID}" -eq 1 ]; then 143 + peer_ns="ovpn_peer${1}" 144 + if [ "${OVPN_SYMMETRIC_ID}" -eq 1 ]; then 248 145 PEER_ID=${1} 249 146 TX_ID="none" 250 147 else 251 148 PEER_ID=$(awk "NR == ${1} {print \$2}" \ 252 - ${TCP_PEERS_FILE}) 149 + ${OVPN_TCP_PEERS_FILE}) 253 150 TX_ID=${1} 254 151 fi 255 - ip netns exec peer${1} ${OVPN_CLI} connect tun${1} \ 152 + ip netns exec "${peer_ns}" ${OVPN_CLI} connect tun${1} \ 256 153 ${PEER_ID} ${TX_ID} 10.10.${1}.1 1 data64.key 257 154 fi 258 155 fi 259 156 } 260 157 261 - compare_ntfs() { 262 - if [ ${#tmp_jsons[@]} -gt 0 ]; then 263 - suffix="" 264 - [ "${SYMMETRIC_ID}" -eq 1 ] && suffix="${suffix}-symm" 265 - [ "$FLOAT" == 1 ] && suffix="${suffix}-float" 266 - expected="json/peer${1}${suffix}.json" 267 - received="${tmp_jsons[$1]}" 158 + ovpn_compare_ntfs() { 159 + local diff_rc=0 160 + local diff_file 268 161 269 - kill -TERM ${listener_pids[$1]} || true 270 - wait ${listener_pids[$1]} || true 162 + if [ ${#OVPN_TMP_JSONS[@]} -gt 0 ]; then 163 + suffix="" 164 + [ "${OVPN_SYMMETRIC_ID}" -eq 1 ] && suffix="${suffix}-symm" 165 + [ "$OVPN_FLOAT" == 1 ] && suffix="${suffix}-float" 166 + expected="json/peer${1}${suffix}.json" 167 + received="${OVPN_TMP_JSONS[$1]}" 168 + diff_file=$(mktemp) 169 + 170 + ovpn_stop_listener "${1}" 1 271 171 printf "Checking notifications 
for peer ${1}... " 272 - if diff <(jq -s "${JQ_FILTER}" ${expected}) \ 273 - <(jq -s "${JQ_FILTER}" ${received}); then 172 + if diff <(jq -s "${OVPN_JQ_FILTER}" ${expected}) \ 173 + <(jq -s "${OVPN_JQ_FILTER}" ${received}) \ 174 + >"${diff_file}" 2>&1; then 274 175 echo "OK" 176 + else 177 + diff_rc=$? 178 + echo "failed" 179 + cat "${diff_file}" 275 180 fi 276 181 277 - rm -f ${received} || true 182 + rm -f "${diff_file}" || true 183 + rm -f "${received}" || true 184 + unset "OVPN_TMP_JSONS[$1]" 185 + fi 186 + 187 + return "${diff_rc}" 188 + } 189 + 190 + ovpn_stop_listener() { 191 + local peer="$1" 192 + local keep_json="${2:-0}" 193 + local pid="${OVPN_LISTENER_PIDS[$peer]:-}" 194 + local json="${OVPN_TMP_JSONS[$peer]:-}" 195 + 196 + if [[ -n "${pid}" ]]; then 197 + kill -TERM "${pid}" 2>/dev/null || true 198 + wait "${pid}" 2>/dev/null || true 199 + unset "OVPN_LISTENER_PIDS[$peer]" 200 + fi 201 + 202 + if [[ -n "${json}" && "${keep_json}" -eq 0 ]]; then 203 + rm -f "${json}" || true 204 + unset "OVPN_TMP_JSONS[$peer]" 278 205 fi 279 206 } 280 207 281 - cleanup() { 282 - # some ovpn-cli processes sleep in background so they need manual poking 283 - killall $(basename ${OVPN_CLI}) 2>/dev/null || true 208 + ovpn_cleanup_peer_ns() { 209 + local peer="$1" 210 + local peer_id="${peer#ovpn_peer}" 284 211 285 - # netns peer0 is deleted without erasing ifaces first 286 - for p in $(seq 1 10); do 287 - ip -n peer${p} link set tun${p} down 2>/dev/null || true 288 - ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true 289 - done 290 - for p in $(seq 1 10); do 291 - ip -n peer0 link del veth${p} 2>/dev/null || true 292 - done 293 - for p in $(seq 0 10); do 294 - ip netns del peer${p} 2>/dev/null || true 295 - done 212 + ip -n "${peer}" link set tun${peer_id} down 2>/dev/null || true 213 + ip netns exec "${peer}" ${OVPN_CLI} del_iface tun${peer_id} \ 214 + 1>/dev/null 2>&1 || true 215 + ip netns del "${peer}" 2>/dev/null || true 296 216 } 297 217 298 - 
if [ "${PROTO}" == "UDP" ]; then 299 - NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')} 218 + ovpn_cleanup() { 219 + local peer 220 + 221 + # some ovpn-cli processes sleep in background so they need manual poking 222 + killall "$(basename "${OVPN_CLI}")" 2>/dev/null || true 223 + 224 + for peer in "${!OVPN_LISTENER_PIDS[@]}"; do 225 + ovpn_stop_listener "${peer}" 2>/dev/null 226 + done 227 + 228 + for p in $(seq 1 10); do 229 + ip -n ovpn_peer0 link del veth${p} 2>/dev/null || true 230 + done 231 + 232 + # remove from ovpn's netns pool 233 + while IFS= read -r peer; do 234 + [[ -n "${peer}" ]] || continue 235 + ovpn_cleanup_peer_ns "${peer}" 2>/dev/null 236 + done < <(ip netns list 2>/dev/null | awk '/^ovpn_/ {print $1}') 237 + } 238 + 239 + if [ "${OVPN_PROTO}" == "UDP" ]; then 240 + OVPN_NUM_PEERS=${OVPN_NUM_PEERS:-$(wc -l ${OVPN_UDP_PEERS_FILE} | \ 241 + awk '{print $1}')} 300 242 else 301 - NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')} 243 + OVPN_NUM_PEERS=${OVPN_NUM_PEERS:-$(wc -l ${OVPN_TCP_PEERS_FILE} | \ 244 + awk '{print $1}')} 302 245 fi

+3

tools/testing/selftests/net/ovpn/config

··· 5 5 CONFIG_DST_CACHE=y 6 6 CONFIG_INET=y 7 7 CONFIG_NET=y 8 + CONFIG_NETFILTER=y 8 9 CONFIG_NET_UDP_TUNNEL=y 10 + CONFIG_NF_TABLES=m 11 + CONFIG_NF_TABLES_INET=y 9 12 CONFIG_OVPN=m 10 13 CONFIG_STREAM_PARSER=y

+1 -1

tools/testing/selftests/net/ovpn/test-chachapoly.sh

··· 4 4 # 5 5 # Author: Antonio Quartulli <antonio@openvpn.net> 6 6 7 - ALG="chachapoly" 7 + OVPN_ALG="chachapoly" 8 8 9 9 source test.sh

+1 -1

tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh

··· 4 4 # 5 5 # Author: Antonio Quartulli <antonio@openvpn.net> 6 6 7 - PROTO="TCP" 7 + OVPN_PROTO="TCP" 8 8 9 9 source test-close-socket.sh

+71 -31

tools/testing/selftests/net/ovpn/test-close-socket.sh

··· 5 5 # Author: Antonio Quartulli <antonio@openvpn.net> 6 6 7 7 #set -x 8 - set -e 8 + set -eE 9 9 10 10 source ./common.sh 11 11 12 - cleanup 12 + ovpn_test_finished=0 13 13 14 + ovpn_test_exit() { 15 + ovpn_cleanup 16 + modprobe -r ovpn || true 17 + 18 + if [ "${ovpn_test_finished}" -eq 0 ]; then 19 + ktap_print_totals 20 + fi 21 + } 22 + 23 + ovpn_prepare_network() { 24 + local p 25 + local peer_ns 26 + 27 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 28 + ovpn_cmd_ok "create namespace peer${p}" ovpn_create_ns "${p}" 29 + done 30 + 31 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 32 + ovpn_cmd_ok "configure peer${p} namespace" ovpn_setup_ns \ 33 + "${p}" 5.5.5.$((p + 1))/24 34 + done 35 + 36 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 37 + ovpn_cmd_ok "register peer${p} in overlay" ovpn_add_peer "${p}" 38 + done 39 + 40 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 41 + peer_ns="ovpn_peer${p}" 42 + ovpn_cmd_ok "set peer0 timeout for peer ${p}" \ 43 + ip netns exec ovpn_peer0 ${OVPN_CLI} set_peer tun0 \ 44 + ${p} 60 120 45 + ovpn_cmd_ok "set peer${p} timeout for peer ${p}" \ 46 + ip netns exec "${peer_ns}" ${OVPN_CLI} set_peer \ 47 + tun${p} $((p + OVPN_ID_OFFSET)) 60 120 48 + done 49 + } 50 + 51 + ovpn_run_ping_traffic() { 52 + local p 53 + 54 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 55 + ovpn_cmd_ok "send ping traffic to peer ${p}" \ 56 + ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ 57 + 5.5.5.$((p + 1)) 58 + done 59 + } 60 + 61 + ovpn_run_iperf() { 62 + local iperf_pid 63 + 64 + ovpn_run_bg iperf_pid ip netns exec ovpn_peer0 iperf3 -1 -s 65 + sleep 1 66 + ovpn_cmd_ok "run iperf throughput flow" \ 67 + ip netns exec ovpn_peer1 iperf3 -Z -t 3 -c 5.5.5.1 68 + wait "${iperf_pid}" || return 1 69 + } 70 + 71 + trap ovpn_test_exit EXIT 72 + trap ovpn_stage_err ERR 73 + 74 + ktap_print_header 75 + ktap_set_plan 3 76 + 77 + ovpn_cleanup 14 78 modprobe -q ovpn || true 15 79 16 - for p in $(seq 0 ${NUM_PEERS}); do 17 - create_ns ${p} 18 - done 80 + ovpn_run_stage "setup network 
topology" ovpn_prepare_network 81 + ovpn_run_stage "run ping traffic" ovpn_run_ping_traffic 82 + ovpn_run_stage "run iperf throughput" ovpn_run_iperf 19 83 20 - for p in $(seq 0 ${NUM_PEERS}); do 21 - setup_ns ${p} 5.5.5.$((${p} + 1))/24 22 - done 23 - 24 - for p in $(seq 0 ${NUM_PEERS}); do 25 - add_peer ${p} 26 - done 27 - 28 - for p in $(seq 1 ${NUM_PEERS}); do 29 - ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 30 - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} $((${p}+9)) 60 120 31 - done 32 - 33 - sleep 1 34 - 35 - for p in $(seq 1 ${NUM_PEERS}); do 36 - ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) 37 - done 38 - 39 - ip netns exec peer0 iperf3 -1 -s & 40 - sleep 1 41 - ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 42 - 43 - cleanup 44 - 45 - modprobe -r ovpn || true 84 + ovpn_test_finished=1 85 + ktap_finished

+1 -1

tools/testing/selftests/net/ovpn/test-float.sh

··· 4 4 # 5 5 # Author: Antonio Quartulli <antonio@openvpn.net> 6 6 7 - FLOAT="1" 7 + OVPN_FLOAT="1" 8 8 9 9 source test.sh

+154 -79

tools/testing/selftests/net/ovpn/test-mark.sh

··· 6 6 # Antonio Quartulli <antonio@openvpn.net> 7 7 8 8 #set -x 9 - set -e 9 + set -eE 10 10 11 11 MARK=1056 12 + MARK_DROP_COUNTER=0 12 13 13 14 source ./common.sh 14 15 15 - cleanup 16 + ovpn_test_finished=0 16 17 18 + ovpn_test_exit() { 19 + ovpn_cleanup 20 + modprobe -r ovpn || true 21 + 22 + if [ "${ovpn_test_finished}" -eq 0 ]; then 23 + ktap_print_totals 24 + fi 25 + } 26 + 27 + ovpn_mark_prepare_network() { 28 + local p 29 + local peer_ns 30 + 31 + for p in $(seq 0 "${OVPN_NUM_PEERS}"); do 32 + ovpn_cmd_ok "create namespace peer${p}" ovpn_create_ns "${p}" 33 + done 34 + 35 + for p in $(seq 0 3); do 36 + ovpn_cmd_ok "configure peer${p} namespace" ovpn_setup_ns \ 37 + "${p}" 5.5.5.$((p + 1))/24 38 + done 39 + 40 + ovpn_cmd_ok "create server-side multi-peer with fwmark" \ 41 + ip netns exec ovpn_peer0 "${OVPN_CLI}" new_multi_peer tun0 1 \ 42 + ASYMM "${OVPN_UDP_PEERS_FILE}" "${MARK}" 43 + for p in $(seq 1 3); do 44 + ovpn_cmd_ok "install server key for peer ${p}" \ 45 + ip netns exec ovpn_peer0 "${OVPN_CLI}" new_key tun0 \ 46 + "${p}" 1 0 "${OVPN_ALG}" 0 data64.key 47 + done 48 + 49 + for p in $(seq 1 3); do 50 + ovpn_cmd_ok "register peer${p} in overlay" ovpn_add_peer "${p}" 51 + done 52 + 53 + for p in $(seq 1 3); do 54 + peer_ns="ovpn_peer${p}" 55 + ovpn_cmd_ok "set peer0 timeout for peer ${p}" \ 56 + ip netns exec ovpn_peer0 "${OVPN_CLI}" set_peer tun0 \ 57 + "${p}" 60 120 58 + ovpn_cmd_ok "set peer${p} timeout for peer ${p}" \ 59 + ip netns exec "${peer_ns}" "${OVPN_CLI}" set_peer \ 60 + tun"${p}" $((p + OVPN_ID_OFFSET)) 60 120 61 + done 62 + } 63 + 64 + ovpn_mark_run_baseline_traffic() { 65 + local p 66 + 67 + for p in $(seq 1 3); do 68 + ovpn_cmd_ok "send baseline traffic to peer ${p}" \ 69 + ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ 70 + 5.5.5.$((p + 1)) 71 + done 72 + } 73 + 74 + ovpn_mark_add_drop_rule() { 75 + ovpn_log "Adding an nftables drop rule based on mark value ${MARK}" 76 + 77 + ovpn_cmd_ok "flush nft ruleset" ip netns exec ovpn_peer0 
nft flush \ 78 + ruleset 79 + ovpn_cmd_ok "create nft filter table" ip netns exec ovpn_peer0 nft \ 80 + "add table inet filter" 81 + ovpn_cmd_ok "create nft filter output chain" \ 82 + ip netns exec ovpn_peer0 nft "add chain inet filter output { \ 83 + type filter hook output priority 0; policy accept; }" 84 + ovpn_cmd_ok "add nft drop rule for mark ${MARK}" \ 85 + ip netns exec ovpn_peer0 nft add rule inet filter output \ 86 + meta mark == "${MARK}" \ 87 + counter drop 88 + 89 + MARK_DROP_COUNTER=$(ip netns exec ovpn_peer0 nft list chain inet \ 90 + filter output | sed -n 's/.*packets \([0-9]*\).*/\1/p') 91 + if [ -z "${MARK_DROP_COUNTER}" ]; then 92 + printf '%s\n' "unable to read nft drop counter" 93 + return 1 94 + fi 95 + } 96 + 97 + ovpn_mark_verify_drop_traffic() { 98 + local p 99 + local ping_output 100 + local lost_packets 101 + local total_count 102 + 103 + for p in $(seq 1 3); do 104 + if ping_output=$(ip netns exec ovpn_peer0 ping -qfc 500 -w 1 \ 105 + 5.5.5.$((p + 1)) 2>&1); then 106 + printf '%s\n' "expected ping to peer ${p} to fail \ 107 + after nft drop rule" 108 + return 1 109 + fi 110 + ovpn_log "${ping_output}" 111 + lost_packets=$(echo "${ping_output}" | \ 112 + awk '/packets transmitted/ { print $1 }') 113 + if [ -z "${lost_packets}" ]; then 114 + printf '%s\n' "unable to parse lost packets for peer \ 115 + ${p}" 116 + return 1 117 + fi 118 + MARK_DROP_COUNTER=$((MARK_DROP_COUNTER + lost_packets)) 119 + done 120 + 121 + total_count=$(ip netns exec ovpn_peer0 nft list chain inet filter \ 122 + output | sed -n 's/.*packets \([0-9]*\).*/\1/p') 123 + if [ -z "${total_count}" ]; then 124 + printf '%s\n' "unable to read final nft drop counter" 125 + return 1 126 + fi 127 + if [ "${MARK_DROP_COUNTER}" -ne "${total_count}" ]; then 128 + printf '%s\n' "expected ${MARK_DROP_COUNTER} drops, got \ 129 + ${total_count}" 130 + return 1 131 + fi 132 + } 133 + 134 + ovpn_mark_remove_drop_rule() { 135 + ovpn_log "Removing the drop rule" 136 + 137 + ovpn_cmd_ok 
"flush nft ruleset" ip netns exec ovpn_peer0 nft flush \ 138 + ruleset 139 + } 140 + 141 + ovpn_mark_verify_traffic_recovery() { 142 + local p 143 + 144 + sleep 1 145 + for p in $(seq 1 3); do 146 + ovpn_cmd_ok "send recovery traffic to peer ${p}" \ 147 + ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ 148 + 5.5.5.$((p + 1)) 149 + done 150 + } 151 + 152 + trap ovpn_test_exit EXIT 153 + trap ovpn_stage_err ERR 154 + 155 + ktap_print_header 156 + ktap_set_plan 6 157 + 158 + ovpn_cleanup 17 159 modprobe -q ovpn || true 18 160 19 - for p in $(seq 0 "${NUM_PEERS}"); do 20 - create_ns "${p}" 21 - done 161 + ovpn_run_stage "setup marked network topology" ovpn_mark_prepare_network 162 + ovpn_run_stage "run baseline traffic" ovpn_mark_run_baseline_traffic 163 + ovpn_run_stage "install nft mark drop rule" ovpn_mark_add_drop_rule 164 + ovpn_run_stage "drop marked traffic and count packets" \ 165 + ovpn_mark_verify_drop_traffic 166 + ovpn_run_stage "remove nft drop rule" ovpn_mark_remove_drop_rule 167 + ovpn_run_stage "traffic recovers after drop removal" \ 168 + ovpn_mark_verify_traffic_recovery 22 169 23 - for p in $(seq 0 3); do 24 - setup_ns "${p}" 5.5.5.$((p + 1))/24 25 - done 26 - 27 - # add peer0 with mark 28 - ip netns exec peer0 "${OVPN_CLI}" new_multi_peer tun0 1 ASYMM \ 29 - "${UDP_PEERS_FILE}" \ 30 - ${MARK} 31 - for p in $(seq 1 3); do 32 - ip netns exec peer0 "${OVPN_CLI}" new_key tun0 "${p}" 1 0 "${ALG}" 0 \ 33 - data64.key 34 - done 35 - 36 - for p in $(seq 1 3); do 37 - add_peer "${p}" 38 - done 39 - 40 - for p in $(seq 1 3); do 41 - ip netns exec peer0 "${OVPN_CLI}" set_peer tun0 "${p}" 60 120 42 - ip netns exec peer"${p}" "${OVPN_CLI}" set_peer tun"${p}" \ 43 - $((p + 9)) 60 120 44 - done 45 - 46 - sleep 1 47 - 48 - for p in $(seq 1 3); do 49 - ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1)) 50 - done 51 - 52 - echo "Adding an nftables drop rule based on mark value ${MARK}" 53 - ip netns exec peer0 nft flush ruleset 54 - ip netns exec peer0 nft 'add 
table inet filter' 55 - ip netns exec peer0 nft 'add chain inet filter output { 56 - type filter hook output priority 0; 57 - policy accept; 58 - }' 59 - ip netns exec peer0 nft add rule inet filter output \ 60 - meta mark == ${MARK} \ 61 - counter drop 62 - 63 - DROP_COUNTER=$(ip netns exec peer0 nft list chain inet filter output \ 64 - | sed -n 's/.*packets \([0-9]*\).*/\1/p') 65 - sleep 1 66 - 67 - # ping should fail 68 - for p in $(seq 1 3); do 69 - PING_OUTPUT=$(ip netns exec peer0 ping \ 70 - -qfc 500 -w 1 5.5.5.$((p + 1)) 2>&1) && exit 1 71 - echo "${PING_OUTPUT}" 72 - LOST_PACKETS=$(echo "$PING_OUTPUT" \ 73 - | awk '/packets transmitted/ { print $1 }') 74 - # increment the drop counter by the amount of lost packets 75 - DROP_COUNTER=$((DROP_COUNTER + LOST_PACKETS)) 76 - done 77 - 78 - # check if the final nft counter matches our counter 79 - TOTAL_COUNT=$(ip netns exec peer0 nft list chain inet filter output \ 80 - | sed -n 's/.*packets \([0-9]*\).*/\1/p') 81 - if [ "${DROP_COUNTER}" -ne "${TOTAL_COUNT}" ]; then 82 - echo "Expected ${TOTAL_COUNT} drops, got ${DROP_COUNTER}" 83 - exit 1 84 - fi 85 - 86 - echo "Removing the drop rule" 87 - ip netns exec peer0 nft flush ruleset 88 - sleep 1 89 - 90 - for p in $(seq 1 3); do 91 - ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1)) 92 - done 93 - 94 - cleanup 95 - 96 - modprobe -r ovpn || true 170 + ovpn_test_finished=1 171 + ktap_finished

+2 -2

tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh

··· 5 5 # Author: Ralf Lici <ralf@mandelbit.com> 6 6 # Antonio Quartulli <antonio@openvpn.net> 7 7 8 - SYMMETRIC_ID="1" 9 - FLOAT="1" 8 + OVPN_SYMMETRIC_ID="1" 9 + OVPN_FLOAT="1" 10 10 11 11 source test.sh

+2 -2

tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh

··· 5 5 # Author: Ralf Lici <ralf@mandelbit.com> 6 6 # Antonio Quartulli <antonio@openvpn.net> 7 7 8 - PROTO="TCP" 9 - SYMMETRIC_ID=1 8 + OVPN_PROTO="TCP" 9 + OVPN_SYMMETRIC_ID=1 10 10 11 11 source test.sh

+1 -1

tools/testing/selftests/net/ovpn/test-symmetric-id.sh

··· 5 5 # Author: Ralf Lici <ralf@mandelbit.com> 6 6 # Antonio Quartulli <antonio@openvpn.net> 7 7 8 - SYMMETRIC_ID="1" 8 + OVPN_SYMMETRIC_ID="1" 9 9 10 10 source test.sh

+1 -1

tools/testing/selftests/net/ovpn/test-tcp.sh

··· 4 4 # 5 5 # Author: Antonio Quartulli <antonio@openvpn.net> 6 6 7 - PROTO="TCP" 7 + OVPN_PROTO="TCP" 8 8 9 9 source test.sh

+291 -136

tools/testing/selftests/net/ovpn/test.sh

··· 5 5 # Author: Antonio Quartulli <antonio@openvpn.net> 6 6 7 7 #set -x 8 - set -e 8 + set -eE 9 9 10 10 source ./common.sh 11 11 12 - cleanup 12 + ovpn_test_finished=0 13 13 14 - modprobe -q ovpn || true 14 + ovpn_test_exit() { 15 + ovpn_cleanup 16 + modprobe -r ovpn || true 15 17 16 - for p in $(seq 0 ${NUM_PEERS}); do 17 - create_ns ${p} 18 - done 18 + if [ "${ovpn_test_finished}" -eq 0 ]; then 19 + ktap_print_totals 20 + fi 21 + } 19 22 20 - for p in $(seq 0 ${NUM_PEERS}); do 21 - setup_listener ${p} 22 - done 23 + ovpn_prepare_network() { 24 + local p 25 + local peer_ns 23 26 24 - for p in $(seq 0 ${NUM_PEERS}); do 25 - setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU} 26 - done 27 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 28 + ovpn_cmd_ok "create namespace peer${p}" ovpn_create_ns "${p}" 29 + done 27 30 28 - for p in $(seq 0 ${NUM_PEERS}); do 29 - add_peer ${p} 30 - done 31 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 32 + ovpn_cmd_ok "start notification listener peer${p}" \ 33 + ovpn_setup_listener "${p}" 34 + # starting all YNL listeners back-to-back can intermittently 35 + # stall their startup so serialize launches a bit 36 + sleep 0.5 37 + done 31 38 32 - for p in $(seq 1 ${NUM_PEERS}); do 33 - ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 34 - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ 35 - $((${p}+ID_OFFSET)) 60 120 36 - done 39 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 40 + ovpn_cmd_ok "configure peer${p} namespace" ovpn_setup_ns \ 41 + "${p}" 5.5.5.$((p + 1))/24 "${MTU}" 42 + done 37 43 38 - sleep 1 44 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 45 + ovpn_cmd_ok "register peer${p} in overlay" ovpn_add_peer "${p}" 46 + done 39 47 40 - TCPDUMP_TIMEOUT="1.5s" 41 - for p in $(seq 1 ${NUM_PEERS}); do 42 - # The first part of the data packet header consists of: 43 - # - TCP only: 2 bytes for the packet length 44 - # - 5 bits for opcode ("9" for DATA_V2) 45 - # - 3 bits for key-id ("0" at this point) 46 - # - 12 bytes for peer-id: 47 - # - 
with asymmetric ID: "${p}" one way and "${p} + 9" the other way 48 - # - with symmetric ID: "${p}" both ways 49 - HEADER1=$(printf "0x4800000%x" ${p}) 50 - HEADER2=$(printf "0x4800000%x" $((${p} + ID_OFFSET))) 51 - RADDR="" 52 - if [ "${PROTO}" == "UDP" ]; then 53 - RADDR=$(awk "NR == ${p} {print \$3}" ${UDP_PEERS_FILE}) 48 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 49 + peer_ns="ovpn_peer${p}" 50 + ovpn_cmd_ok "set peer0 timeout for peer ${p}" \ 51 + ip netns exec ovpn_peer0 ${OVPN_CLI} set_peer tun0 \ 52 + ${p} 60 120 53 + ovpn_cmd_ok "set peer${p} timeout for peer ${p}" \ 54 + ip netns exec "${peer_ns}" ${OVPN_CLI} set_peer \ 55 + tun${p} $((p + OVPN_ID_OFFSET)) 60 120 56 + done 57 + } 58 + 59 + ovpn_run_basic_traffic() { 60 + local p 61 + local header1 62 + local header2 63 + local peer_ns 64 + local raddr 65 + local tcpdump_pid1 66 + local tcpdump_pid2 67 + local tcpdump_timeout="1.5s" 68 + 69 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 70 + # The first part of the data packet header consists of: 71 + # - TCP only: 2 bytes for the packet length 72 + # - 5 bits for opcode ("9" for DATA_V2) 73 + # - 3 bits for key-id ("0" at this point) 74 + # - 12 bytes for peer-id: 75 + # - with asymmetric ID: "${p}" one way and "${p} + 9" the 76 + # other way 77 + # - with symmetric ID: "${p}" both ways 78 + header1=$(printf "0x4800000%x" ${p}) 79 + header2=$(printf "0x4800000%x" $((p + OVPN_ID_OFFSET))) 80 + raddr="" 81 + if [ "${OVPN_PROTO}" == "UDP" ]; then 82 + raddr=$(awk "NR == ${p} {print \$3}" \ 83 + "${OVPN_UDP_PEERS_FILE}") 84 + fi 85 + peer_ns="ovpn_peer${p}" 86 + 87 + timeout ${tcpdump_timeout} ip netns exec "${peer_ns}" \ 88 + tcpdump --immediate-mode -p -ni veth${p} -c 1 \ 89 + "$(ovpn_build_capture_filter "${header1}" "${raddr}")" \ 90 + >/dev/null 2>&1 & 91 + tcpdump_pid1=$! 
92 + timeout ${tcpdump_timeout} ip netns exec "${peer_ns}" \ 93 + tcpdump --immediate-mode -p -ni veth${p} -c 1 \ 94 + "$(ovpn_build_capture_filter "${header2}" "${raddr}")" \ 95 + >/dev/null 2>&1 & 96 + tcpdump_pid2=$! 97 + 98 + sleep 0.3 99 + ovpn_cmd_ok "send baseline traffic to peer ${p}" \ 100 + ip netns exec ovpn_peer0 \ 101 + ping -qfc 500 -w 3 5.5.5.$((p + 1)) 102 + ovpn_cmd_ok "send large-payload traffic to peer ${p}" \ 103 + ip netns exec ovpn_peer0 \ 104 + ping -qfc 500 -s 3000 -w 3 5.5.5.$((p + 1)) 105 + 106 + wait "${tcpdump_pid1}" || return 1 107 + wait "${tcpdump_pid2}" || return 1 108 + done 109 + } 110 + 111 + ovpn_run_lan_traffic() { 112 + ovpn_cmd_ok "ping LAN behind peer1" \ 113 + ip netns exec ovpn_peer0 ping -qfc 500 -w 3 "${OVPN_LAN_IP}" 114 + } 115 + 116 + ovpn_run_float_mode() { 117 + local p 118 + local peer_ns 119 + 120 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 121 + peer_ns="ovpn_peer${p}" 122 + ovpn_cmd_ok "float: remove old transport address on peer${p}" \ 123 + ip -n "${peer_ns}" addr del 10.10.${p}.2/24 dev veth${p} 124 + ovpn_cmd_ok "float: add new transport address on peer${p}" \ 125 + ip -n "${peer_ns}" addr add 10.10.${p}.3/24 dev veth${p} 126 + done 127 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 128 + peer_ns="ovpn_peer${p}" 129 + ovpn_cmd_ok "ping tunnel after float peer ${p}" \ 130 + ip netns exec "${peer_ns}" ping -qfc 500 -w 3 5.5.5.1 131 + done 132 + } 133 + 134 + ovpn_run_iperf() { 135 + local iperf_pid 136 + 137 + ovpn_run_bg iperf_pid ip netns exec ovpn_peer0 iperf3 -1 -s 138 + sleep 1 139 + 140 + ovpn_cmd_ok "run iperf throughput flow" \ 141 + ip netns exec ovpn_peer1 iperf3 -Z -t 3 -c 5.5.5.1 142 + wait "${iperf_pid}" || return 1 143 + } 144 + 145 + ovpn_run_key_rollover() { 146 + local p 147 + local peer_ns 148 + 149 + ovpn_log "Adding secondary key and then swap:" 150 + 151 + for p in $(seq 1 ${OVPN_NUM_PEERS}); do 152 + peer_ns="ovpn_peer${p}" 153 + ovpn_cmd_ok "add secondary key on peer0 for peer ${p}" \ 154 + ip 
netns exec ovpn_peer0 ${OVPN_CLI} new_key tun0 \ 155 + ${p} 2 1 ${OVPN_ALG} 0 data64.key 156 + ovpn_cmd_ok "add secondary key on peer${p} for peer ${p}" \ 157 + ip netns exec "${peer_ns}" ${OVPN_CLI} new_key tun${p} \ 158 + $((p + OVPN_ID_OFFSET)) 2 1 ${OVPN_ALG} 1 \ 159 + data64.key 160 + ovpn_cmd_ok "swap keys on peer${p}" \ 161 + ip netns exec "${peer_ns}" ${OVPN_CLI} swap_keys \ 162 + tun${p} $((p + OVPN_ID_OFFSET)) 163 + done 164 + } 165 + 166 + ovpn_run_queries() { 167 + ovpn_log "Querying all peers:" 168 + 169 + ovpn_cmd_ok "query all peers from peer0" \ 170 + ip netns exec ovpn_peer0 ${OVPN_CLI} get_peer tun0 171 + ovpn_cmd_ok "query all peers from peer1" \ 172 + ip netns exec ovpn_peer1 ${OVPN_CLI} get_peer tun1 173 + 174 + ovpn_log "Querying peer 1:" 175 + 176 + ovpn_cmd_ok "query peer 1 from peer0" \ 177 + ip netns exec ovpn_peer0 ${OVPN_CLI} get_peer tun0 1 178 + } 179 + 180 + ovpn_query_peer_missing() { 181 + ovpn_log "Querying non-existent peer 20:" 182 + 183 + ovpn_cmd_fail "query missing peer 20 on peer0" \ 184 + ip netns exec ovpn_peer0 ${OVPN_CLI} get_peer tun0 20 185 + } 186 + 187 + ovpn_run_peer_cleanup() { 188 + local p 189 + local peer_ns 190 + 191 + ovpn_log "Deleting peer 1:" 192 + 193 + ovpn_cmd_ok "delete peer1 on peer0" \ 194 + ip netns exec ovpn_peer0 ${OVPN_CLI} del_peer tun0 1 195 + ovpn_cmd_ok "delete peer1 on peer1" \ 196 + ip netns exec ovpn_peer1 ${OVPN_CLI} del_peer tun1 \ 197 + $((1 + OVPN_ID_OFFSET)) 198 + 199 + ovpn_log "Querying keys:" 200 + 201 + for p in $(seq 2 ${OVPN_NUM_PEERS}); do 202 + peer_ns="ovpn_peer${p}" 203 + ovpn_cmd_ok "query peer${p} key 1" \ 204 + ip netns exec "${peer_ns}" ${OVPN_CLI} get_key tun${p} \ 205 + $((p + OVPN_ID_OFFSET)) 1 206 + ovpn_cmd_ok "query peer${p} key 2" \ 207 + ip netns exec "${peer_ns}" ${OVPN_CLI} get_key tun${p} \ 208 + $((p + OVPN_ID_OFFSET)) 2 209 + done 210 + } 211 + 212 + ovpn_run_traffic_delete_peer() { 213 + local ping_pid 214 + 215 + ovpn_log "Deleting peer while sending 
traffic:" 216 + 217 + ovpn_run_bg ping_pid ip netns exec ovpn_peer2 ping -qf -w 4 5.5.5.1 218 + sleep 2 219 + ovpn_cmd_ok "delete peer0 peer 2" \ 220 + ip netns exec ovpn_peer0 ${OVPN_CLI} del_peer tun0 2 221 + 222 + if [ "${OVPN_PROTO}" == "TCP" ]; then 223 + # In TCP mode this command is expected to fail for both peers. 224 + ovpn_cmd_mayfail "delete peer2 peer 2 (TCP non-fatal)" \ 225 + ip netns exec ovpn_peer2 ${OVPN_CLI} del_peer tun2 \ 226 + $((2 + OVPN_ID_OFFSET)) 227 + else 228 + ovpn_cmd_ok "delete peer2 peer 2" ip netns exec ovpn_peer2 \ 229 + ${OVPN_CLI} del_peer tun2 $((2 + OVPN_ID_OFFSET)) 54 230 fi 55 231 56 - timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \ 57 - tcpdump --immediate-mode -p -ni veth${p} -c 1 \ 58 - "$(build_capture_filter "${HEADER1}" "${RADDR}")" \ 59 - >/dev/null 2>&1 & 60 - TCPDUMP_PID1=$! 61 - timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \ 62 - tcpdump --immediate-mode -p -ni veth${p} -c 1 \ 63 - "$(build_capture_filter "${HEADER2}" "${RADDR}")" \ 64 - >/dev/null 2>&1 & 65 - TCPDUMP_PID2=$! 232 + wait "${ping_pid}" || true 233 + } 66 234 67 - sleep 0.3 68 - ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) 69 - ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1)) 235 + ovpn_run_key_cleanup() { 236 + local p 237 + local peer_ns 70 238 71 - wait ${TCPDUMP_PID1} 72 - wait ${TCPDUMP_PID2} 73 - done 239 + ovpn_log "Deleting keys:" 74 240 75 - # ping LAN behind client 1 76 - ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP} 77 - 78 - if [ "$FLOAT" == "1" ]; then 79 - # make clients float.. 
80 - for p in $(seq 1 ${NUM_PEERS}); do 81 - ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p} 82 - ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p} 241 + for p in $(seq 3 ${OVPN_NUM_PEERS}); do 242 + peer_ns="ovpn_peer${p}" 243 + ovpn_cmd_ok "delete key 1 for peer${p}" \ 244 + ip netns exec "${peer_ns}" ${OVPN_CLI} del_key tun${p} \ 245 + $((p + OVPN_ID_OFFSET)) 1 246 + ovpn_cmd_ok "delete key 2 for peer${p}" \ 247 + ip netns exec "${peer_ns}" ${OVPN_CLI} del_key tun${p} \ 248 + $((p + OVPN_ID_OFFSET)) 2 83 249 done 84 - for p in $(seq 1 ${NUM_PEERS}); do 85 - ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1 250 + } 251 + 252 + ovpn_run_timeouts() { 253 + local p 254 + local peer_ns 255 + 256 + ovpn_log "Setting timeout to 3s MP:" 257 + 258 + for p in $(seq 3 ${OVPN_NUM_PEERS}); do 259 + # Non-fatal: this may fail in some protocol modes. 260 + ovpn_cmd_mayfail "set peer0 timeout for peer ${p} (non-fatal)" \ 261 + ip netns exec ovpn_peer0 ${OVPN_CLI} set_peer tun0 \ 262 + ${p} 3 3 263 + peer_ns="ovpn_peer${p}" 264 + ovpn_cmd_ok "disable timeout on peer${p} while peer0 adjusts \ 265 + state" ip netns exec "${peer_ns}" ${OVPN_CLI} set_peer \ 266 + tun${p} $((p + OVPN_ID_OFFSET)) 0 0 86 267 done 268 + # wait for peers to timeout 269 + sleep 5 270 + 271 + ovpn_log "Setting timeout to 3s P2P:" 272 + 273 + for p in $(seq 3 ${OVPN_NUM_PEERS}); do 274 + peer_ns="ovpn_peer${p}" 275 + ovpn_cmd_ok "set peer${p} P2P timeout" \ 276 + ip netns exec "${peer_ns}" ${OVPN_CLI} set_peer \ 277 + tun${p} $((p + OVPN_ID_OFFSET)) 3 3 278 + done 279 + sleep 5 280 + } 281 + 282 + ovpn_run_notifications() { 283 + local p 284 + 285 + for p in $(seq 0 ${OVPN_NUM_PEERS}); do 286 + ovpn_cmd_ok "validate listener output for peer ${p}" \ 287 + ovpn_compare_ntfs "${p}" 288 + done 289 + } 290 + 291 + trap ovpn_test_exit EXIT 292 + trap ovpn_stage_err ERR 293 + 294 + ktap_print_header 295 + if [ "${OVPN_FLOAT}" == "1" ]; then 296 + ktap_set_plan 13 297 + else 298 + ktap_set_plan 12 87 299 fi 
88 300 89 - ip netns exec peer0 iperf3 -1 -s & 90 - sleep 1 91 - ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 301 + ovpn_cleanup 302 + modprobe -q ovpn || true 92 303 93 - echo "Adding secondary key and then swap:" 94 - for p in $(seq 1 ${NUM_PEERS}); do 95 - ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 \ 96 - data64.key 97 - ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} \ 98 - $((${p} + ID_OFFSET)) 2 1 ${ALG} 1 data64.key 99 - ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} \ 100 - $((${p} + ID_OFFSET)) 101 - done 304 + ovpn_run_stage "setup network topology" ovpn_prepare_network 305 + ovpn_run_stage "run baseline data traffic" ovpn_run_basic_traffic 306 + ovpn_run_stage "run LAN traffic behind peer1" ovpn_run_lan_traffic 307 + [ "${OVPN_FLOAT}" == "1" ] && ovpn_run_stage "run floating peer checks" \ 308 + ovpn_run_float_mode 309 + ovpn_run_stage "run iperf throughput" ovpn_run_iperf 310 + ovpn_run_stage "run key rollout" ovpn_run_key_rollover 311 + ovpn_run_stage "query peers" ovpn_run_queries 312 + ovpn_run_stage "query missing peer fails" ovpn_query_peer_missing 313 + ovpn_run_stage "peer lifecycle and key queries" ovpn_run_peer_cleanup 314 + ovpn_run_stage "delete peer while traffic" ovpn_run_traffic_delete_peer 315 + ovpn_run_stage "delete stale keys" ovpn_run_key_cleanup 316 + ovpn_run_stage "check timeout behavior" ovpn_run_timeouts 317 + ovpn_run_stage "validate notification output" ovpn_run_notifications 102 318 103 - sleep 1 104 - 105 - echo "Querying all peers:" 106 - ip netns exec peer0 ${OVPN_CLI} get_peer tun0 107 - ip netns exec peer1 ${OVPN_CLI} get_peer tun1 108 - 109 - echo "Querying peer 1:" 110 - ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1 111 - 112 - echo "Querying non-existent peer 20:" 113 - ip netns exec peer0 ${OVPN_CLI} get_peer tun0 20 || true 114 - 115 - echo "Deleting peer 1:" 116 - ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1 117 - ip netns exec peer1 ${OVPN_CLI} del_peer tun1 $((1 + ID_OFFSET)) 118 
- 119 - echo "Querying keys:" 120 - for p in $(seq 2 ${NUM_PEERS}); do 121 - ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \ 122 - $((${p} + ID_OFFSET)) 1 123 - ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \ 124 - $((${p} + ID_OFFSET)) 2 125 - done 126 - 127 - echo "Deleting peer while sending traffic:" 128 - (ip netns exec peer2 ping -qf -w 4 5.5.5.1)& 129 - sleep 2 130 - ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2 131 - # following command fails in TCP mode 132 - # (both ends get conn reset when one peer disconnects) 133 - ip netns exec peer2 ${OVPN_CLI} del_peer tun2 $((2 + ID_OFFSET)) || true 134 - 135 - echo "Deleting keys:" 136 - for p in $(seq 3 ${NUM_PEERS}); do 137 - ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \ 138 - $((${p} + ID_OFFSET)) 1 139 - ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \ 140 - $((${p} + ID_OFFSET)) 2 141 - done 142 - 143 - echo "Setting timeout to 3s MP:" 144 - for p in $(seq 3 ${NUM_PEERS}); do 145 - ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true 146 - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ 147 - $((${p} + ID_OFFSET)) 0 0 148 - done 149 - # wait for peers to timeout 150 - sleep 5 151 - 152 - echo "Setting timeout to 3s P2P:" 153 - for p in $(seq 3 ${NUM_PEERS}); do 154 - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ 155 - $((${p} + ID_OFFSET)) 3 3 156 - done 157 - sleep 5 158 - 159 - for p in $(seq 0 ${NUM_PEERS}); do 160 - compare_ntfs ${p} 161 - done 162 - 163 - cleanup 164 - 165 - modprobe -r ovpn || true 319 + ovpn_test_finished=1 320 + ktap_finished

+48

tools/testing/selftests/net/packetdrill/tcp_rfc5961_ack-out-of-window.pkt

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // 3 + // RFC 5961 Section 5.2 / RFC 793 Section 3.9: an incoming segment's 4 + // ACK value must lie in [SND.UNA - MAX.SND.WND, SND.NXT]; otherwise 5 + // the receiver MUST discard the segment and send a challenge ACK 6 + // back. Exercise both edges of that window in a single connection. 7 + 8 + `./defaults.sh 9 + sysctl -q net.ipv4.tcp_invalid_ratelimit=0 10 + ` 11 + 12 + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 13 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 14 + +0 bind(3, ..., ...) = 0 15 + +0 listen(3, 1) = 0 16 + 17 + // Three-way handshake. Peer advertises rwnd = 1000 (no wscale), so 18 + // MAX.SND.WND is tracked as 1000. 19 + +0 < S 0:0(0) win 1000 <mss 1000,sackOK,nop,nop,nop,wscale 0> 20 + +0 > S. 0:0(0) ack 1 <...> 21 + +.1 < . 1:1(0) ack 1 win 1000 22 + +0 accept(3, ..., ...) = 4 23 + 24 + // ---- Upper edge: SEG.ACK > SND.NXT -------------------------------- 25 + // Server has sent nothing yet, so SND.UNA = SND.NXT = 1. 26 + // Peer sends a pure ACK with SEG.ACK = 2, beyond SND.NXT. 27 + +0 < . 1:1(0) ack 2 win 1000 28 + // Expect a challenge ACK: <SEQ = SND.NXT = 1, ACK = RCV.NXT = 1>. 29 + +0 > . 1:1(0) ack 1 30 + 31 + // Advance SND.UNA past MAX.SND.WND so that the lower edge becomes 32 + // reachable. Issue two 1-MSS writes so each skb is exactly one MSS 33 + // and PSH is set by tcp_push() at the end of each sendmsg, keeping 34 + // the setup independent of the TSO / tcp_fragment split path. 35 + +0 write(4, ..., 1000) = 1000 36 + +0 > P. 1:1001(1000) ack 1 37 + +.01 < . 1:1(0) ack 1001 win 1000 38 + +0 write(4, ..., 1000) = 1000 39 + +0 > P. 1001:2001(1000) ack 1 40 + +.01 < . 1:1(0) ack 2001 win 1000 41 + // Now SND.UNA = SND.NXT = 2001, MAX.SND.WND = 1000, bytes_acked = 2000. 42 + 43 + // ---- Lower edge: SEG.ACK < SND.UNA - MAX.SND.WND ------------------ 44 + // SND.UNA - MAX.SND.WND = 2001 - 1000 = 1001, so SEG.ACK = 1000 falls 45 + // below the acceptable range. 46 + +0 < . 
1:1(0) ack 1000 win 1000 47 + // Expect a challenge ACK: <SEQ = SND.NXT = 2001, ACK = RCV.NXT = 1>. 48 + +0 > . 2001:2001(0) ack 1

+3 -1

tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt

··· 19 19 20 20 // bad packet with high tsval (its ACK sequence is above our sndnxt) 21 21 +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100> 22 - 22 + // Challenge ACK for SEG.ACK > SND.NXT (RFC 5961 5.2 / RFC 793 3.9). 23 + // ecr=200 (not 200000) proves ts_recent was not updated from the bad packet. 24 + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 200> 23 25 24 26 +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> 25 27 +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>

+44

tools/testing/selftests/net/rtnetlink.sh

··· 23 23 kci_test_encap 24 24 kci_test_macsec 25 25 kci_test_macsec_vlan 26 + kci_test_team_bridge_macvlan 26 27 kci_test_ipsec 27 28 kci_test_ipsec_offload 28 29 kci_test_fdb_get ··· 635 634 fi 636 635 637 636 end_test "PASS: macsec_vlan" 637 + } 638 + 639 + # Test ndo_change_rx_flags call from dev_uc_add under addr_list_lock spinlock. 640 + # When we are flipping the promisc, make sure it runs on the work queue. 641 + # 642 + # https://lore.kernel.org/netdev/20260214033859.43857-1-jiayuan.chen@linux.dev/ 643 + # With (more conventional) macvlan instead of macsec. 644 + # macvlan -> bridge -> team -> dummy 645 + kci_test_team_bridge_macvlan() 646 + { 647 + local vlan="test_macv1" 648 + local bridge="test_br1" 649 + local team="test_team1" 650 + local dummy="test_dummy1" 651 + local ret=0 652 + 653 + run_cmd ip link add $team type team 654 + if [ $ret -ne 0 ]; then 655 + end_test "SKIP: team_bridge_macvlan: can't add team interface" 656 + return $ksft_skip 657 + fi 658 + 659 + run_cmd ip link add $dummy type dummy 660 + run_cmd ip link set $dummy master $team 661 + run_cmd ip link set $team up 662 + run_cmd ip link add $bridge type bridge vlan_filtering 1 663 + run_cmd ip link set $bridge up 664 + run_cmd ip link set $team master $bridge 665 + run_cmd ip link add link $bridge name $vlan \ 666 + address 00:aa:bb:cc:dd:ee type macvlan mode bridge 667 + run_cmd ip link set $vlan up 668 + 669 + run_cmd ip link del $vlan 670 + run_cmd ip link del $bridge 671 + run_cmd ip link del $team 672 + run_cmd ip link del $dummy 673 + 674 + if [ $ret -ne 0 ]; then 675 + end_test "FAIL: team_bridge_macvlan" 676 + return 1 677 + fi 678 + 679 + end_test "PASS: team_bridge_macvlan" 638 680 } 639 681 640 682 #-------------------------------------------------------------------

+1

tools/testing/selftests/net/tcp_ao/config

··· 1 + CONFIG_CRYPTO_CMAC=y 1 2 CONFIG_CRYPTO_HMAC=y 2 3 CONFIG_CRYPTO_RMD160=y 3 4 CONFIG_CRYPTO_SHA1=y

+15

tools/testing/vsock/util.c

··· 381 381 } 382 382 } 383 383 384 + #define RECV_PEEK_RETRY_USEC (10 * 1000) 385 + 384 386 /* Receive bytes in a buffer and check the return value. 387 + * 388 + * When MSG_PEEK is set, recv() is retried until it returns at least 389 + * expected_ret bytes. The function returns on error, EOF, or timeout 390 + * as usual. 385 391 * 386 392 * expected_ret: 387 393 * <0 Negative errno (for testing errors) ··· 408 402 continue; 409 403 if (ret <= 0) 410 404 break; 405 + 406 + if (flags & MSG_PEEK) { 407 + if (ret >= expected_ret) { 408 + nread = ret; 409 + break; 410 + } 411 + timeout_usleep(RECV_PEEK_RETRY_USEC); 412 + continue; 413 + } 411 414 412 415 nread += ret; 413 416 } while (nread < len);

+38 -12

tools/testing/vsock/vsock_test.c

··· 346 346 return test_msg_peek_server(opts, false); 347 347 } 348 348 349 + static void test_stream_peek_after_recv_server(const struct test_opts *opts) 350 + { 351 + unsigned char buf_normal[MSG_PEEK_BUF_LEN]; 352 + unsigned char buf_peek[MSG_PEEK_BUF_LEN]; 353 + int fd; 354 + 355 + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); 356 + if (fd < 0) { 357 + perror("accept"); 358 + exit(EXIT_FAILURE); 359 + } 360 + 361 + control_writeln("SRVREADY"); 362 + 363 + /* Partial recv to advance offset within the skb */ 364 + recv_buf(fd, buf_normal, 1, 0, 1); 365 + 366 + /* Peek with a buffer larger than the remaining data */ 367 + recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK, sizeof(buf_peek) - 1); 368 + 369 + /* Consume the remaining data */ 370 + recv_buf(fd, buf_normal, sizeof(buf_normal) - 1, 0, sizeof(buf_normal) - 1); 371 + 372 + /* Compare full peek and normal read. */ 373 + if (memcmp(buf_peek, buf_normal, sizeof(buf_peek) - 1)) { 374 + fprintf(stderr, "Full peek data mismatch\n"); 375 + exit(EXIT_FAILURE); 376 + } 377 + 378 + close(fd); 379 + } 380 + 349 381 #define SOCK_BUF_SIZE (2 * 1024 * 1024) 350 382 #define SOCK_BUF_SIZE_SMALL (64 * 1024) 351 383 #define MAX_MSG_PAGES 4 ··· 1532 1500 } 1533 1501 1534 1502 /* Wait until there will be 128KB of data in rx queue. */ 1535 - while (1) { 1536 - ssize_t res; 1537 - 1538 - res = recv(fd, buf, buf_size, MSG_PEEK); 1539 - if (res == buf_size) 1540 - break; 1541 - 1542 - if (res <= 0) { 1543 - fprintf(stderr, "unexpected 'recv()' return: %zi\n", res); 1544 - exit(EXIT_FAILURE); 1545 - } 1546 - } 1503 + recv_buf(fd, buf, buf_size, MSG_PEEK, buf_size); 1547 1504 1548 1505 /* There is 128KB of data in the socket's rx queue, dequeue first 1549 1506 * 64KB, credit update is sent if 'low_rx_bytes_test' == true. 
··· 2540 2519 .name = "SOCK_STREAM TX credit bounds", 2541 2520 .run_client = test_stream_tx_credit_bounds_client, 2542 2521 .run_server = test_stream_tx_credit_bounds_server, 2522 + }, 2523 + { 2524 + .name = "SOCK_STREAM MSG_PEEK after partial recv", 2525 + .run_client = test_stream_msg_peek_client, 2526 + .run_server = test_stream_peek_after_recv_server, 2543 2527 }, 2544 2528 {}, 2545 2529 };

Configure Feed

Configure Feed