Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

1) Various ipvlan fixes from Eric Dumazet and Mahesh Bandewar.

The most important is to not assume the packet is RX just because
the destination address matches that of the device. Such an
assumption causes problems when an interface is put into loopback
mode.

2) If we retry when creating a new tc entry (because we dropped the
RTNL mutex in order to load a module, for example) we end up with
-EAGAIN and then loop trying to replay the request. But we didn't
reset some state when looping back to the top like this, and if
another thread meanwhile inserted the same tc entry we were trying
to, we re-link it creating an endless loop in the tc chain. Fix from
Daniel Borkmann.

3) There are two different WRITE bits in the MDIO address register for
the stmmac chip, depending upon the chip variant. Due to a bug we
could set them both, fix from Hock Leong Kweh.

4) Fix mlx4 bug in XDP_TX handling, from Tariq Toukan.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
net: stmmac: fix incorrect bit set in gmac4 mdio addr register
r8169: add support for RTL8168 series add-on card.
net: xdp: remove unused bpf_warn_invalid_xdp_buffer()
openvswitch: upcall: Fix vlan handling.
ipv4: Namespaceify tcp_tw_reuse knob
net: korina: Fix NAPI versus resources freeing
net, sched: fix soft lockup in tc_classify
net/mlx4_en: Fix user prio field in XDP forward
tipc: don't send FIN message from connectionless socket
ipvlan: fix multicast processing
ipvlan: fix various issues in ipvlan_process_multicast()

+112 -86
+4 -4
drivers/net/ethernet/korina.c
··· 900 900 DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR, 901 901 &lp->rx_dma_regs->dmasm); 902 902 903 - korina_free_ring(dev); 904 - 905 903 napi_disable(&lp->napi); 904 + 905 + korina_free_ring(dev); 906 906 907 907 if (korina_init(dev) < 0) { 908 908 printk(KERN_ERR "%s: cannot restart device\n", dev->name); ··· 1064 1064 tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR; 1065 1065 writel(tmp, &lp->rx_dma_regs->dmasm); 1066 1066 1067 - korina_free_ring(dev); 1068 - 1069 1067 napi_disable(&lp->napi); 1070 1068 1071 1069 cancel_work_sync(&lp->restart_task); 1070 + 1071 + korina_free_ring(dev); 1072 1072 1073 1073 free_irq(lp->rx_irq, dev); 1074 1074 free_irq(lp->tx_irq, dev);
+2 -1
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
··· 1638 1638 1639 1639 /* Configure tx cq's and rings */ 1640 1640 for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { 1641 - u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1; 1641 + u8 num_tx_rings_p_up = t == TX ? 1642 + priv->num_tx_rings_p_up : priv->tx_ring_num[t]; 1642 1643 1643 1644 for (i = 0; i < priv->tx_ring_num[t]; i++) { 1644 1645 /* Configure cq */
+1
drivers/net/ethernet/realtek/r8169.c
··· 326 326 static const struct pci_device_id rtl8169_pci_tbl[] = { 327 327 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, 328 328 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, 329 + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, 329 330 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, 330 331 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, 331 332 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 },
+3 -1
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
··· 116 116 unsigned int mii_address = priv->hw->mii.addr; 117 117 unsigned int mii_data = priv->hw->mii.data; 118 118 119 - u32 value = MII_WRITE | MII_BUSY; 119 + u32 value = MII_BUSY; 120 120 121 121 value |= (phyaddr << priv->hw->mii.addr_shift) 122 122 & priv->hw->mii.addr_mask; ··· 126 126 & priv->hw->mii.clk_csr_mask; 127 127 if (priv->plat->has_gmac4) 128 128 value |= MII_GMAC4_WRITE; 129 + else 130 + value |= MII_WRITE; 129 131 130 132 /* Wait until any existing MII operation is complete */ 131 133 if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
+5
drivers/net/ipvlan/ipvlan.h
··· 99 99 int count; 100 100 }; 101 101 102 + struct ipvl_skb_cb { 103 + bool tx_pkt; 104 + }; 105 + #define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0])) 106 + 102 107 static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) 103 108 { 104 109 return rcu_dereference(d->rx_handler_data);
+38 -22
drivers/net/ipvlan/ipvlan_core.c
··· 198 198 unsigned int mac_hash; 199 199 int ret; 200 200 u8 pkt_type; 201 - bool hlocal, dlocal; 201 + bool tx_pkt; 202 202 203 203 __skb_queue_head_init(&list); 204 204 ··· 207 207 spin_unlock_bh(&port->backlog.lock); 208 208 209 209 while ((skb = __skb_dequeue(&list)) != NULL) { 210 + struct net_device *dev = skb->dev; 211 + bool consumed = false; 212 + 210 213 ethh = eth_hdr(skb); 211 - hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); 214 + tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; 212 215 mac_hash = ipvlan_mac_hash(ethh->h_dest); 213 216 214 217 if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) ··· 219 216 else 220 217 pkt_type = PACKET_MULTICAST; 221 218 222 - dlocal = false; 223 219 rcu_read_lock(); 224 220 list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { 225 - if (hlocal && (ipvlan->dev == skb->dev)) { 226 - dlocal = true; 221 + if (tx_pkt && (ipvlan->dev == skb->dev)) 227 222 continue; 228 - } 229 223 if (!test_bit(mac_hash, ipvlan->mac_filters)) 230 224 continue; 231 - 225 + if (!(ipvlan->dev->flags & IFF_UP)) 226 + continue; 232 227 ret = NET_RX_DROP; 233 228 len = skb->len + ETH_HLEN; 234 229 nskb = skb_clone(skb, GFP_ATOMIC); 235 - if (!nskb) 236 - goto acct; 237 - 238 - nskb->pkt_type = pkt_type; 239 - nskb->dev = ipvlan->dev; 240 - if (hlocal) 241 - ret = dev_forward_skb(ipvlan->dev, nskb); 242 - else 243 - ret = netif_rx(nskb); 244 - acct: 230 + local_bh_disable(); 231 + if (nskb) { 232 + consumed = true; 233 + nskb->pkt_type = pkt_type; 234 + nskb->dev = ipvlan->dev; 235 + if (tx_pkt) 236 + ret = dev_forward_skb(ipvlan->dev, nskb); 237 + else 238 + ret = netif_rx(nskb); 239 + } 245 240 ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); 241 + local_bh_enable(); 246 242 } 247 243 rcu_read_unlock(); 248 244 249 - if (dlocal) { 245 + if (tx_pkt) { 250 246 /* If the packet originated here, send it out. 
*/ 251 247 skb->dev = port->dev; 252 248 skb->pkt_type = pkt_type; 253 249 dev_queue_xmit(skb); 254 250 } else { 255 - kfree_skb(skb); 251 + if (consumed) 252 + consume_skb(skb); 253 + else 254 + kfree_skb(skb); 256 255 } 256 + if (dev) 257 + dev_put(dev); 257 258 } 258 259 } 259 260 ··· 477 470 } 478 471 479 472 static void ipvlan_multicast_enqueue(struct ipvl_port *port, 480 - struct sk_buff *skb) 473 + struct sk_buff *skb, bool tx_pkt) 481 474 { 482 475 if (skb->protocol == htons(ETH_P_PAUSE)) { 483 476 kfree_skb(skb); 484 477 return; 485 478 } 486 479 480 + /* Record that the deferred packet is from TX or RX path. By 481 + * looking at mac-addresses on packet will lead to erronus decisions. 482 + * (This would be true for a loopback-mode on master device or a 483 + * hair-pin mode of the switch.) 484 + */ 485 + IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; 486 + 487 487 spin_lock(&port->backlog.lock); 488 488 if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { 489 + if (skb->dev) 490 + dev_hold(skb->dev); 489 491 __skb_queue_tail(&port->backlog, skb); 490 492 spin_unlock(&port->backlog.lock); 491 493 schedule_work(&port->wq); ··· 553 537 554 538 } else if (is_multicast_ether_addr(eth->h_dest)) { 555 539 ipvlan_skb_crossing_ns(skb, NULL); 556 - ipvlan_multicast_enqueue(ipvlan->port, skb); 540 + ipvlan_multicast_enqueue(ipvlan->port, skb, true); 557 541 return NET_XMIT_SUCCESS; 558 542 } 559 543 ··· 650 634 */ 651 635 if (nskb) { 652 636 ipvlan_skb_crossing_ns(nskb, NULL); 653 - ipvlan_multicast_enqueue(port, nskb); 637 + ipvlan_multicast_enqueue(port, nskb, false); 654 638 } 655 639 } 656 640 } else {
+6 -1
drivers/net/ipvlan/ipvlan_main.c
··· 135 135 static void ipvlan_port_destroy(struct net_device *dev) 136 136 { 137 137 struct ipvl_port *port = ipvlan_port_get_rtnl(dev); 138 + struct sk_buff *skb; 138 139 139 140 dev->priv_flags &= ~IFF_IPVLAN_MASTER; 140 141 if (port->mode == IPVLAN_MODE_L3S) { ··· 145 144 } 146 145 netdev_rx_handler_unregister(dev); 147 146 cancel_work_sync(&port->wq); 148 - __skb_queue_purge(&port->backlog); 147 + while ((skb = __skb_dequeue(&port->backlog)) != NULL) { 148 + if (skb->dev) 149 + dev_put(skb->dev); 150 + kfree_skb(skb); 151 + } 149 152 kfree(port); 150 153 } 151 154
-1
include/linux/filter.h
··· 610 610 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, 611 611 const struct bpf_insn *patch, u32 len); 612 612 void bpf_warn_invalid_xdp_action(u32 act); 613 - void bpf_warn_invalid_xdp_buffer(void); 614 613 615 614 #ifdef CONFIG_BPF_JIT 616 615 extern int bpf_jit_enable;
+1
include/net/netns/ipv4.h
··· 110 110 int sysctl_tcp_orphan_retries; 111 111 int sysctl_tcp_fin_timeout; 112 112 unsigned int sysctl_tcp_notsent_lowat; 113 + int sysctl_tcp_tw_reuse; 113 114 114 115 int sysctl_igmp_max_memberships; 115 116 int sysctl_igmp_max_msf;
-1
include/net/tcp.h
··· 252 252 extern int sysctl_tcp_rmem[3]; 253 253 extern int sysctl_tcp_app_win; 254 254 extern int sysctl_tcp_adv_win_scale; 255 - extern int sysctl_tcp_tw_reuse; 256 255 extern int sysctl_tcp_frto; 257 256 extern int sysctl_tcp_low_latency; 258 257 extern int sysctl_tcp_nometrics_save;
-6
net/core/filter.c
··· 2972 2972 } 2973 2973 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 2974 2974 2975 - void bpf_warn_invalid_xdp_buffer(void) 2976 - { 2977 - WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n"); 2978 - } 2979 - EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer); 2980 - 2981 2975 static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, 2982 2976 int src_reg, int ctx_off, 2983 2977 struct bpf_insn *insn_buf,
+7 -7
net/ipv4/sysctl_net_ipv4.c
··· 433 433 .extra2 = &tcp_adv_win_scale_max, 434 434 }, 435 435 { 436 - .procname = "tcp_tw_reuse", 437 - .data = &sysctl_tcp_tw_reuse, 438 - .maxlen = sizeof(int), 439 - .mode = 0644, 440 - .proc_handler = proc_dointvec 441 - }, 442 - { 443 436 .procname = "tcp_frto", 444 437 .data = &sysctl_tcp_frto, 445 438 .maxlen = sizeof(int), ··· 952 959 .maxlen = sizeof(unsigned int), 953 960 .mode = 0644, 954 961 .proc_handler = proc_dointvec, 962 + }, 963 + { 964 + .procname = "tcp_tw_reuse", 965 + .data = &init_net.ipv4.sysctl_tcp_tw_reuse, 966 + .maxlen = sizeof(int), 967 + .mode = 0644, 968 + .proc_handler = proc_dointvec 955 969 }, 956 970 #ifdef CONFIG_IP_ROUTE_MULTIPATH 957 971 {
+2 -2
net/ipv4/tcp_ipv4.c
··· 84 84 #include <crypto/hash.h> 85 85 #include <linux/scatterlist.h> 86 86 87 - int sysctl_tcp_tw_reuse __read_mostly; 88 87 int sysctl_tcp_low_latency __read_mostly; 89 88 90 89 #ifdef CONFIG_TCP_MD5SIG ··· 119 120 and use initial timestamp retrieved from peer table. 120 121 */ 121 122 if (tcptw->tw_ts_recent_stamp && 122 - (!twp || (sysctl_tcp_tw_reuse && 123 + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && 123 124 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 124 125 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 125 126 if (tp->write_seq == 0) ··· 2455 2456 net->ipv4.sysctl_tcp_orphan_retries = 0; 2456 2457 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; 2457 2458 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; 2459 + net->ipv4.sysctl_tcp_tw_reuse = 0; 2458 2460 2459 2461 return 0; 2460 2462 fail:
-1
net/openvswitch/datapath.c
··· 606 606 rcu_assign_pointer(flow->sf_acts, acts); 607 607 packet->priority = flow->key.phy.priority; 608 608 packet->mark = flow->key.phy.skb_mark; 609 - packet->protocol = flow->key.eth.type; 610 609 611 610 rcu_read_lock(); 612 611 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
+27 -27
net/openvswitch/flow.c
··· 312 312 * Returns 0 if it encounters a non-vlan or incomplete packet. 313 313 * Returns 1 after successfully parsing vlan tag. 314 314 */ 315 - static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) 315 + static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh, 316 + bool untag_vlan) 316 317 { 317 318 struct vlan_head *vh = (struct vlan_head *)skb->data; 318 319 ··· 331 330 key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); 332 331 key_vh->tpid = vh->tpid; 333 332 334 - __skb_pull(skb, sizeof(struct vlan_head)); 333 + if (unlikely(untag_vlan)) { 334 + int offset = skb->data - skb_mac_header(skb); 335 + u16 tci; 336 + int err; 337 + 338 + __skb_push(skb, offset); 339 + err = __skb_vlan_pop(skb, &tci); 340 + __skb_pull(skb, offset); 341 + if (err) 342 + return err; 343 + __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci); 344 + } else { 345 + __skb_pull(skb, sizeof(struct vlan_head)); 346 + } 335 347 return 1; 336 348 } 337 349 ··· 365 351 key->eth.vlan.tpid = skb->vlan_proto; 366 352 } else { 367 353 /* Parse outer vlan tag in the non-accelerated case. */ 368 - res = parse_vlan_tag(skb, &key->eth.vlan); 354 + res = parse_vlan_tag(skb, &key->eth.vlan, true); 369 355 if (res <= 0) 370 356 return res; 371 357 } 372 358 373 359 /* Parse inner vlan tag. */ 374 - res = parse_vlan_tag(skb, &key->eth.cvlan); 360 + res = parse_vlan_tag(skb, &key->eth.cvlan, false); 375 361 if (res <= 0) 376 362 return res; 377 363 ··· 814 800 if (err) 815 801 return err; 816 802 817 - if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { 818 - /* key_extract assumes that skb->protocol is set-up for 819 - * layer 3 packets which is the case for other callers, 820 - * in particular packets recieved from the network stack. 821 - * Here the correct value can be set from the metadata 822 - * extracted above. 
823 - */ 824 - skb->protocol = key->eth.type; 825 - } else { 826 - struct ethhdr *eth; 803 + /* key_extract assumes that skb->protocol is set-up for 804 + * layer 3 packets which is the case for other callers, 805 + * in particular packets received from the network stack. 806 + * Here the correct value can be set from the metadata 807 + * extracted above. 808 + * For L2 packet key eth type would be zero. skb protocol 809 + * would be set to correct value later during key-extact. 810 + */ 827 811 828 - skb_reset_mac_header(skb); 829 - eth = eth_hdr(skb); 830 - 831 - /* Normally, setting the skb 'protocol' field would be 832 - * handled by a call to eth_type_trans(), but it assumes 833 - * there's a sending device, which we may not have. 834 - */ 835 - if (eth_proto_is_802_3(eth->h_proto)) 836 - skb->protocol = eth->h_proto; 837 - else 838 - skb->protocol = htons(ETH_P_802_2); 839 - } 840 - 812 + skb->protocol = key->eth.type; 841 813 return key_extract(skb, key); 842 814 }
+3 -1
net/sched/cls_api.c
··· 148 148 unsigned long cl; 149 149 unsigned long fh; 150 150 int err; 151 - int tp_created = 0; 151 + int tp_created; 152 152 153 153 if ((n->nlmsg_type != RTM_GETTFILTER) && 154 154 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 155 155 return -EPERM; 156 156 157 157 replay: 158 + tp_created = 0; 159 + 158 160 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); 159 161 if (err < 0) 160 162 return err;
+13 -11
net/tipc/socket.c
··· 441 441 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 442 442 if (TIPC_SKB_CB(skb)->bytes_read) { 443 443 kfree_skb(skb); 444 - } else { 445 - if (!tipc_sk_type_connectionless(sk) && 446 - sk->sk_state != TIPC_DISCONNECTING) { 447 - tipc_set_sk_state(sk, TIPC_DISCONNECTING); 448 - tipc_node_remove_conn(net, dnode, tsk->portid); 449 - } 450 - tipc_sk_respond(sk, skb, error); 444 + continue; 451 445 } 446 + if (!tipc_sk_type_connectionless(sk) && 447 + sk->sk_state != TIPC_DISCONNECTING) { 448 + tipc_set_sk_state(sk, TIPC_DISCONNECTING); 449 + tipc_node_remove_conn(net, dnode, tsk->portid); 450 + } 451 + tipc_sk_respond(sk, skb, error); 452 452 } 453 + 454 + if (tipc_sk_type_connectionless(sk)) 455 + return; 456 + 453 457 if (sk->sk_state != TIPC_DISCONNECTING) { 454 458 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, 455 459 TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, ··· 461 457 tsk->portid, error); 462 458 if (skb) 463 459 tipc_node_xmit_skb(net, skb, dnode, tsk->portid); 464 - if (!tipc_sk_type_connectionless(sk)) { 465 - tipc_node_remove_conn(net, dnode, tsk->portid); 466 - tipc_set_sk_state(sk, TIPC_DISCONNECTING); 467 - } 460 + tipc_node_remove_conn(net, dnode, tsk->portid); 461 + tipc_set_sk_state(sk, TIPC_DISCONNECTING); 468 462 } 469 463 } 470 464