Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'ovs-drop-reasons'

Adrian Moreno says:

====================
openvswitch: add drop reasons

There is currently a gap in drop visibility in the openvswitch module.
This series tries to improve this by adding a new drop reason subsystem
for OVS.

Apart from adding a new drop reason subsystem and some common drop
reasons, this series takes Eric's preliminary work [1] on adding an
explicit drop action and integrates it into the same subsystem.

A limitation of this series is that it does not report upcall errors.
The reason is that there could be many sources of upcall drops and the
most common one, which is the netlink buffer overflow, cannot be
reported via kfree_skb() because the skb is freed in the netlink layer
(see [2]). Therefore, using a reason for the rare events and not the
common one would be even more misleading. I'd propose we add (in a
follow up patch) a tracepoint to better report upcall errors.

[1] https://lore.kernel.org/netdev/202306300609.tdRdZscy-lkp@intel.com/T/
[2] commit 1100248a5c5c ("openvswitch: Fix double reporting of drops in dropwatch")

---
v4 -> v5:
- Rebased
- Added a helper function to explicitly convert drop reason enum types

v3 -> v4:
- Changed names of errors following Ilya's suggestions
- Moved the ovs-dpctl.py changes from patch 7/7 to 3/7
- Added a test to ensure actions following a drop are rejected

rfc2 -> v3:
- Rebased on top of latest net-next

rfc1 -> rfc2:
- Fail when an explicit drop is not the last action
- Added a drop reason for action errors
- Added braces around macros
- Dropped patch that added support for masks in ovs-dpctl.py as it's now
included in Aaron's series [2].
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+226 -18
+6
include/net/dropreason.h
··· 23 23 */ 24 24 SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR, 25 25 26 + /** 27 + * @SKB_DROP_REASON_SUBSYS_OPENVSWITCH: openvswitch drop reasons, 28 + * see net/openvswitch/drop.h 29 + */ 30 + SKB_DROP_REASON_SUBSYS_OPENVSWITCH, 31 + 26 32 /** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */ 27 33 SKB_DROP_REASON_SUBSYS_NUM 28 34 };
+2
include/uapi/linux/openvswitch.h
··· 965 965 * start of the packet or at the start of the l3 header depending on the value 966 966 * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS 967 967 * argument. 968 + * @OVS_ACTION_ATTR_DROP: Explicit drop action. 968 969 * 969 970 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all 970 971 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment ··· 1003 1002 OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ 1004 1003 OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ 1005 1004 OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ 1005 + OVS_ACTION_ATTR_DROP, /* u32 error code. */ 1006 1006 1007 1007 __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted 1008 1008 * from userspace. */
+31 -11
net/openvswitch/actions.c
··· 27 27 #include <net/sctp/checksum.h> 28 28 29 29 #include "datapath.h" 30 + #include "drop.h" 30 31 #include "flow.h" 31 32 #include "conntrack.h" 32 33 #include "vport.h" ··· 782 781 struct vport *vport = data->vport; 783 782 784 783 if (skb_cow_head(skb, data->l2_len) < 0) { 785 - kfree_skb(skb); 784 + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); 786 785 return -ENOMEM; 787 786 } 788 787 ··· 853 852 struct sk_buff *skb, u16 mru, 854 853 struct sw_flow_key *key) 855 854 { 855 + enum ovs_drop_reason reason; 856 856 u16 orig_network_offset = 0; 857 857 858 858 if (eth_p_mpls(skb->protocol)) { ··· 863 861 864 862 if (skb_network_offset(skb) > MAX_L2_LEN) { 865 863 OVS_NLERR(1, "L2 header too long to fragment"); 864 + reason = OVS_DROP_FRAG_L2_TOO_LONG; 866 865 goto err; 867 866 } 868 867 ··· 904 901 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", 905 902 ovs_vport_name(vport), ntohs(key->eth.type), mru, 906 903 vport->dev->mtu); 904 + reason = OVS_DROP_FRAG_INVALID_PROTO; 907 905 goto err; 908 906 } 909 907 910 908 return; 911 909 err: 912 - kfree_skb(skb); 910 + ovs_kfree_skb_reason(skb, reason); 913 911 } 914 912 915 913 static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, ··· 937 933 938 934 ovs_fragment(net, vport, skb, mru, key); 939 935 } else { 940 - kfree_skb(skb); 936 + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); 941 937 } 942 938 } else { 943 - kfree_skb(skb); 939 + kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY); 944 940 } 945 941 } 946 942 ··· 1014 1010 return clone_execute(dp, skb, key, 0, nla_data(actions), 1015 1011 nla_len(actions), true, false); 1016 1012 1017 - consume_skb(skb); 1013 + ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL); 1018 1014 return 0; 1019 1015 } 1020 1016 ··· 1040 1036 if ((arg->probability != U32_MAX) && 1041 1037 (!arg->probability || get_random_u32() > arg->probability)) { 1042 1038 if (last) 1043 - consume_skb(skb); 1039 + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION); 1044 
1040 return 0; 1045 1041 } 1046 1042 ··· 1301 1297 if (trace_ovs_do_execute_action_enabled()) 1302 1298 trace_ovs_do_execute_action(dp, skb, key, a, rem); 1303 1299 1300 + /* Actions that rightfully have to consume the skb should do it 1301 + * and return directly. 1302 + */ 1304 1303 switch (nla_type(a)) { 1305 1304 case OVS_ACTION_ATTR_OUTPUT: { 1306 1305 int port = nla_get_u32(a); ··· 1339 1332 output_userspace(dp, skb, key, a, attr, 1340 1333 len, OVS_CB(skb)->cutlen); 1341 1334 OVS_CB(skb)->cutlen = 0; 1335 + if (nla_is_last(a, rem)) { 1336 + consume_skb(skb); 1337 + return 0; 1338 + } 1342 1339 break; 1343 1340 1344 1341 case OVS_ACTION_ATTR_HASH: ··· 1457 1446 1458 1447 case OVS_ACTION_ATTR_METER: 1459 1448 if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) { 1460 - consume_skb(skb); 1449 + ovs_kfree_skb_reason(skb, OVS_DROP_METER); 1461 1450 return 0; 1462 1451 } 1463 1452 break; ··· 1488 1477 return dec_ttl_exception_handler(dp, skb, 1489 1478 key, a); 1490 1479 break; 1480 + 1481 + case OVS_ACTION_ATTR_DROP: { 1482 + enum ovs_drop_reason reason = nla_get_u32(a) 1483 + ? OVS_DROP_EXPLICIT_WITH_ERROR 1484 + : OVS_DROP_EXPLICIT; 1485 + 1486 + ovs_kfree_skb_reason(skb, reason); 1487 + return 0; 1488 + } 1491 1489 } 1492 1490 1493 1491 if (unlikely(err)) { 1494 - kfree_skb(skb); 1492 + ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR); 1495 1493 return err; 1496 1494 } 1497 1495 } 1498 1496 1499 - consume_skb(skb); 1497 + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION); 1500 1498 return 0; 1501 1499 } 1502 1500 ··· 1567 1547 /* Out of per CPU action FIFO space. Drop the 'skb' and 1568 1548 * log an error. 
1569 1549 */ 1570 - kfree_skb(skb); 1550 + ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT); 1571 1551 1572 1552 if (net_ratelimit()) { 1573 1553 if (actions) { /* Sample action */ ··· 1619 1599 if (unlikely(level > OVS_RECURSION_LIMIT)) { 1620 1600 net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", 1621 1601 ovs_dp_name(dp)); 1622 - kfree_skb(skb); 1602 + ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT); 1623 1603 err = -ENETDOWN; 1624 1604 goto out; 1625 1605 }
+2 -1
net/openvswitch/conntrack.c
··· 29 29 #include <net/netfilter/nf_conntrack_act_ct.h> 30 30 31 31 #include "datapath.h" 32 + #include "drop.h" 32 33 #include "conntrack.h" 33 34 #include "flow.h" 34 35 #include "flow_netlink.h" ··· 1036 1035 1037 1036 skb_push_rcsum(skb, nh_ofs); 1038 1037 if (err) 1039 - kfree_skb(skb); 1038 + ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK); 1040 1039 return err; 1041 1040 } 1042 1041
+16
net/openvswitch/datapath.c
··· 41 41 #include <net/pkt_cls.h> 42 42 43 43 #include "datapath.h" 44 + #include "drop.h" 44 45 #include "flow.h" 45 46 #include "flow_table.h" 46 47 #include "flow_netlink.h" ··· 2703 2702 .size = sizeof(struct ovs_net), 2704 2703 }; 2705 2704 2705 + static const char * const ovs_drop_reasons[] = { 2706 + #define S(x) (#x), 2707 + OVS_DROP_REASONS(S) 2708 + #undef S 2709 + }; 2710 + 2711 + static struct drop_reason_list drop_reason_list_ovs = { 2712 + .reasons = ovs_drop_reasons, 2713 + .n_reasons = ARRAY_SIZE(ovs_drop_reasons), 2714 + }; 2715 + 2706 2716 static int __init dp_init(void) 2707 2717 { 2708 2718 int err; ··· 2755 2743 if (err < 0) 2756 2744 goto error_unreg_netdev; 2757 2745 2746 + drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH, 2747 + &drop_reason_list_ovs); 2748 + 2758 2749 return 0; 2759 2750 2760 2751 error_unreg_netdev: ··· 2784 2769 ovs_netdev_exit(); 2785 2770 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2786 2771 unregister_pernet_device(&ovs_net_ops); 2772 + drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH); 2787 2773 rcu_barrier(); 2788 2774 ovs_vport_exit(); 2789 2775 ovs_flow_exit();
+41
net/openvswitch/drop.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * OpenvSwitch drop reason list. 4 + */ 5 + 6 + #ifndef OPENVSWITCH_DROP_H 7 + #define OPENVSWITCH_DROP_H 8 + #include <linux/skbuff.h> 9 + #include <net/dropreason.h> 10 + 11 + #define OVS_DROP_REASONS(R) \ 12 + R(OVS_DROP_LAST_ACTION) \ 13 + R(OVS_DROP_ACTION_ERROR) \ 14 + R(OVS_DROP_EXPLICIT) \ 15 + R(OVS_DROP_EXPLICIT_WITH_ERROR) \ 16 + R(OVS_DROP_METER) \ 17 + R(OVS_DROP_RECURSION_LIMIT) \ 18 + R(OVS_DROP_DEFERRED_LIMIT) \ 19 + R(OVS_DROP_FRAG_L2_TOO_LONG) \ 20 + R(OVS_DROP_FRAG_INVALID_PROTO) \ 21 + R(OVS_DROP_CONNTRACK) \ 22 + R(OVS_DROP_IP_TTL) \ 23 + /* deliberate comment for trailing \ */ 24 + 25 + enum ovs_drop_reason { 26 + __OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH << 27 + SKB_DROP_REASON_SUBSYS_SHIFT, 28 + #define ENUM(x) x, 29 + OVS_DROP_REASONS(ENUM) 30 + #undef ENUM 31 + 32 + OVS_DROP_MAX, 33 + }; 34 + 35 + static inline void 36 + ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason) 37 + { 38 + kfree_skb_reason(skb, (u32)reason); 39 + } 40 + 41 + #endif /* OPENVSWITCH_DROP_H */
+9 -1
net/openvswitch/flow_netlink.c
··· 38 38 #include <net/tun_proto.h> 39 39 #include <net/erspan.h> 40 40 41 + #include "drop.h" 41 42 #include "flow_netlink.h" 42 43 43 44 struct ovs_len_tbl { ··· 62 61 case OVS_ACTION_ATTR_RECIRC: 63 62 case OVS_ACTION_ATTR_TRUNC: 64 63 case OVS_ACTION_ATTR_USERSPACE: 64 + case OVS_ACTION_ATTR_DROP: 65 65 break; 66 66 67 67 case OVS_ACTION_ATTR_CT: ··· 2396 2394 /* Whenever new actions are added, the need to update this 2397 2395 * function should be considered. 2398 2396 */ 2399 - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); 2397 + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); 2400 2398 2401 2399 if (!actions) 2402 2400 return; ··· 3184 3182 [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, 3185 3183 [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), 3186 3184 [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, 3185 + [OVS_ACTION_ATTR_DROP] = sizeof(u32), 3187 3186 }; 3188 3187 const struct ovs_action_push_vlan *vlan; 3189 3188 int type = nla_type(a); ··· 3454 3451 if (err) 3455 3452 return err; 3456 3453 skip_copy = true; 3454 + break; 3455 + 3456 + case OVS_ACTION_ATTR_DROP: 3457 + if (!nla_is_last(a, rem)) 3458 + return -EINVAL; 3457 3459 break; 3458 3460 3459 3461 default:
+101 -1
tools/testing/selftests/net/openvswitch/openvswitch.sh
··· 16 16 connect_v4 ip4-xon: Basic ipv4 ping between two NS 17 17 nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT 18 18 netlink_checks ovsnl: validate netlink attrs and settings 19 - upcall_interfaces ovs: test the upcall interfaces" 19 + upcall_interfaces ovs: test the upcall interfaces 20 + drop_reason drop: test drop reasons are emitted" 20 21 21 22 info() { 22 23 [ $VERBOSE = 0 ] || echo $* ··· 142 141 return 0 143 142 } 144 143 144 + ovs_drop_record_and_run () { 145 + local sbx=$1 146 + shift 147 + 148 + perf record -a -q -e skb:kfree_skb -o ${ovs_dir}/perf.data $* \ 149 + >> ${ovs_dir}/stdout 2>> ${ovs_dir}/stderr 150 + return $? 151 + } 152 + 153 + ovs_drop_reason_count() 154 + { 155 + local reason=$1 156 + 157 + local perf_output=`perf script -i ${ovs_dir}/perf.data -F trace:event,trace` 158 + local pattern="skb:kfree_skb:.*reason: $reason" 159 + 160 + return `echo "$perf_output" | grep "$pattern" | wc -l` 161 + } 162 + 145 163 usage() { 146 164 echo 147 165 echo "$0 [OPTIONS] [TEST]..." ··· 173 153 echo 174 154 echo "Available tests${tests}" 175 155 exit 1 156 + } 157 + 158 + # drop_reason test 159 + # - drop packets and verify the right drop reason is reported 160 + test_drop_reason() { 161 + which perf >/dev/null 2>&1 || return $ksft_skip 162 + 163 + sbx_add "test_drop_reason" || return $? 
164 + 165 + ovs_add_dp "test_drop_reason" dropreason || return 1 166 + 167 + info "create namespaces" 168 + for ns in client server; do 169 + ovs_add_netns_and_veths "test_drop_reason" "dropreason" "$ns" \ 170 + "${ns:0:1}0" "${ns:0:1}1" || return 1 171 + done 172 + 173 + # Setup client namespace 174 + ip netns exec client ip addr add 172.31.110.10/24 dev c1 175 + ip netns exec client ip link set c1 up 176 + 177 + # Setup server namespace 178 + ip netns exec server ip addr add 172.31.110.20/24 dev s1 179 + ip netns exec server ip link set s1 up 180 + 181 + # Allow ARP 182 + ovs_add_flow "test_drop_reason" dropreason \ 183 + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 184 + ovs_add_flow "test_drop_reason" dropreason \ 185 + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 186 + 187 + # Allow client ICMP traffic but drop return path 188 + ovs_add_flow "test_drop_reason" dropreason \ 189 + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" '2' 190 + ovs_add_flow "test_drop_reason" dropreason \ 191 + "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' 192 + 193 + ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 194 + ovs_drop_reason_count 0x30001 # OVS_DROP_LAST_ACTION 195 + if [[ "$?" -ne "2" ]]; then 196 + info "Did not detect expected drops: $?" 197 + return 1 198 + fi 199 + 200 + # Drop UDP 6000 traffic with an explicit action and an error code. 201 + ovs_add_flow "test_drop_reason" dropreason \ 202 + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=6000)" \ 203 + 'drop(42)' 204 + # Drop UDP 7000 traffic with an explicit action with no error code. 
205 + ovs_add_flow "test_drop_reason" dropreason \ 206 + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=7000)" \ 207 + 'drop(0)' 208 + 209 + ovs_drop_record_and_run \ 210 + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 211 + ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_WITH_ERROR 212 + if [[ "$?" -ne "1" ]]; then 213 + info "Did not detect expected explicit error drops: $?" 214 + return 1 215 + fi 216 + 217 + ovs_drop_record_and_run \ 218 + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 219 + ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT 220 + if [[ "$?" -ne "1" ]]; then 221 + info "Did not detect expected explicit drops: $?" 222 + return 1 223 + fi 224 + 225 + return 0 176 226 } 177 227 178 228 # arp_ping test ··· 483 393 wc -l) == 2 ] || \ 484 394 return 1 485 395 396 + ERR_MSG="Flow actions may not be safe on all matching packets" 397 + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") 398 + ovs_add_flow "test_netlink_checks" nv0 \ 399 + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \ 400 + &> /dev/null && return 1 401 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") 402 + if [ "$PRE_TEST" == "$POST_TEST" ]; then 403 + info "failed - error not generated" 404 + return 1 405 + fi 486 406 return 0 487 407 } 488 408
+18 -4
tools/testing/selftests/net/openvswitch/ovs-dpctl.py
··· 301 301 ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), 302 302 ("OVS_ACTION_ATTR_ADD_MPLS", "none"), 303 303 ("OVS_ACTION_ATTR_DEC_TTL", "none"), 304 + ("OVS_ACTION_ATTR_DROP", "uint32"), 304 305 ) 305 306 306 307 class ctact(nla): ··· 448 447 print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) 449 448 elif field[0] == "OVS_ACTION_ATTR_TRUNC": 450 449 print_str += "trunc(%d)" % int(self.get_attr(field[0])) 450 + elif field[0] == "OVS_ACTION_ATTR_DROP": 451 + print_str += "drop(%d)" % int(self.get_attr(field[0])) 451 452 elif field[1] == "flag": 452 453 if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": 453 454 print_str += "ct_clear" ··· 471 468 while len(actstr) != 0: 472 469 parsed = False 473 470 if actstr.startswith("drop"): 474 - # for now, drops have no explicit action, so we 475 - # don't need to set any attributes. The final 476 - # act of the processing chain will just drop the packet 477 - return 471 + # If no reason is provided, the implicit drop is used (i.e no 472 + # action). If some reason is given, an explicit action is used. 473 + actstr, reason = parse_extract_field( 474 + actstr, 475 + "drop(", 476 + "([0-9]+)", 477 + lambda x: int(x, 0), 478 + False, 479 + None, 480 + ) 481 + if reason is not None: 482 + self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason]) 483 + parsed = True 484 + else: 485 + return 478 486 479 487 elif parse_starts_block(actstr, "^(\d+)", False, True): 480 488 actstr, output = parse_extract_field(