Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'add-support-for-so_priority-cmsg'

Anna Emese Nyiri says:

====================
Add support for SO_PRIORITY cmsg

Introduce a new helper function, `sk_set_prio_allowed`,
to centralize the logic for validating priority settings.
Add support for the `SO_PRIORITY` control message,
enabling user-space applications to set socket priority
via control messages (cmsg).
====================

Link: https://patch.msgid.link/20241213084457.45120-1-annaemesenyiri@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+228 -16
+2
arch/alpha/include/uapi/asm/socket.h
··· 148 148 149 149 #define SCM_TS_OPT_ID 81 150 150 151 + #define SO_RCVPRIORITY 82 152 + 151 153 #if !defined(__KERNEL__) 152 154 153 155 #if __BITS_PER_LONG == 64
+2
arch/mips/include/uapi/asm/socket.h
··· 159 159 160 160 #define SCM_TS_OPT_ID 81 161 161 162 + #define SO_RCVPRIORITY 82 163 + 162 164 #if !defined(__KERNEL__) 163 165 164 166 #if __BITS_PER_LONG == 64
+2
arch/parisc/include/uapi/asm/socket.h
··· 140 140 141 141 #define SCM_TS_OPT_ID 0x404C 142 142 143 + #define SO_RCVPRIORITY 0x404D 144 + 143 145 #if !defined(__KERNEL__) 144 146 145 147 #if __BITS_PER_LONG == 64
+2
arch/sparc/include/uapi/asm/socket.h
··· 141 141 142 142 #define SCM_TS_OPT_ID 0x005a 143 143 144 + #define SO_RCVPRIORITY 0x005b 145 + 144 146 #if !defined(__KERNEL__) 145 147 146 148
+1 -1
include/net/inet_sock.h
··· 172 172 u8 tx_flags; 173 173 __u8 ttl; 174 174 __s16 tos; 175 - char priority; 175 + u32 priority; 176 176 __u16 gso_size; 177 177 u32 ts_opt_id; 178 178 u64 transmit_time;
+1 -1
include/net/ip.h
··· 81 81 __u8 protocol; 82 82 __u8 ttl; 83 83 __s16 tos; 84 - char priority; 85 84 __u16 gso_size; 86 85 }; 87 86 ··· 95 96 ipcm_init(ipcm); 96 97 97 98 ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); 99 + ipcm->sockc.priority = READ_ONCE(inet->sk.sk_priority); 98 100 ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); 99 101 ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); 100 102 ipcm->addr = inet->inet_saddr;
+6 -2
include/net/sock.h
··· 953 953 SOCK_XDP, /* XDP is attached */ 954 954 SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ 955 955 SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ 956 + SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */ 956 957 }; 957 958 958 959 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) ··· 1815 1814 u32 mark; 1816 1815 u32 tsflags; 1817 1816 u32 ts_opt_id; 1817 + u32 priority; 1818 1818 }; 1819 1819 1820 1820 static inline void sockcm_init(struct sockcm_cookie *sockc, 1821 1821 const struct sock *sk) 1822 1822 { 1823 1823 *sockc = (struct sockcm_cookie) { 1824 - .tsflags = READ_ONCE(sk->sk_tsflags) 1824 + .tsflags = READ_ONCE(sk->sk_tsflags), 1825 + .priority = READ_ONCE(sk->sk_priority), 1825 1826 }; 1826 1827 } 1827 1828 ··· 2661 2658 { 2662 2659 #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ 2663 2660 (1UL << SOCK_RCVTSTAMP) | \ 2664 - (1UL << SOCK_RCVMARK)) 2661 + (1UL << SOCK_RCVMARK) |\ 2662 + (1UL << SOCK_RCVPRIORITY)) 2665 2663 #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ 2666 2664 SOF_TIMESTAMPING_RAW_HARDWARE) 2667 2665
+2
include/uapi/asm-generic/socket.h
··· 143 143 144 144 #define SCM_TS_OPT_ID 81 145 145 146 + #define SO_RCVPRIORITY 82 147 + 146 148 #if !defined(__KERNEL__) 147 149 148 150 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+1 -1
net/can/raw.c
··· 962 962 } 963 963 964 964 skb->dev = dev; 965 - skb->priority = READ_ONCE(sk->sk_priority); 965 + skb->priority = sockc.priority; 966 966 skb->mark = READ_ONCE(sk->sk_mark); 967 967 skb->tstamp = sockc.transmit_time; 968 968
+23 -3
net/core/sock.c
··· 454 454 return 0; 455 455 } 456 456 457 + static bool sk_set_prio_allowed(const struct sock *sk, int val) 458 + { 459 + return ((val >= TC_PRIO_BESTEFFORT && val <= TC_PRIO_INTERACTIVE) || 460 + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || 461 + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)); 462 + } 463 + 457 464 static bool sock_needs_netstamp(const struct sock *sk) 458 465 { 459 466 switch (sk->sk_family) { ··· 1200 1193 /* handle options which do not require locking the socket. */ 1201 1194 switch (optname) { 1202 1195 case SO_PRIORITY: 1203 - if ((val >= 0 && val <= 6) || 1204 - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || 1205 - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { 1196 + if (sk_set_prio_allowed(sk, val)) { 1206 1197 sock_set_priority(sk, val); 1207 1198 return 0; 1208 1199 } ··· 1517 1512 break; 1518 1513 case SO_RCVMARK: 1519 1514 sock_valbool_flag(sk, SOCK_RCVMARK, valbool); 1515 + break; 1516 + 1517 + case SO_RCVPRIORITY: 1518 + sock_valbool_flag(sk, SOCK_RCVPRIORITY, valbool); 1520 1519 break; 1521 1520 1522 1521 case SO_RXQ_OVFL: ··· 1949 1940 1950 1941 case SO_RCVMARK: 1951 1942 v.val = sock_flag(sk, SOCK_RCVMARK); 1943 + break; 1944 + 1945 + case SO_RCVPRIORITY: 1946 + v.val = sock_flag(sk, SOCK_RCVPRIORITY); 1952 1947 break; 1953 1948 1954 1949 case SO_RXQ_OVFL: ··· 2954 2941 /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ 2955 2942 case SCM_RIGHTS: 2956 2943 case SCM_CREDENTIALS: 2944 + break; 2945 + case SO_PRIORITY: 2946 + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2947 + return -EINVAL; 2948 + if (!sk_set_prio_allowed(sk, *(u32 *)CMSG_DATA(cmsg))) 2949 + return -EPERM; 2950 + sockc->priority = *(u32 *)CMSG_DATA(cmsg); 2957 2951 break; 2958 2952 default: 2959 2953 return -EINVAL;
+2 -2
net/ipv4/ip_output.c
··· 1333 1333 cork->ttl = ipc->ttl; 1334 1334 cork->tos = ipc->tos; 1335 1335 cork->mark = ipc->sockc.mark; 1336 - cork->priority = ipc->priority; 1336 + cork->priority = ipc->sockc.priority; 1337 1337 cork->transmit_time = ipc->sockc.transmit_time; 1338 1338 cork->tx_flags = 0; 1339 1339 sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags); ··· 1470 1470 ip_options_build(skb, opt, cork->addr, rt); 1471 1471 } 1472 1472 1473 - skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); 1473 + skb->priority = cork->priority; 1474 1474 skb->mark = cork->mark; 1475 1475 if (sk_is_tcp(sk)) 1476 1476 skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
+1 -1
net/ipv4/ip_sockglue.c
··· 315 315 if (val < 0 || val > 255) 316 316 return -EINVAL; 317 317 ipc->tos = val; 318 - ipc->priority = rt_tos2priority(ipc->tos); 318 + ipc->sockc.priority = rt_tos2priority(ipc->tos); 319 319 break; 320 320 case IP_PROTOCOL: 321 321 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+1 -1
net/ipv4/raw.c
··· 358 358 skb_reserve(skb, hlen); 359 359 360 360 skb->protocol = htons(ETH_P_IP); 361 - skb->priority = READ_ONCE(sk->sk_priority); 361 + skb->priority = sockc->priority; 362 362 skb->mark = sockc->mark; 363 363 skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); 364 364 skb_dst_set(skb, &rt->dst);
+2 -1
net/ipv6/ip6_output.c
··· 1401 1401 cork->base.gso_size = ipc6->gso_size; 1402 1402 cork->base.tx_flags = 0; 1403 1403 cork->base.mark = ipc6->sockc.mark; 1404 + cork->base.priority = ipc6->sockc.priority; 1404 1405 sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags); 1405 1406 if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { 1406 1407 cork->base.flags |= IPCORK_TS_OPT_ID; ··· 1943 1942 hdr->saddr = fl6->saddr; 1944 1943 hdr->daddr = *final_dst; 1945 1944 1946 - skb->priority = READ_ONCE(sk->sk_priority); 1945 + skb->priority = cork->base.priority; 1947 1946 skb->mark = cork->base.mark; 1948 1947 if (sk_is_tcp(sk)) 1949 1948 skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
+1
net/ipv6/ping.c
··· 119 119 return -EINVAL; 120 120 121 121 ipcm6_init_sk(&ipc6, sk); 122 + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); 122 123 ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); 123 124 ipc6.sockc.mark = READ_ONCE(sk->sk_mark); 124 125
+2 -1
net/ipv6/raw.c
··· 619 619 skb_reserve(skb, hlen); 620 620 621 621 skb->protocol = htons(ETH_P_IPV6); 622 - skb->priority = READ_ONCE(sk->sk_priority); 622 + skb->priority = sockc->priority; 623 623 skb->mark = sockc->mark; 624 624 skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); 625 625 ··· 780 780 ipcm6_init(&ipc6); 781 781 ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); 782 782 ipc6.sockc.mark = fl6.flowi6_mark; 783 + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); 783 784 784 785 if (sin6) { 785 786 if (addr_len < SIN6_LEN_RFC2133)
+1
net/ipv6/udp.c
··· 1448 1448 ipc6.gso_size = READ_ONCE(up->gso_size); 1449 1449 ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); 1450 1450 ipc6.sockc.mark = READ_ONCE(sk->sk_mark); 1451 + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); 1451 1452 1452 1453 /* destination address check */ 1453 1454 if (sin6) {
+1 -1
net/packet/af_packet.c
··· 3126 3126 3127 3127 skb->protocol = proto; 3128 3128 skb->dev = dev; 3129 - skb->priority = READ_ONCE(sk->sk_priority); 3129 + skb->priority = sockc.priority; 3130 3130 skb->mark = sockc.mark; 3131 3131 skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); 3132 3132
+11
net/socket.c
··· 1008 1008 } 1009 1009 } 1010 1010 1011 + static void sock_recv_priority(struct msghdr *msg, struct sock *sk, 1012 + struct sk_buff *skb) 1013 + { 1014 + if (sock_flag(sk, SOCK_RCVPRIORITY) && skb) { 1015 + __u32 priority = skb->priority; 1016 + 1017 + put_cmsg(msg, SOL_SOCKET, SO_PRIORITY, sizeof(__u32), &priority); 1018 + } 1019 + } 1020 + 1011 1021 void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, 1012 1022 struct sk_buff *skb) 1013 1023 { 1014 1024 sock_recv_timestamp(msg, sk, skb); 1015 1025 sock_recv_drops(msg, sk, skb); 1016 1026 sock_recv_mark(msg, sk, skb); 1027 + sock_recv_priority(msg, sk, skb); 1017 1028 } 1018 1029 EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); 1019 1030
+2
tools/include/uapi/asm-generic/socket.h
··· 126 126 127 127 #define SCM_TS_OPT_ID 78 128 128 129 + #define SO_RCVPRIORITY 79 130 + 129 131 #if !defined(__KERNEL__) 130 132 131 133 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+1
tools/testing/selftests/net/Makefile
··· 32 32 TEST_PROGS += gro.sh 33 33 TEST_PROGS += gre_gso.sh 34 34 TEST_PROGS += cmsg_so_mark.sh 35 + TEST_PROGS += cmsg_so_priority.sh 35 36 TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh 36 37 TEST_PROGS += netns-name.sh 37 38 TEST_PROGS += nl_netdev.py
+10 -1
tools/testing/selftests/net/cmsg_sender.c
··· 59 59 unsigned int proto; 60 60 } sock; 61 61 struct option_cmsg_u32 mark; 62 + struct option_cmsg_u32 priority; 62 63 struct { 63 64 bool ena; 64 65 unsigned int delay; ··· 98 97 "\n" 99 98 "\t\t-m val Set SO_MARK with given value\n" 100 99 "\t\t-M val Set SO_MARK via setsockopt\n" 100 + "\t\t-P val Set SO_PRIORITY via setsockopt\n" 101 + "\t\t-Q val Set SO_PRIORITY via cmsg\n" 101 102 "\t\t-d val Set SO_TXTIME with given delay (usec)\n" 102 103 "\t\t-t Enable time stamp reporting\n" 103 104 "\t\t-f val Set don't fragment via cmsg\n" ··· 118 115 { 119 116 int o; 120 117 121 - while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { 118 + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) { 122 119 switch (o) { 123 120 case 's': 124 121 opt.silent_send = true; ··· 150 147 case 'm': 151 148 opt.mark.ena = true; 152 149 opt.mark.val = atoi(optarg); 150 + break; 151 + case 'Q': 152 + opt.priority.ena = true; 153 + opt.priority.val = atoi(optarg); 153 154 break; 154 155 case 'M': 155 156 opt.sockopt.mark = atoi(optarg); ··· 259 252 260 253 ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, 261 254 SOL_SOCKET, SO_MARK, &opt.mark); 255 + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, 256 + SOL_SOCKET, SO_PRIORITY, &opt.priority); 262 257 ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, 263 258 SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); 264 259 ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+151
tools/testing/selftests/net/cmsg_so_priority.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Check that the priority requested through cmsg_sender, either as an
# SO_PRIORITY control message (-Q) or via setsockopt (-P), is visible on
# egress.  Packets are sent through a VLAN device stacked on a dummy device,
# and tc flower filters on the dummy device count packets per vlan_prio.

# NOTE(review): setup_ns and cleanup_ns are not defined in this script;
# presumably they are provided by lib.sh - confirm.
source lib.sh

readonly KSFT_SKIP=4

IP4=192.0.2.1/24
TGT4=192.0.2.2
TGT4_RAW=192.0.2.3
IP6=2001:db8::1/64
TGT6=2001:db8::2
TGT6_RAW=2001:db8::3
PORT=1234
TOTAL_TESTS=0
FAILED_TESTS=0

# jq is needed to extract the packet counters from the JSON output of tc.
if ! command -v jq &> /dev/null; then
	echo "SKIP cmsg_so_priority.sh test: jq is not installed." >&2
	exit "$KSFT_SKIP"
fi

# Record the outcome of one check.
#   $1: 0 for a passed check, anything else for a failed one
check_result() {
	((TOTAL_TESTS++))
	if [ "$1" -ne 0 ]; then
		((FAILED_TESTS++))
	fi
}

cleanup()
{
	cleanup_ns $NS
}

trap cleanup EXIT

setup_ns NS

# Install an egress flower filter on dummy1 that passes (and thereby counts)
# 802.1q frames with the given VLAN priority, inner ethertype and destination.
#   $1: filter handle
#   $2: VLAN priority to match
#   $3: inner ethertype ("ipv4" or "ipv6")
#   $4: protocol letter as given to cmsg_sender -p ("u", "i" or "r")
#   $5: destination IP address to match
create_filter() {
	local handle=$1
	local vlan_prio=$2
	local ip_type=$3
	local proto=$4
	local dst_ip=$5
	local ip_proto

	# For any other combination (e.g. "r") ip_proto stays unset and the
	# filter is installed without an ip_proto match.
	if [[ "$proto" == "u" ]]; then
		ip_proto="udp"
	elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then
		ip_proto="icmp"
	elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then
		ip_proto="icmpv6"
	fi

	tc -n $NS filter add dev dummy1 \
		egress pref 1 handle "$handle" proto 802.1q \
		flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \
		dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \
		action pass
}

ip -n $NS link set dev lo up
ip -n $NS link add name dummy1 up type dummy

# Every egress-qos-map entry is n:n, so the value 0..7 under test is carried
# over unchanged into the VLAN priority matched by the filters.
ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \
	egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7

ip -n $NS address add $IP4 dev dummy1.10
ip -n $NS address add $IP6 dev dummy1.10 nodad

ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647'

# Permanent neighbour entries for all four targets.
ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \
	dev dummy1.10
ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \
	dev dummy1.10
ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \
	dev dummy1.10
ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \
	dev dummy1.10

tc -n $NS qdisc add dev dummy1 clsact

# Filter handles are "<FILTER_COUNTER><priority>"; the counter advances by 10
# for every (IP version, protocol) pair so that each filter gets its own handle.
FILTER_COUNTER=10

for i in 4 6; do
	for proto in u i r; do
		echo "Test IPV$i, prot: $proto"
		for priority in {0..7}; do
			# "r" sends go to the dedicated *_RAW targets.
			if [[ $i == 4 && $proto == "r" ]]; then
				TGT=$TGT4_RAW
			elif [[ $i == 6 && $proto == "r" ]]; then
				TGT=$TGT6_RAW
			elif [ $i == 4 ]; then
				TGT=$TGT4
			else
				TGT=$TGT6
			fi

			handle="${FILTER_COUNTER}${priority}"

			create_filter $handle $priority ipv$i $proto $TGT

			# A freshly installed filter must not have counted anything.
			pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
				| jq ".[] | select(.options.handle == ${handle}) | \
				.options.actions[0].stats.packets")

			if [[ $pkts == 0 ]]; then
				check_result 0
			else
				echo "prio $priority: expected 0, got $pkts"
				check_result 1
			fi

			# Priority supplied as an SO_PRIORITY cmsg: counter goes to 1.
			ip netns exec $NS ./cmsg_sender -$i -Q $priority \
				-p $proto $TGT $PORT

			pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
				| jq ".[] | select(.options.handle == ${handle}) | \
				.options.actions[0].stats.packets")
			if [[ $pkts == 1 ]]; then
				check_result 0
			else
				echo "prio $priority -Q: expected 1, got $pkts"
				check_result 1
			fi

			# Priority supplied via setsockopt: counter goes to 2.
			ip netns exec $NS ./cmsg_sender -$i -P $priority \
				-p $proto $TGT $PORT

			pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
				| jq ".[] | select(.options.handle == ${handle}) | \
				.options.actions[0].stats.packets")
			if [[ $pkts == 2 ]]; then
				check_result 0
			else
				echo "prio $priority -P: expected 2, got $pkts"
				check_result 1
			fi
		done
		FILTER_COUNTER=$((FILTER_COUNTER + 10))
	done
done

if [ $FAILED_TESTS -ne 0 ]; then
	echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
	exit 1
else
	echo "OK - All $TOTAL_TESTS tests passed"
	exit 0
fi