Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch to MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# configured.
#
# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
# at full speed. That makes it impossible to use iperf3 to simply measure the
# throughput, because many packets (that reach $h3) don't get to the kernel at
# all even in UDP mode (the situation is even worse in TCP mode, where one can't
# hope to see more than a couple Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
# each gets a different priority and we can use per-prio ethtool counters to
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
# thus TC 0).
#
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +---------------------------+            +----------------------------------+
# | H1                        |            |                               H2 |
# |                           |            |  unicast --> + $h2.111           |
# |                 multicast |            |  traffic     | 192.0.2.129/28    |
# |                 traffic   |            |              | e-qos-map 0:1     |
# |           $h1 + <-----    |            |              |                   |
# | 192.0.2.65/28 |           |            |              + $h2               |
# +---------------|-----------+            +--------------|-------------------+
#                 |                                       |
# +---------------|---------------------------------------|-------------------+
# |         $swp1 +                                       + $swp2             |
# |        >1Gbps |                                       | >1Gbps            |
# | +-------------|------+                     +----------|----------------+  |
# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
# | |                BR1 |             SW      | BR111                     |  |
# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
# | +-------------|------+                     +----------|----------------+  |
# |               \_______________________________________/                   |
# |                                    |                                      |
# |                                    + $swp3                                |
# |                                    | 1Gbps bottleneck                     |
# |                                    | prio qdisc: {0..7} -> 7              |
# +------------------------------------|--------------------------------------+
#                                      |
#                                   +--|-----------------+
#                                   |  + $h3          H3 |
#                                   |  | 192.0.2.66/28   |
#                                   |  |                 |
#                                   |  + $h3.111         |
#                                   |    192.0.2.130/28  |
#                                   +--------------------+

60ALL_TESTS="
61 ping_ipv4
62 test_mc_aware
63 test_uc_aware
64"
65
66lib_dir=$(dirname $0)/../../../net/forwarding
67
68NUM_NETIFS=6
69source $lib_dir/lib.sh
70source $lib_dir/devlink_lib.sh
71source qos_lib.sh
72
# Set up H1, the multicast sender: a simple VRF'd interface with jumbo
# MTU so mausezahn can push 8K frames and saturate the link.
h1_create()
{
	adf_simple_if_init $h1 192.0.2.65/28

	mtu_set $h1 10000
	defer mtu_restore $h1
}

# Set up H2, the unicast sender. Traffic leaves tagged on VLAN 111; the
# egress-qos-map rewrites skb prio 0 to PCP 1 so the switch counts UC
# separately (prio 1) from the untagged MC stream (prio 0).
h2_create()
{
	adf_simple_if_init $h2

	mtu_set $h2 10000
	defer mtu_restore $h2

	vlan_create $h2 111 v$h2 192.0.2.129/28
	defer vlan_destroy $h2 111
	ip link set dev $h2.111 type vlan egress-qos-map 0:1
}

# Set up H3, the receiver of both streams: untagged MC arrives on $h3
# itself, tagged UC on the $h3.111 VLAN upper. Per-prio RX ethtool
# counters on $h3 are used to measure the rates.
h3_create()
{
	adf_simple_if_init $h3 192.0.2.66/28

	mtu_set $h3 10000
	defer mtu_restore $h3

	vlan_create $h3 111 v$h3 192.0.2.130/28
	defer vlan_destroy $h3 111
}

# Configure the switch side of the topology:
# - all three ports up at 10K MTU to match the hosts,
# - VLAN 111 uppers on $swp2/$swp3 for the unicast stream,
# - a 1Gbps TBF bottleneck on $swp3 with a child prio qdisc mapping
#   every priority to one band, so the qdisc layer itself does not
#   prioritize UC over MC,
# - two VLAN-unaware bridges: br1 carries untagged MC ($swp1 -> $swp3),
#   br111 carries VLAN-111 UC ($swp2.111 -> $swp3.111),
# - shared-buffer quotas sized so both streams can be admitted.
switch_create()
{
	ip link set dev $swp1 up
	defer ip link set dev $swp1 down

	mtu_set $swp1 10000
	defer mtu_restore $swp1

	ip link set dev $swp2 up
	defer ip link set dev $swp2 down

	mtu_set $swp2 10000
	defer mtu_restore $swp2

	ip link set dev $swp3 up
	defer ip link set dev $swp3 down

	mtu_set $swp3 10000
	defer mtu_restore $swp3

	vlan_create $swp2 111
	defer vlan_destroy $swp2 111

	vlan_create $swp3 111
	defer vlan_destroy $swp3 111

	# The 1Gbps bottleneck on the egress port.
	tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \
		burst 128K limit 1G
	defer tc qdisc del dev $swp3 root handle 3:

	# Map all eight priorities to band 7 (and thus TC 0) so that UC
	# (PCP 1) is not prioritized over MC (PCP 0) in software.
	tc qdisc replace dev $swp3 parent 3:3 handle 33: \
		prio bands 8 priomap 7 7 7 7 7 7 7 7
	defer tc qdisc del dev $swp3 parent 3:3 handle 33:

	ip link add name br1 type bridge vlan_filtering 0
	defer ip link del dev br1
	ip link set dev br1 addrgenmode none
	ip link set dev br1 up

	ip link set dev $swp1 master br1
	defer ip link set dev $swp1 nomaster

	ip link set dev $swp3 master br1
	defer ip link set dev $swp3 nomaster

	ip link add name br111 type bridge vlan_filtering 0
	defer ip link del dev br111
	ip link set dev br111 addrgenmode none
	ip link set dev br111 up

	ip link set dev $swp2.111 master br111
	defer ip link set dev $swp2.111 nomaster

	ip link set dev $swp3.111 master br111
	defer ip link set dev $swp3.111 nomaster

	# Make sure that ingress quotas are smaller than egress so that there is
	# room for both streams of traffic to be admitted to shared buffer.
	devlink_port_pool_th_save $swp1 0
	devlink_port_pool_th_set $swp1 0 5
	defer devlink_port_pool_th_restore $swp1 0

	devlink_tc_bind_pool_th_save $swp1 0 ingress
	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
	defer devlink_tc_bind_pool_th_restore $swp1 0 ingress

	devlink_port_pool_th_save $swp2 0
	devlink_port_pool_th_set $swp2 0 5
	defer devlink_port_pool_th_restore $swp2 0

	# UC is PCP 1 at $swp2, hence TC/prio 1 here (vs. 0 on $swp1).
	devlink_tc_bind_pool_th_save $swp2 1 ingress
	devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
	defer devlink_tc_bind_pool_th_restore $swp2 1 ingress

	# Egress pool 4 on the bottleneck port gets a larger quota.
	devlink_port_pool_th_save $swp3 4
	devlink_port_pool_th_set $swp3 4 12
	defer devlink_port_pool_th_restore $swp3 4
}

# Map the six netifs onto their topology roles, cache H3's MAC (the
# destination of the generated UC stream), and build the topology.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	# Destination MAC for the mausezahn-generated unicast traffic.
	h3mac=$(mac_get $h3)

	adf_vrf_prepare

	h1_create
	h2_create
	h3_create
	switch_create
}

# Sanity check: unicast connectivity from H2 to H3 over VLAN 111.
ping_ipv4()
{
	ping_test $h2 192.0.2.130
}

# Start the UC stream from $h2.111 towards $h3 and measure its rate.
#
# $1 - label used in diagnostics (e.g. "UC-only", "UC+MC").
#
# Outputs the measured rates (as reported by measure_rate: ingress at
# $swp2 and egress at $h3, in that order) on stdout, space-separated.
# Flags a failure through check_err (sets RET) if a high enough rate
# could not be obtained. Meant to be run via run_uc_measure_rate so the
# deferred stop_traffic fires when the measurement is done.
__run_uc_measure_rate()
{
	local what=$1; shift
	local -a uc_rate

	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
	defer stop_traffic $!

	uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "$what"))
	check_err $? "Could not get high enough $what ingress rate"

	# Quote the expansion (SC2068): the elements are still emitted
	# space-joined, but cannot be glob-expanded or dropped if empty.
	echo "${uc_rate[@]}"
}

# Run __run_uc_measure_rate in its own defer scope, so that the traffic
# it starts is stopped as soon as the measurement finishes.
run_uc_measure_rate()
{
	in_defer_scope __run_uc_measure_rate "$@"
}

# Verify that UC throughput survives an MC flood: measure the UC-only
# egress rate, re-measure with a broadcast MC stream running, and check
# that the degradation stays within the ~15-25% window expected from
# the 200Mbps MC minimum shaper on a 1Gbps bottleneck.
test_mc_aware()
{
	RET=0

	# Baseline egress UC rate with no competing traffic.
	local -a uc_rate=($(run_uc_measure_rate "UC-only"))
	local ucth1=${uc_rate[1]}

	# Start the broadcast MC flood from H1.
	start_traffic $h1 192.0.2.65 bc bc

	defer stop_traffic $!

	# Snapshot prio-0 (MC) octet counters so MC ingress/egress rates
	# can be derived over the UC+MC measurement interval.
	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local -a uc_rate_2=($(run_uc_measure_rate "UC+MC"))
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# UC degradation in percent, clamped at zero.
	local deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")

	# Minimum shaper of 200Mbps on MC TCs should cause about 20% of
	# degradation on 1Gbps link.
	check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect"
	check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much"

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	log_test "UC performance under MC overload"

	echo "UC-only throughput  $(humanize $ucth1)"
	echo "UC+MC throughput    $(humanize $ucth2)"
	echo "Degradation         $deg %"
	echo
	echo "Full report:"
	echo "  UC only:"
	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
	echo "  UC+MC:"
	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
	echo "    ingress MC throughput $(humanize $mc_ir)"
	echo "    egress MC throughput  $(humanize $mc_er)"
	echo
}

# Converse check: verify that MC traffic still gets through while the
# switch is under UC overload, by broadcast-ARPing from H1 to H3 and
# expecting every single ARP to get a response.
test_uc_aware()
{
	RET=0

	# Saturate the bottleneck with the UC stream.
	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
	defer stop_traffic $!

	# Snapshot prio-1 (UC) octet counters to report the UC rates;
	# give the stream a second to ramp up.
	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
	sleep 1

	local attempts=50
	local passes=0
	local i

	for ((i = 0; i < attempts; ++i)); do
		# Broadcast ARP for H3; counts as a pass only if a reply
		# arrives within the 1s timeout despite the UC flood.
		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
			((passes++))
		fi

		sleep 0.1
	done

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

	local interval=$((d1 - d0))
	local uc_ir=$(rate $u0 $u1 $interval)
	local uc_er=$(rate $t0 $t1 $interval)

	# Every ARP must have been answered, otherwise flag a failure.
	((attempts == passes))
	check_err $?

	log_test "MC performance under UC overload"
	echo "    ingress UC throughput $(humanize ${uc_ir})"
	echo "    egress UC throughput  $(humanize ${uc_er})"
	echo "    sent $attempts BC ARPs, got $passes responses"
}

323trap cleanup EXIT
324
325setup_prepare
326setup_wait
327
328tests_run
329
330exit $EXIT_STATUS