Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5Devlink Rate TC Bandwidth Test Suite
6===================================
7
8This test suite verifies the functionality of devlink-rate traffic class (TC)
9bandwidth distribution in a virtualized environment. The tests validate that
10bandwidth can be properly allocated between different traffic classes and
11that TC mapping works as expected.
12
13Test Environment:
14----------------
15- Creates 1 VF
16- Establishes a bridge connecting the VF representor and the uplink representor
17- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
18- Configures different traffic classes (TC3 and TC4) for each VLAN
19
20Test Cases:
21----------
221. test_no_tc_mapping_bandwidth:
23 - Verifies that without TC mapping, bandwidth is NOT distributed according to
24 the configured 20/80 split between TC3 and TC4
25 - This test should fail if bandwidth matches the 20/80 split without TC
26 mapping
27 - Expected: Bandwidth should NOT be distributed as 20/80
28
292. test_tc_mapping_bandwidth:
30 - Configures TC mapping using mqprio qdisc
31 - Verifies that with TC mapping, bandwidth IS distributed according to the
32 configured 20/80 split between TC3 and TC4
33 - Expected: Bandwidth should be distributed as 20/80
34
35Bandwidth Distribution:
36----------------------
37- TC3 (VLAN 101): Configured for 20% of total bandwidth
38- TC4 (VLAN 102): Configured for 80% of total bandwidth
39- Total bandwidth: 1Gbps
40- Tolerance: +-12%
41
42Hardware-Specific Behavior (mlx5):
43--------------------------
44mlx5 hardware enforces traffic class separation by ensuring that each transmit
45queue (SQ) is associated with a single TC. If a packet is sent on a queue that
46doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
47mapping), the hardware moves the queue to the correct TC scheduler to preserve
48traffic isolation.
49
50This behavior means that even without explicit TC-to-queue mapping, bandwidth
51enforcement may still appear to work—because the hardware dynamically adjusts
the scheduling context. However, this can lead to performance issues at high
rates and head-of-line (HOL) blocking if traffic from different TCs is mixed on
the same queue.
54"""
55
56import json
57import os
58import subprocess
59import threading
60import time
61
62from lib.py import ksft_pr, ksft_run, ksft_exit
63from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
64from lib.py import NetDrvEpEnv, DevlinkFamily
65from lib.py import NlError
66from lib.py import cmd, defer, ethtool, ip
67from lib.py import Iperf3Runner
68
69
class BandwidthValidator:
    """
    Validates total bandwidth and individual shares with tolerance
    relative to the overall total.

    The allowed deviation is an absolute margin derived from the sum of all
    expected shares, so every share gets the same +/- window regardless of
    its own size.
    """

    def __init__(self, shares, tolerance_percent=12):
        """
        Pre-computes the acceptable (low, high) window of every share.

        shares: mapping of share name to its expected value.
        tolerance_percent: allowed deviation, expressed as a percentage of
            the sum of all expected values (defaults to 12).
        """
        self.tolerance_percent = tolerance_percent
        self.expected_total = sum(shares.values())
        self.bounds = {
            name: (self.min_expected(exp), self.max_expected(exp))
            for name, exp in shares.items()
        }

    def _margin(self):
        """Absolute tolerance margin shared by all bounds."""
        return self.expected_total * self.tolerance_percent / 100

    def min_expected(self, value):
        """Calculates the minimum acceptable value based on tolerance."""
        return value - self._margin()

    def max_expected(self, value):
        """Calculates the maximum acceptable value based on tolerance."""
        return value + self._margin()

    def bound(self, values):
        """
        Return True if all given values fall within tolerance.

        values: mapping of share name to measured value. Every name must be
        one of the names given at construction time (KeyError otherwise).
        """
        return all(
            self.bounds[name][0] <= value <= self.bounds[name][1]
            for name, value in values.items()
        )
101
102
def setup_vf(cfg, set_tc_mapping=True):
    """
    Sets up a VF on the given network interface.

    Enables SR-IOV and switchdev mode, brings the VF interface up,
    and optionally configures TC mapping using mqprio. Each successful
    mode / VF-count change registers a deferred command that reverts it.

    Returns the name of the VF network interface.

    Raises KsftSkipEx if switchdev mode or SR-IOV cannot be enabled, or if
    no network interface can be found for the VF.
    """
    try:
        cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
        defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
    except Exception as exc:
        raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
    try:
        cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
        defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
    except Exception as exc:
        raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc

    # Fixed wait before looking the VF netdev up in sysfs.
    time.sleep(2)
    try:
        vf_netdevs = os.listdir(
            f"/sys/class/net/{cfg.ifname}/device/virtfn0/net")
    except OSError as exc:
        # A missing virtfn0/net directory means the same thing as an empty
        # one: there is no VF netdev to test with.
        raise KsftSkipEx("VF interface not found") from exc
    if not vf_netdevs:
        raise KsftSkipEx("VF interface not found")

    vf_ifc = vf_netdevs[0]
    ip(f"link set dev {vf_ifc} up")
    if set_tc_mapping:
        cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")

    return vf_ifc
132
133
def setup_vlans_on_vf(vf_ifc):
    """
    Creates the two VLAN interfaces used by the test on top of the VF.

    Each VLAN gets its own /29 address and an egress-qos-map that maps
    skb priority 0 to the VLAN priority of its traffic class.
    """
    # (VLAN id, traffic class, local IPv4 address)
    vlan_plan = (
        (101, 3, "198.51.100.1"),
        (102, 4, "198.51.100.9"),
    )

    for vlan_id, tc, addr in vlan_plan:
        vlan_dev = f"{vf_ifc}.{vlan_id}"
        ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {vlan_id}")
        ip(f"addr add {addr}/29 dev {vlan_dev}")
        ip(f"link set dev {vlan_dev} up")
        ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{tc}")
        ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {tc} and IP {addr}")
150
151
def get_vf_info(cfg):
    """
    Looks up the representor netdev and devlink port index of VF 0.

    Parses the JSON output of "devlink port show" and picks the first port
    that belongs to cfg.pci and reports vfnum 0. The result is stored in
    cfg.vf_representor and cfg.vf_port_index; both stay None when no such
    port exists.
    """
    cfg.vf_representor = None
    cfg.vf_port_index = None

    raw = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
    pci_prefix = f"pci/{cfg.pci}/"

    for handle, attrs in json.loads(raw)["port"].items():
        if not handle.startswith(pci_prefix):
            continue
        if attrs.get("vfnum") != 0:
            continue
        cfg.vf_representor = attrs.get("netdev")
        # The port handle has the form "pci/<address>/<index>".
        cfg.vf_port_index = int(handle.split("/")[-1])
        return
170
171
def setup_bridge(cfg):
    """
    Builds a Linux bridge joining the uplink and the VF representor.

    The bridge is named after the current PID and its removal is deferred
    right after creation. Raises KsftSkipEx when no VF representor was
    recorded in cfg.vf_representor.
    """
    bridge = f"br_{os.getpid()}"
    ip(f"link add name {bridge} type bridge")
    defer(cmd, f"ip link del name {bridge} type bridge")

    ip(f"link set dev {cfg.ifname} master {bridge}")

    representor = cfg.vf_representor
    if not representor:
        raise KsftSkipEx("Could not find representor for the VF")

    ip(f"link set dev {representor} master {bridge}")
    ip(f"link set dev {representor} up")
    ksft_pr(f"Set representor {representor} up and added to bridge")

    ip(f"link set dev {bridge} up")
192
193
def setup_devlink_rate(cfg):
    """
    Applies the devlink rate settings of the test to the VF port.

    Sets tx_max and a per-traffic-class bandwidth table in which TC3 gets
    20, TC4 gets 80 and every other TC gets 0.

    Raises KsftSkipEx if the VF port index is unknown or the device reports
    EOPNOTSUPP, and KsftFailEx on any other netlink error.
    """
    port_index = cfg.vf_port_index
    if port_index is None:
        raise KsftSkipEx("Could not find VF port index")

    tc_shares = {3: 20, 4: 80}
    rate_request = {
        "bus-name": "pci",
        "dev-name": cfg.pci,
        "port-index": port_index,
        # NOTE(review): presumably bytes/sec, i.e. the 1Gbps total named in
        # the module docstring - confirm against the devlink spec.
        "rate-tx-max": 125000000,
        "rate-tc-bws": [
            {"index": tc, "bw": tc_shares.get(tc, 0)} for tc in range(8)
        ],
    }

    try:
        cfg.devnl.rate_set(rate_request)
    except NlError as exc:
        if exc.error == 95:  # EOPNOTSUPP
            raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
        raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
222
223
def setup_remote_vlans(cfg):
    """
    Sets up VLAN interfaces on the remote side.

    Creates VLANs 101 and 102 on top of cfg.remote_ifname, assigns each its
    /29 peer address and brings it up. Removal of each VLAN device is
    deferred as soon as the device has been created.
    """
    remote_dev = cfg.remote_ifname
    vlan_ids = [101, 102]
    remote_ips = ["198.51.100.2", "198.51.100.10"]

    for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
        vlan_dev = f"{remote_dev}.{vlan_id}"
        cmd(f"ip link add link {remote_dev} name {vlan_dev} "
            f"type vlan id {vlan_id}", host=cfg.remote)
        # Register the cleanup before configuring the device, so a failure
        # in one of the following commands does not leave the VLAN behind
        # on the remote host.
        defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
        cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
        cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
239
240
def setup_test_environment(cfg, set_tc_mapping=True):
    """
    Prepares everything a bandwidth test needs, in dependency order:
    the VF (optionally with TC mapping), its VLANs, the bridge between the
    uplink and the VF representor, the devlink rate settings and finally
    the VLANs on the remote host.
    """
    vf_netdev = setup_vf(cfg, set_tc_mapping)
    ksft_pr(f"Created VF interface: {vf_netdev}")
    setup_vlans_on_vf(vf_netdev)

    # The bridge needs the representor that get_vf_info() stores in cfg.
    get_vf_info(cfg)
    setup_bridge(cfg)

    setup_devlink_rate(cfg)
    setup_remote_vlans(cfg)
256
257
def measure_bandwidth(cfg, server_ip, client_ip, barrier):
    """
    Synchronizes with peers and runs an iperf3-based bandwidth measurement
    between the given endpoints. Returns average Gbps.

    barrier: threading.Barrier shared by all concurrent measurements; each
    caller waits on it for up to 10 seconds so the measurements start
    together.

    Raises KsftFailEx if the barrier is not released in time or if the
    iperf3 measurement fails.
    """
    runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip)
    try:
        barrier.wait(timeout=10)
    except threading.BrokenBarrierError as exc:
        # Barrier.wait() breaks the barrier and raises this on timeout; the
        # other waiters then see the same exception.
        raise KsftFailEx("iperf3 barrier wait timed out") from exc

    try:
        bw_gbps = runner.measure_bandwidth(reverse=True)
    except Exception as exc:
        raise KsftFailEx("iperf3 bandwidth measurement failed") from exc

    return bw_gbps
275
276
def run_bandwidth_test(cfg):
    """
    Runs parallel bandwidth measurements for each VLAN/TC pair and collects results.

    One thread per VLAN/TC pair runs measure_bandwidth(); all threads share
    a barrier so the measurements start together.

    Returns a dict mapping the TC index to the measured bandwidth.

    Raises the exception of the first failed measurement (in VLAN/TC pair
    order), or KsftFailEx if a measurement returned None.
    """
    vf_vlan_data = [
        # (local_ip, remote_ip, TC)
        ("198.51.100.1", "198.51.100.2", 3),
        ("198.51.100.9", "198.51.100.10", 4),
    ]

    results = {}
    errors = {}
    threads = []
    start_barrier = threading.Barrier(len(vf_vlan_data))

    def _run_measure_bandwidth_thread(local_ip, remote_ip, tc_ix):
        # An exception escaping a thread never reaches the thread that
        # joins it, so keep it and re-raise it from the caller below.
        try:
            results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip,
                                               start_barrier)
        except Exception as exc:
            errors[tc_ix] = exc

    for local_ip, remote_ip, tc_ix in vf_vlan_data:
        thread = threading.Thread(
            target=_run_measure_bandwidth_thread,
            args=(local_ip, remote_ip, tc_ix)
        )
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()

    for _, _, tc_ix in vf_vlan_data:
        if tc_ix in errors:
            raise errors[tc_ix]

    if any(tc_bw is None for tc_bw in results.values()):
        raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth")

    return results
310
311
def calculate_bandwidth_percentages(results):
    """
    Calculates the percentage of total bandwidth received by TC3 and TC4.

    results: mapping of TC index to measured bandwidth; must contain the
    keys 3 and 4.

    Returns a dict with the keys 'tc3_bw', 'tc4_bw', 'tc3_percentage',
    'tc4_percentage' and 'total_bw'. When the total is zero both
    percentages are reported as 0.0 instead of dividing by zero.

    Raises KsftFailEx if the result of either TC is missing.
    """
    if 3 not in results or 4 not in results:
        raise KsftFailEx(f"Missing expected TC results in {results}")

    tc3_bw = results[3]
    tc4_bw = results[4]
    total_bw = tc3_bw + tc4_bw

    if total_bw:
        tc3_percentage = (tc3_bw / total_bw) * 100
        tc4_percentage = (tc4_bw / total_bw) * 100
    else:
        # No traffic at all: there is no meaningful split to report.
        tc3_percentage = tc4_percentage = 0.0

    return {
        'tc3_bw': tc3_bw,
        'tc4_bw': tc4_bw,
        'tc3_percentage': tc3_percentage,
        'tc4_percentage': tc4_percentage,
        'total_bw': total_bw
    }
332
333
def print_bandwidth_results(bw_data, test_name):
    """
    Reports the measured per-TC bandwidth, the total and the per-TC share
    of the total for the test identified by test_name.
    """
    traffic_classes = (3, 4)

    ksft_pr(f"Bandwidth check results {test_name}:")
    for tc in traffic_classes:
        ksft_pr(f"TC {tc}: {bw_data[f'tc{tc}_bw']:.2f} Gbits/sec")
    ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
    for tc in traffic_classes:
        ksft_pr(f"TC {tc} percentage: {bw_data[f'tc{tc}_percentage']:.1f}%")
344
345
def verify_total_bandwidth(bw_data, validator):
    """
    Checks the measured total bandwidth against the validator's "total"
    bounds.

    Returns silently when the total is within bounds. Raises KsftSkipEx
    when it is below the lower bound (the shares cannot be validated then)
    and KsftFailEx when it is above the upper bound.
    """
    measured = bw_data['total_bw']

    if not validator.bound({"total": measured}):
        floor, ceiling = validator.bounds["total"]

        if measured < floor:
            raise KsftSkipEx(
                f"Total bandwidth {measured:.2f} Gbps < minimum "
                f"{floor:.2f} Gbps; "
                f"parent tx_max ({validator.expected_total:.1f} G) "
                f"not reached, cannot validate share"
            )

        raise KsftFailEx(
            f"Total bandwidth {measured:.2f} Gbps exceeds allowed ceiling "
            f"{ceiling:.2f} Gbps "
            f"(VF tx_max set to {validator.expected_total:.1f} G)"
        )
370
371
def run_bandwidth_distribution_test(cfg, set_tc_mapping):
    """
    Sets the environment up, measures both TCs in parallel, prints the
    results and validates the total bandwidth.

    Returns True when the measured TC3/TC4 shares fall within the bounds of
    cfg.tc_bw_validator, False otherwise.
    """
    setup_test_environment(cfg, set_tc_mapping)
    bw_data = calculate_bandwidth_percentages(run_bandwidth_test(cfg))

    if set_tc_mapping:
        test_name = "with TC mapping"
    else:
        test_name = "without TC mapping"
    print_bandwidth_results(bw_data, test_name)

    # May skip or fail the test before the shares are even looked at.
    verify_total_bandwidth(bw_data, cfg.traffic_bw_validator)

    measured_shares = {
        "tc3": bw_data['tc3_percentage'],
        "tc4": bw_data['tc4_percentage'],
    }
    return cfg.tc_bw_validator.bound(measured_shares)
386
387
def test_no_tc_mapping_bandwidth(cfg):
    """
    Verifies that bandwidth is not split 20/80 without traffic class mapping.

    On mlx5 the expectation is inverted: a 20/80 split without mapping is
    reported as an expected failure (see the hardware-specific notes in the
    module docstring), while the absence of the split is reported as a
    failure because it means the driver behavior changed.
    """
    pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping"
    fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping"
    is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout

    if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
        if is_mlx5:
            raise KsftXfailEx(fail_bw_msg)
        raise KsftFailEx(fail_bw_msg)
    if is_mlx5:
        raise KsftFailEx("mlx5 behavior changed: " + pass_bw_msg)
    ksft_pr(pass_bw_msg)
403
404
def test_tc_mapping_bandwidth(cfg):
    """
    Checks that, with traffic class mapping configured, TC3 and TC4 share
    the bandwidth 20/80; fails the test otherwise.
    """
    if not run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
        raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping")
    ksft_pr("Bandwidth is distributed as 20/80 with TC mapping")
414
415
def main() -> None:
    """
    Entry point: prepares the shared test configuration and runs both
    bandwidth test cases.
    """
    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        cfg.devnl = DevlinkFamily()

        # The PCI address is the last component of the resolved device link.
        device_link = f"/sys/class/net/{cfg.ifname}/device"
        cfg.pci = os.path.basename(os.path.realpath(device_link))
        if not cfg.pci:
            raise KsftSkipEx("Could not get PCI address of the interface")

        # Total is validated around 1 (Gbps), shares around 20% / 80%.
        cfg.traffic_bw_validator = BandwidthValidator({"total": 1})
        cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80})

        ksft_run(
            cases=[test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth],
            args=(cfg,),
        )
    # Report the final status once the environment context has exited.
    ksft_exit()


if __name__ == "__main__":
    main()