Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

selftests: drv-net: test drivers sleeping in ndo_get_stats64

Most of our tests use rtnetlink to read device stats, so they
don't expose the drivers much to paths in which device stats
are read under RCU. Add tests which hammer procfs reads to
make sure drivers:
- don't sleep while reporting stats,
- can handle parallel reads,
- can handle device going down while reading.

Set ifname on the env class in NetDrvEnv; we already do that
in NetDrvEpEnv.

KTAP version 1
1..7
ok 1 stats.check_pause
ok 2 stats.check_fec
ok 3 stats.pkt_byte_sum
ok 4 stats.qstat_by_ifindex
ok 5 stats.check_down
ok 6 stats.procfs_hammer
# completed up/down cycles: 6
ok 7 stats.procfs_downup_hammer
# Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0

Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250107022932.2087744-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+97 -3
+1
tools/testing/selftests/drivers/net/lib/py/env.py
··· 48 48 else: 49 49 self._ns = NetdevSimDev(**kwargs) 50 50 self.dev = self._ns.nsims[0].dev 51 + self.ifname = self.dev['ifname'] 51 52 self.ifindex = self.dev['ifindex'] 52 53 53 54 def __enter__(self):
+91 -3
tools/testing/selftests/drivers/net/stats.py
··· 2 2 # SPDX-License-Identifier: GPL-2.0 3 3 4 4 import errno 5 + import subprocess 6 + import time 5 7 from lib.py import ksft_run, ksft_exit, ksft_pr 6 - from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx 8 + from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises 9 + from lib.py import KsftSkipEx, KsftXfailEx 7 10 from lib.py import ksft_disruptive 8 11 from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError 9 12 from lib.py import NetDrvEnv 10 - from lib.py import ip, defer 13 + from lib.py import cmd, ip, defer 11 14 12 15 ethnl = EthtoolFamily() 13 16 netfam = NetdevFamily() ··· 177 174 netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) 178 175 179 176 177 + def __run_inf_loop(body): 178 + body = body.strip() 179 + if body[-1] != ';': 180 + body += ';' 181 + 182 + return subprocess.Popen(f"while true; do {body} done", shell=True, 183 + stdout=subprocess.PIPE, stderr=subprocess.PIPE) 184 + 185 + 186 + def __stats_increase_sanely(old, new) -> None: 187 + for k in old.keys(): 188 + ksft_ge(new[k], old[k]) 189 + ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error") 190 + 191 + 192 + def procfs_hammer(cfg) -> None: 193 + """ 194 + Reading stats via procfs only holds the RCU lock, which is not an exclusive 195 + lock, make sure drivers can handle parallel reads of stats. 
196 + """ 197 + one = __run_inf_loop("cat /proc/net/dev") 198 + defer(one.kill) 199 + two = __run_inf_loop("cat /proc/net/dev") 200 + defer(two.kill) 201 + 202 + time.sleep(1) 203 + # Make sure the processes are running 204 + ksft_is(one.poll(), None) 205 + ksft_is(two.poll(), None) 206 + 207 + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] 208 + time.sleep(2) 209 + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] 210 + __stats_increase_sanely(rtstat1, rtstat2) 211 + # defers will kill the loops 212 + 213 + 214 + @ksft_disruptive 215 + def procfs_downup_hammer(cfg) -> None: 216 + """ 217 + Reading stats via procfs only holds the RCU lock, drivers often try 218 + to sleep when reading the stats, or don't protect against races. 219 + """ 220 + # Max out the queues, we'll flip between max and 1 221 + channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) 222 + if channels['combined-count'] == 0: 223 + rx_type = 'rx' 224 + else: 225 + rx_type = 'combined' 226 + cur_queue_cnt = channels[f'{rx_type}-count'] 227 + max_queue_cnt = channels[f'{rx_type}-max'] 228 + 229 + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") 230 + defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") 231 + 232 + # Real test stats 233 + stats = __run_inf_loop("cat /proc/net/dev") 234 + defer(stats.kill) 235 + 236 + ipset = f"ip link set dev {cfg.ifname}" 237 + defer(ip, f"link set dev {cfg.ifname} up") 238 + # The "echo -n 1" lets us count iterations below 239 + updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \ 240 + f"ethtool -L {cfg.ifname} {rx_type} 1; " + \ 241 + f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \ 242 + "echo -n 1" 243 + updown = __run_inf_loop(updown) 244 + kill_updown = defer(updown.kill) 245 + 246 + time.sleep(1) 247 + # Make sure the processes are running 248 + ksft_is(stats.poll(), None) 249 + ksft_is(updown.poll(), None) 250 + 251 + rtstat1 = rtnl.getlink({"ifi-index": 
cfg.ifindex})['stats64'] 252 + # We're looking for crashes, give it extra time 253 + time.sleep(9) 254 + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] 255 + __stats_increase_sanely(rtstat1, rtstat2) 256 + 257 + kill_updown.exec() 258 + stdout, _ = updown.communicate(timeout=5) 259 + ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8'))) 260 + 261 + 180 262 def main() -> None: 181 263 with NetDrvEnv(__file__, queue_count=100) as cfg: 182 264 ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, 183 - check_down], 265 + check_down, procfs_hammer, procfs_downup_hammer], 184 266 args=(cfg, )) 185 267 ksft_exit() 186 268
+5
tools/testing/selftests/net/lib/py/ksft.py
··· 71 71 _fail("Check failed", a, "not in", b, comment) 72 72 73 73 74 + def ksft_is(a, b, comment=""): 75 + if a is not b: 76 + _fail("Check failed", a, "is not", b, comment) 77 + 78 + 74 79 def ksft_ge(a, b, comment=""): 75 80 if a < b: 76 81 _fail("Check failed", a, "<", b, comment)