Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

docs: kdoc_diff: add a helper tool to help checking kdoc regressions

Checking for regressions in kernel-doc output can be hard. Add a helper
tool to make this task easier.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <24b3116a78348b13a74d1ff5e141160ef9705dd3.1774551940.git.mchehab+huawei@kernel.org>

authored by

Mauro Carvalho Chehab and committed by
Jonathan Corbet
210a923a 07f6cb18

+508
+508
tools/docs/kdoc_diff
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917

"""
kdoc_diff - Check differences between kernel-doc output between two different
commits.

Examples
--------

Compare the kernel-doc output between the v6.18 and v6.19 releases::

    $ kdoc_diff v6.18..v6.19

Both outputs are cached

Force a complete documentation scan and clean any previous cache from
v6.19 to the current HEAD::

    $ kdoc_diff v6.19.. --full --clean

Check differences only on a single driver since origin/main::

    $ kdoc_diff origin/main drivers/media

Generate a YAML file and use it to check for regressions::

    $ kdoc_diff HEAD~ drivers/media --regression
"""

import os
import sys
import argparse
import subprocess
import shutil
import re
import signal

from glob import iglob


SRC_DIR = os.path.dirname(os.path.realpath(__file__))
WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))

KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")

CACHE_DIR = ".doc_diff_cache"
YAML_NAME = "out.yaml"

# Cache sub-directories, keyed by the kind of run that fills them.
DIR_NAME = {
    "full": os.path.join(CACHE_DIR, "full"),
    "partial": os.path.join(CACHE_DIR, "partial"),
    "no-cache": os.path.join(CACHE_DIR, "no_cache"),
    "tmp": os.path.join(CACHE_DIR, "__tmp__"),
}


class GitHelper:
    """Handles all Git operations"""

    def __init__(self, work_dir=None):
        # Directory where every git command is executed (None: current dir).
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Return True if work_dir is inside a Git work tree."""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)
        except subprocess.CalledProcessError:
            return False

        return output.strip() == "true"

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        # "^{commit}" makes git peel the ref and fail unless it ends at a
        # commit object; "--verify" rejects anything that is not a single
        # revision.
        try:
            subprocess.check_output(["git", "rev-parse", "--verify", "--quiet",
                                     f"{commit_hash}^{{commit}}"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            return False

        return True

    def get_short_hash(self, commit_hash):
        """
        Return the abbreviated hash of the commit a ref points to, or an
        empty string if the ref can't be resolved.
        """
        # Without "^{commit}", an annotated tag would resolve to the hash of
        # the tag object, which never matches HEAD after checking it out.
        try:
            return subprocess.check_output(["git", "rev-parse", "--short",
                                            f"{commit_hash}^{{commit}}"],
                                           cwd=self.work_dir,
                                           stderr=subprocess.STDOUT,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Return True if tracked files differ from HEAD."""
        try:
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            return True

        return False

    def get_current_branch(self):
        """
        Return something that can be checked out later to get back to the
        current state: the branch name or, on a detached HEAD, the full
        hash of the HEAD commit.
        """
        branch = subprocess.check_output(["git", "branch", "--show-current"],
                                         cwd=self.work_dir,
                                         universal_newlines=True).strip()
        if branch:
            return branch

        # "git branch --show-current" prints nothing on a detached HEAD
        return subprocess.check_output(["git", "rev-parse", "HEAD"],
                                       cwd=self.work_dir,
                                       universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """
        Force-checkout a commit.

        Returns True on success and False if git checkout fails. Raises
        RuntimeError if, after the checkout, HEAD is not at the expected
        commit.
        """
        # Resolve before moving HEAD: relative refs such as "HEAD~" would
        # otherwise point elsewhere after the checkout.
        expected = self.get_short_hash(commit_hash) or commit_hash

        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)

        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False

        # Double-check if HEAD actually moved to the requested commit
        current = self.get_short_hash("HEAD")
        if expected != current:
            raise RuntimeError(f"Branch changed to '{current}' instead of '{commit_hash}'")

        return True


class CacheManager:
    """Manages persistent cache directories"""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Create the cache directories under work_dir if they don't exist"""
        for dir_path in DIR_NAME.values():
            os.makedirs(os.path.join(self.work_dir, dir_path),
                        exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """
        Return the cache directory for a commit inside ``path``.

        The directory is named after the short commit hash, falling back
        to ``commit_hash`` itself when it can't be resolved.
        """
        hash_short = GitHelper(self.work_dir).get_short_hash(commit_hash)

        return os.path.join(path, hash_short or commit_hash)


class KernelDocRunner:
    """Runs kernel-doc documentation generator"""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir
        self.kdoc_binary = kdoc_binary
        # Result of find_kdoc_references(); None until the first scan
        self.kdoc_files = None

    def find_kdoc_references(self):
        """
        Find all files referenced by ``.. kernel-doc::`` directives inside
        work_dir/Documentation/**/*.rst.

        Returns a sorted list without duplicates. The result is kept at
        self.kdoc_files and reused on subsequent calls.
        """
        if self.kdoc_files is not None:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        kdoc_files = set()

        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))
            except (OSError, UnicodeDecodeError):
                # Unreadable or non UTF-8 files are just skipped
                continue

        # Sort it: the order of the files is the order of the generated
        # output, so it has to be the same on every run for cached results
        # to be comparable.
        self.kdoc_files = sorted(kdoc_files)

        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """
        Runs kernel-doc to generate a yaml file with man and rst.

        kernel-doc runs from work_dir, so a relative ``yaml_file`` is
        relative to it. Returns False if kernel-doc exits with an error.
        """
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        # os.path.join() keeps yaml_file untouched when it is absolute
        yaml_dir = os.path.dirname(os.path.join(self.work_dir, yaml_file))
        os.makedirs(yaml_dir, exist_ok=True)

        try:
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """
        Run unit tests with the generated yaml file.

        Always returns True: a failing test is reported on the terminal,
        together with a hint about how to re-run it, instead of being
        handled by the caller as a failure to generate documentation.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir, check=False)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k <regex> to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """
        Generate man, rst and errors, storing them at tmp_dir.

        Creates man.log (--man output), rst.log (--rst output) and err.log
        (stderr of the --rst run). On success, tmp_dir is renamed to
        output_dir, if one is given. Returns False if kernel-doc exits
        with an error; tmp_dir is then left in place.
        """
        os.makedirs(tmp_dir, exist_ok=True)

        man_log = os.path.join(tmp_dir, "man.log")
        rst_log = os.path.join(tmp_dir, "rst.log")
        err_log = os.path.join(tmp_dir, "err.log")

        try:
            with open(man_log, "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(rst_log, "w", encoding="utf-8") as out, \
                 open(err_log, "w", encoding="utf-8") as err:
                subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            # os.replace() doesn't create missing parent directories
            parent = os.path.dirname(output_dir)
            if parent:
                os.makedirs(parent, exist_ok=True)

            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """
        Check out ``commit_hash`` and run kernel-doc on its several ways:

        - without ``is_regression``: generate the logs at ``output_dir``;
        - with ``is_regression`` and without ``is_end``: generate the YAML
          file at ``tmp_dir``;
        - with both: run the unit tests against that YAML file.

        Raises RuntimeError if ``kdoc_files`` is empty or if the commit
        can't be checked out. Returns the result of the step that ran.
        """
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if not is_end:
            return self.gen_yaml(yaml_file, kdoc_files)

        return self.run_unittest(yaml_file)


class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        self.diff_args = diff_args or ["-u0", "-w"]

    @staticmethod
    def _list_files(top):
        """Return the set of files below ``top``, as paths relative to it."""
        return {
            os.path.relpath(os.path.join(root, name), top)
            for root, _, names in os.walk(top)
            for name in names
        }

    def diff_directories(self, dir1, dir2):
        """
        Compare two directories using an external diff.

        Prints the diff of every file present on both directories,
        followed by the list of files that exist only in one of them.
        Exits with status 1 if the diff tool is not found.
        """
        print(f"\nDiffing {dir1} and {dir2}:")

        dir1_files = self._list_files(dir1)
        dir2_files = self._list_files(dir2)
        has_diff = False

        for file in sorted(dir1_files & dir2_files):
            cmd = self.diff_tool, *self.diff_args, \
                  os.path.join(dir1, file), os.path.join(dir2, file)
            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True, check=False
                )
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)

            if result.stdout:
                has_diff = True
                print(f"\n{file}")
                print(result.stdout, end="")

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f"  - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f"  + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")


class SignalHandler():
    """
    Context manager that checks out the original HEAD again when the
    ``with`` block ends or when SIGINT/SIGTERM is received.
    """

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        # Signal handlers that were active before __enter__
        self.old_handler = {}
        # Ensures that restore() runs only once
        self.restored = False

    def restore(self, force_exit=False):
        """
        Restore original HEAD state and the previous signal handlers.

        With ``force_exit``, exits with status 1 afterwards.
        """
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to restore: {e}", file=sys.stderr)

        for sig, handler in self.old_handler.items():
            signal.signal(sig, handler)

        self.restored = True

        if force_exit:
            sys.exit(1)

    def signal_handler(self, sig, _):
        """Handle interrupt signals."""
        print(f"\nSignal {sig} received. Restoring original state...")

        self.restore(force_exit=True)

    def __enter__(self):
        """Allow using it via with command."""
        for sig in [signal.SIGINT, signal.SIGTERM]:
            self.old_handler[sig] = signal.getsignal(sig)
            signal.signal(sig, self.signal_handler)

        return self

    def __exit__(self, *args):
        """Restore signals at the end of with block."""
        self.restore()


def parse_commit_range(value):
    """
    Handle a commit range.

    Accepts ``begin..end``, ``begin..`` or just ``begin``; a missing end
    defaults to HEAD. Returns a ``(begin, end)`` tuple and raises
    argparse.ArgumentTypeError if begin is empty.
    """
    # Without "..", partition() returns (value, "", ""), so a single ref
    # goes through the same path as "begin..".
    begin, _, end = value.partition("..")
    if not end:
        end = "HEAD"

    if not begin:
        raise argparse.ArgumentTypeError("Need a commit begginning")

    print(f"Range: {begin} to {end}")

    return begin, end


def main():
    """Main code"""
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process (cannot be combined with --full)")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    # argparse.ArgumentError() requires an argparse action as its first
    # argument, so it can't be raised with parsed values: use
    # parser.error() to report usage problems.
    if args.files and args.full:
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    old_ref, new_ref = args.commits

    # Validate commits before converting them, in order to report errors
    # with the names the user typed
    for ref in [old_ref, new_ref]:
        if not git_helper.is_valid_commit(ref):
            raise RuntimeError(f"Commit '{ref}' does not exist")

    old_commit = git_helper.get_short_hash(old_ref)
    new_commit = git_helper.get_short_hash(new_ref)

    # Check for uncommitted changes
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    # Anchor the cache at work_dir: that is where CacheManager creates the
    # directories and where kernel-doc runs, no matter what the current
    # directory is.
    tmp_dir = os.path.join(work_dir, DIR_NAME["tmp"])
    out_path = os.path.join(work_dir, DIR_NAME[diff_type])

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice
    if not kdoc_files:
        parser.error("No kernel-doc references found")

    original_head = git_helper.get_current_branch()

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)
        else:
            new_success = True

        if not (old_success and new_success):
            raise RuntimeError("Failed to generate documentation")

        if not args.regression:
            diff_manager = DiffManager()
            diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()