Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917
6
7"""
docdiff - Check differences in kernel-doc output between two different
commits.
10
11Examples
12--------
13
Compare the kernel-doc output between the v6.18 and v6.19 releases::
15
16 $ kdoc_diff v6.18..v6.19
17
Both outputs are cached, so running the same comparison again reuses them.
19
Force a complete documentation scan and clean any previous cache from
v6.19 to the current HEAD::

    $ kdoc_diff v6.19.. --full --clean
24
25Check differences only on a single driver since origin/main::
26
27 $ kdoc_diff origin/main drivers/media
28
Generate a YAML file and use it to check for regressions::
30
31 $ kdoc_diff HEAD~ drivers/media --regression
32
33
34"""
35
36import os
37import sys
38import argparse
39import subprocess
40import shutil
41import re
42import signal
43
44from glob import iglob
45
46
# Directory that contains this script (symlinks resolved).
SRC_DIR = os.path.dirname(os.path.realpath(__file__))
# Default work directory: two levels above the script directory.
WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))

# Tools invoked by this script.
KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")

# Cache location and the name of the YAML file used by regression runs.
CACHE_DIR = ".doc_diff_cache"
YAML_NAME = "out.yaml"

# Cache sub-directory used by each kind of run, plus the scratch directory.
DIR_NAME = {
    kind: os.path.join(CACHE_DIR, subdir)
    for kind, subdir in (
        ("full", "full"),
        ("partial", "partial"),
        ("no-cache", "no_cache"),
        ("tmp", "__tmp__"),
    )
}
62
class GitHelper:
    """
    Handles all Git operations.

    Every command runs with ``cwd=work_dir``; ``None`` means the current
    working directory of the process.
    """

    def __init__(self, work_dir=None):
        self.work_dir = work_dir

    def _rev_parse(self, rev, short=False):
        """
        Resolve ``rev`` to the ID of the commit it points to.

        ``^{commit}`` peels annotated tags, so a tag name yields the tagged
        commit instead of the tag object. Returns an empty string when
        ``rev`` is empty or cannot be resolved to a commit.
        """
        if not rev:
            return ""

        cmd = ["git", "rev-parse", "--verify", "--quiet"]
        if short:
            cmd.append("--short")
        cmd.append(f"{rev}^{{commit}}")

        try:
            # stderr is discarded so Git warnings never get mixed with the
            # hash read from stdout.
            return subprocess.check_output(cmd, cwd=self.work_dir,
                                           stderr=subprocess.DEVNULL,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def is_inside_repository(self):
        """Check if we're inside a Git repository"""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)

            return output.strip() == "true"
        except subprocess.CalledProcessError:
            return False

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        return bool(self._rev_parse(commit_hash))

    def get_short_hash(self, commit_hash):
        """
        Get the abbreviated hash of the commit ``commit_hash`` points to.

        Returns an empty string if it cannot be resolved to a commit.
        """
        return self._rev_parse(commit_hash, short=True)

    def has_uncommitted_changes(self):
        """
        Check for uncommitted changes.

        Any non-zero exit status of ``git diff-index`` is reported as
        "changes present".
        """
        try:
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return False
        except subprocess.CalledProcessError:
            return True

    def get_current_branch(self):
        """
        Get something that can be checked out later to return to the
        current state: the branch name or, on a detached HEAD (where
        ``git branch --show-current`` prints nothing), the full hash of
        the HEAD commit.
        """
        branch = subprocess.check_output(["git", "branch", "--show-current"],
                                         cwd=self.work_dir,
                                         universal_newlines=True).strip()
        if branch:
            return branch

        return self._rev_parse("HEAD")

    def checkout_commit(self, commit_hash, quiet=True):
        """
        Force-checkout a commit.

        Returns True on success and False if ``git checkout`` fails.
        Raises RuntimeError if the command succeeds but HEAD does not end
        up at the commit ``commit_hash`` resolves to.
        """
        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)

        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT,
                                    universal_newlines=True)
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            if e.output:
                print(e.output.strip(), file=sys.stderr)
            return False

        # Double-check that HEAD really moved. Compare resolved commit IDs,
        # as commit_hash may be a branch, a tag or a full/short hash.
        expected = self._rev_parse(commit_hash)
        current = self._rev_parse("HEAD")
        if not expected or expected != current:
            raise RuntimeError(f"Branch changed to '{current}' instead of '{commit_hash}'")

        return True
143
144
class CacheManager:
    """Creates the cache directories and names per-commit cache paths."""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Make sure every directory listed at DIR_NAME exists under work_dir."""
        for rel_dir in DIR_NAME.values():
            target = os.path.join(self.work_dir, rel_dir)
            if os.path.exists(target):
                continue

            os.makedirs(target, mode=0o755, exist_ok=True)

    def get_commit_cache(self, commit_hash, path):
        """
        Return ``path`` joined with the short hash of ``commit_hash``.

        When the short hash can't be obtained, ``commit_hash`` itself is
        used as the last path component.
        """
        git = GitHelper(self.work_dir)
        leaf = git.get_short_hash(commit_hash) or commit_hash

        return os.path.join(path, leaf)
165
class KernelDocRunner:
    """Runs kernel-doc documentation generator"""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir
        self.kdoc_binary = kdoc_binary
        # Result of find_kdoc_references(); None until the first scan
        self.kdoc_files = None

    def find_kdoc_references(self):
        """
        Find all files marked with kernel-doc:: directives.

        Scans every ``*.rst`` file under ``<work_dir>/Documentation`` and
        returns the sorted list of unique directive arguments. The result
        is kept, so later calls don't scan again. Unreadable files are
        skipped.
        """
        if self.kdoc_files is not None:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        kdoc_files = set()

        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))

            except OSError:
                continue

        # Sort: set ordering changes from run to run, and the file order is
        # the order of the kernel-doc command line arguments. A stable order
        # keeps outputs generated by different runs comparable.
        self.kdoc_files = sorted(kdoc_files)

        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """
        Runs kernel-doc to generate a yaml file with man and rst.

        Returns False if kernel-doc exits with a non-zero status.
        """
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """
        Run unit tests with the generated yaml file.

        When the test program fails, hints about how to re-run it are
        printed. NOTE(review): True is returned even in that case, so
        callers can't detect a regression from the return value - confirm
        this is intended.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k <regex> to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """
        Generate man, rst and errors, storing them at tmp_dir.

        On success, tmp_dir is renamed to output_dir (when given).
        Returns False if kernel-doc exits with a non-zero status.
        """
        # Start from an empty directory: leftovers from a failed or
        # regression run would otherwise be moved into the cache together
        # with the fresh logs.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        os.makedirs(tmp_dir, exist_ok=True)

        try:
            with open(os.path.join(tmp_dir, "man.log"), "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(os.path.join(tmp_dir, "rst.log"), "w", encoding="utf-8") as out:
                with open(os.path.join(tmp_dir, "err.log"), "w", encoding="utf-8") as err:
                    subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                          cwd=self.work_dir,
                                          stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """
        Run kernel-doc on its several ways.

        Checks out commit_hash and then:

        - without is_regression: stores man/rst/error logs (normal_run);
        - with is_regression and not is_end: generates the YAML file;
        - with is_regression and is_end: runs the unit tests against it.

        Raises RuntimeError if kdoc_files is empty or the checkout fails.
        """
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if not is_end:
            return self.gen_yaml(yaml_file, kdoc_files)

        return self.run_unittest(yaml_file)
273
class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        if diff_args is None:
            diff_args = ["-u0", "-w"]
        # An explicit empty list means "no extra arguments"
        self.diff_args = list(diff_args)

    @staticmethod
    def _list_files(top):
        """Return the set of files under ``top``, as paths relative to it."""
        return {os.path.relpath(os.path.join(root, name), top)
                for root, _, names in os.walk(top)
                for name in names}

    def diff_directories(self, dir1, dir2):
        """
        Compare two directories using an external diff.

        Files present on both sides are compared with the diff tool and its
        output is printed; files present on only one side are listed.

        Returns True if any difference (or diff tool failure) was found,
        False otherwise. Exits with status 1 if the diff tool can't be
        executed.
        """
        print(f"\nDiffing {dir1} and {dir2}:")

        dir1_files = self._list_files(dir1)
        dir2_files = self._list_files(dir2)
        has_diff = False

        for file in sorted(dir1_files & dir2_files):
            f1 = os.path.join(dir1, file)
            f2 = os.path.join(dir2, file)

            cmd = [self.diff_tool] + self.diff_args + [f1, f2]
            try:
                # errors="replace": logs aren't guaranteed to be valid UTF-8
                result = subprocess.run(
                    cmd, capture_output=True, text=True, errors="replace",
                    check=False
                )
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)

            if result.stdout:
                has_diff = True
                print(f"\n{file}")
                print(result.stdout, end="")

            # diff exits with 0 (same) or 1 (different). Anything else means
            # that the comparison itself failed, which must not be reported
            # as "no differences".
            if result.returncode not in (0, 1):
                has_diff = True
                print(f"ERROR: {self.diff_tool} failed on {file} "
                      f"(exit status {result.returncode}): "
                      f"{result.stderr.strip()}", file=sys.stderr)

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f" - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f" + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")

        return has_diff
327
328
class SignalHandler:
    """
    Context manager that checks out the original HEAD again when the
    ``with`` block ends or when SIGINT/SIGTERM is received.
    """

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        # Handlers that were installed before __enter__, indexed by signal
        self.old_handler = {}
        self.restored = False

    def __enter__(self):
        """Install the signal handlers, remembering the previous ones."""
        for sig in (signal.SIGINT, signal.SIGTERM):
            self.old_handler[sig] = signal.getsignal(sig)
            signal.signal(sig, self.signal_handler)

        return self

    def __exit__(self, *args):
        """Restore the original state at the end of the with block."""
        self.restore()

    def signal_handler(self, sig, _):
        """Restore the original state and exit when a signal arrives."""
        print(f"\nSignal {sig} received. Restoring original state...")

        self.restore(force_exit=True)

    def restore(self, force_exit=False):
        """
        Check out the original HEAD and reinstall the previous signal
        handlers. It does nothing when called a second time.

        With ``force_exit``, the program exits with status 1 afterwards.
        """
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")

        checkout = ["git", "checkout", "-f", self.original_head]
        try:
            subprocess.check_call(checkout,
                                  cwd=self.git_helper.work_dir,
                                  stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            # Best effort: report it and still put the handlers back
            print(f"Failed to restore: {err}", file=sys.stderr)

        for signum in self.old_handler:
            signal.signal(signum, self.old_handler[signum])

        self.restored = True

        if force_exit:
            sys.exit(1)
378
def parse_commit_range(value):
    """
    Handle a commit range.

    Accepts ``begin..end``, ``begin..`` or just ``begin``; a missing end
    defaults to ``HEAD``. Prints the range and returns a ``(begin, end)``
    tuple.

    Raises argparse.ArgumentTypeError if the beginning of the range is
    missing or if a three-dot range is used.
    """
    # Without "..", partition() returns (value, "", ""), so a single commit
    # needs no special case.
    begin, _, end = value.partition("..")

    # "a...b" would leave ".b" as the end, which is never a valid ref name.
    # Reject it here instead of failing later with a confusing error.
    if end.startswith("."):
        raise argparse.ArgumentTypeError(
            "Three-dot ranges (old...new) are not supported, use old..new")

    if not begin:
        raise argparse.ArgumentTypeError("Need a commit beginning")

    if not end:
        end = "HEAD"

    print(f"Range: {begin} to {end}")

    return begin, end
396
397
def main():
    """
    Main code.

    Parses the command line, validates the repository state, generates the
    kernel-doc output for both commits of the range (reusing cached results
    when available) and then either diffs both outputs or, with
    --regression, runs the unit tests against the YAML generated from the
    old commit. The originally checked out branch is restored at the end.

    Invalid argument combinations end the program via parser.error();
    other failures raise RuntimeError.
    """
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process (cannot be combined with --full)")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    # argparse.ArgumentError needs an Action object as its first argument,
    # so it can't be raised with a parsed value; parser.error() is the
    # supported way to report an invalid combination.
    if args.files and args.full:
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    # Validate commits, converting them to short hashes. "^{commit}" peels
    # annotated tags, so the hash is the one HEAD will have after checkout.
    # The name given by the user is kept for the error message.
    commits = []
    for ref in args.commits:
        short_hash = git_helper.get_short_hash(f"{ref}^{{commit}}")
        if not short_hash:
            raise RuntimeError(f"Commit '{ref}' does not exist")
        commits.append(short_hash)

    old_commit, new_commit = commits

    # Check for uncommitted changes
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    # The cache lives inside the work dir (see CacheManager.initialize()),
    # which is not necessarily the current directory of the process.
    tmp_dir = os.path.join(work_dir, DIR_NAME["tmp"])
    out_path = os.path.join(work_dir, DIR_NAME[diff_type])

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice
    if not kdoc_files:
        raise RuntimeError("No kernel-doc references found")

    # On a detached HEAD there is no branch name: fall back to the commit
    # hash, so there is always something valid to restore.
    original_head = (git_helper.get_current_branch() or
                     git_helper.get_short_hash("HEAD"))

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)
        else:
            new_success = True

    if not (old_success and new_success):
        raise RuntimeError("Failed to generate documentation")

    if not args.regression:
        diff_manager = DiffManager()
        diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()