Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
4# pylint: disable=R0912,R0915
5
6"""
Parse a source file or header, creating ReStructured Text cross references.

It accepts an optional file to change the default symbol reference or to
suppress symbols from the output.

It is capable of identifying ``define``, function, ``struct``, ``typedef``
and ``enum`` declarations, as well as the symbols defined inside enums,
and of creating cross-references for all of them. It is also capable of
distinguishing a ``#define`` used for specifying a Linux ioctl.

The optional rules file contains a set of rules like::

    ignore ioctl VIDIOC_ENUM_FMT
    replace ioctl VIDIOC_DQBUF vidioc_qbuf
    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
22"""
23
24import os
25import re
26import sys
27
28
class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying ``define``, function, ``struct``,
    ``typedef`` and ``enum`` declarations, as well as the symbols defined
    inside enums, and of creating cross-references for all of them.
    It is also capable of distinguishing a ``#define`` used for specifying
    a Linux ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules::

        ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules::

        replace <type> <old_symbol> <new_reference>

    Replaces the reference generated for old_symbol with a new one.
    The new_reference can be:

        - A simple symbol name;
        - A full Sphinx reference.

    3. Namespace rules::

        namespace <namespace>

    Sets C namespace to be used during cross-reference generation. Can
    be overridden by replace rules.

    On ignore and replace rules, ``<type>`` can be:

        - ``ioctl``: for defines whose value starts with ``_IO``,
          e.g. ioctl definitions;
        - ``define``: for other defines;
        - ``symbol``: for symbols defined within enums;
        - ``typedef``: for typedefs;
        - ``enum``: for the name of a non-anonymous enum;
        - ``struct``: for structs.

    Examples::

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`

        namespace MC
    """

    #: Parser regex with multiple ways to capture enums.
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]

    #: Parser regex with multiple ways to capture structs.
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # NOTE: the original code was written a long time before the Sphinx C
    # domain gained support for multiple namespaces. To avoid too much
    # churn at the existing hyperlinks, the code kept using "c:type"
    # instead of the right types. To change that, we need to change the
    # types not only here, but also at the uAPI media documentation.

    #: Dictionary containing C type identifiers to be transformed.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """Initialize internal vars"""
        self.debug = debug
        self.data = ""

        # One dictionary per symbol type: symbol -> (reference, line number)
        self.symbols = {}

        self.namespace = None
        self.ignore = []
        self.replace = []

        # Base name of the exceptions file, used on diagnostics issued
        # while the rules are applied.
        self.exceptions_name = "<exceptions>"

        for symbol_type in self.DEF_SYMBOL_TYPES:
            self.symbols[symbol_type] = {}

    def read_exceptions(self, fname: str):
        """
        Read an optional exceptions file, used to override defaults.

        Rules are appended to ``self.ignore`` and ``self.replace``, each
        tagged with the line number where it was found. A ``namespace``
        rule sets ``self.namespace``. Blank lines and lines starting with
        ``#`` are skipped. Any other content aborts via ``sys.exit()``.
        """

        if not fname:
            return

        name = os.path.basename(fname)

        # Remember it, as apply_exceptions() reports errors against it
        self.exceptions_name = name

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, 1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    self.ignore.append((ln, match.group(1), match.group(2)))
                    continue

                # replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if match:
                    self.replace.append((ln, match.group(1), match.group(2),
                                         match.group(3)))
                    continue

                # namespace rules
                match = re.match(r"^namespace\s+(\S+)", line)
                if match:
                    self.namespace = match.group(1)
                    continue

                sys.exit(f"{name}:{ln}: invalid line: {line}")

    def apply_exceptions(self):
        """
        Process exceptions file with rules to ignore or replace references.

        Ignore rules drop the symbol from ``self.symbols``. Replace rules
        overwrite the stored reference, keeping the line number where the
        symbol was found. A rule with an unknown type aborts via
        ``sys.exit()``; a replace rule for a symbol that was not parsed
        only prints a warning.
        """

        name = self.exceptions_name

        # Handle ignore rules
        for ln, c_type, symbol in self.ignore:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            self.symbols[c_type].pop(symbol, None)

        # Handle replace rules
        for ln, c_type, old, new in self.replace:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            reftype = None

            # Parse reference type when the type is specified
            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
            if match:
                reftype = f":c:{match.group(1)}"
                new = match.group(2)
            else:
                match = re.search(r"(\:ref)\:\`(.+)\`", new)
                if match:
                    reftype = match.group(1)
                    new = match.group(2)

            # If the replacement rule doesn't have a type, get default
            if not reftype:
                defs = self.DEF_SYMBOL_TYPES[c_type]
                reftype = defs.get("ref_type") or defs.get("real_type")

            new_ref = f"{reftype}:`{old} <{new}>`"

            # Change self.symbols to use the replacement rule
            if old in self.symbols[c_type]:
                (_, symbol_ln) = self.symbols[c_type][old]
                self.symbols[c_type][old] = (new_ref, symbol_ln)
            else:
                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def store_type(self, ln, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Store a new symbol at self.symbols under symbol_type.

        The reference target defaults to the lower-case symbol name.
        For ``:ref`` references, underscores at the target are replaced
        by ``-`` unless ``replace_underscores`` is false. For C domain
        references, the target is prefixed by the namespace, if any.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type or self.namespace:
            if not ref_name:
                ref_name = symbol.lower()

            # c-type references don't support hash
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            # C domain references may have namespaces. ref_type can be
            # unset here when only the namespace is defined.
            if ref_type and ref_type.startswith(":c:") and self.namespace:
                ref_name = f"{self.namespace}.{ref_name}"

            if ref_type:
                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
            else:
                ref_link = f"`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)

    def store_line(self, line):
        """
        Store a line at self.data, properly indented.
        """
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str, exceptions: str = None):
        """
        Read a C source file and get identifiers.

        Every line is stored at ``self.data``. Lines ending with a
        backslash are joined with the following one and comments are
        stripped before trying to identify ioctls, defines, typedefs,
        enums, enum symbols and structs. The rules from the optional
        ``exceptions`` file are applied once the file is parsed.

        Line numbers stored with each symbol are zero-based.
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""
        stmt_start = None

        self.read_exceptions(exceptions)

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: drop the trailing backslash
                # and keep the content to be joined with the next line
                if line.endswith("\\"):
                    if stmt_start is None:
                        stmt_start = line_no
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        multiline = line
                        continue

                # A statement split with backslashes is reported at the
                # line where it starts, which is where its symbol lives
                stmt_ln = line_no if stmt_start is None else stmt_start
                stmt_start = None

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below

                line = re.sub(r"(/\*.*\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {stmt_ln + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two-pass parsing logic here due to:
                # 1. it makes easier to debug issues on not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type(stmt_ln, "symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type(stmt_ln, "ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type(stmt_ln, "define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type(stmt_ln, "typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type(stmt_ln, "enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type(stmt_ln, "struct", match.group(1))
                        break

        self.apply_exceptions()

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, (ref, ln) in sorted(refs.items()):
                print(f" #{ln:<5d} {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored file content as ReST text, with Sphinx special
        characters escaped and each known symbol replaced by its
        cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, (replacement, _) in ref_dict.items():
                # The text was already escaped, so escape the symbol the
                # same way before turning it into a regular expression
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                              fr'\1{replacement}\2', text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a list of symbols to be part of a TOC contents table.

        Symbols are grouped per type, with groups sorted by description
        and symbols sorted by name. Types without symbols are skipped.
        """
        text = []

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (v['description'], k) for k, v in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for symbol, (ref, ln) in sorted(refs.items()):
                text.append(f"- LINENO_{ln}: {ref}")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write a ReST output file.

        The title is the base name of ``file_in``. The body is either the
        TOC (``toc`` true) or the cross-referenced file content inside a
        ``parsed-literal`` block.
        """

        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)