Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
#!/usr/bin/env python3
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Parse ABI documentation and produce results from it.
"""
9
10from argparse import Namespace
11import logging
12import os
13import re
14
15from pprint import pformat
16from random import randrange, seed
17
18# Import Python modules
19
20from abi.helpers import AbiDebug, ABI_DIR
21
22
class AbiParser:
    """Main class to parse ABI files.

    The parser walks a directory tree, feeds every file line by line to a
    small state machine and fills four lookup tables:

    - ``data``: one dict per generated key (a symbol group or a file),
      holding the parsed tags (``what``, ``description``, ``users``, ...);
    - ``what_symbols``: for each ``What:`` string, the files/lines where
      it is declared and, when known, the key of its entry at ``data``;
    - ``file_refs``: file name -> ReST label of the per-file entry;
    - ``what_refs``: ``What:`` string -> key of its entry at ``data``.
    """

    #: Valid tags at Documentation/ABI.
    TAGS = r"(what|where|date|kernelversion|contact|description|users)"

    #: ABI elements that will auto-generate cross-references.
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """Stores arguments for the class and initialize class vars.

        :param directory: default root used by parse_abi() when it is
                          called without an explicit root.
        :param logger: logger used for warnings/debug; when not given,
                       the "get_abi" logger is used.
        :param enable_lineno: stored as-is; not used by this class itself.
        :param show_warnings: when false, warn() is silent.
        :param debug: debug value tested against AbiDebug flags.
        """

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        if not logger:
            self.log = logging.getLogger("get_abi")
        else:
            self.log = logger

        self.data = {}
        self.what_symbols = {}
        self.file_refs = {}
        self.what_refs = {}

        # Ignore files that contain such suffixes
        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

        # Regular expressions used on parser
        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = re.compile(self.TAGS)
        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
        self.re_whitespace = re.compile(r"^\s+")

        # Regular expressions used on print
        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = re.compile(self.XREF)

    def warn(self, fdata, msg, extra=None):
        """Displays a parse error if warning is enabled.

        The message is prefixed with ``fdata.fname:fdata.ln``; ``extra``,
        when given, is appended on a new, tab-indented line.
        """

        if not self.show_warnings:
            return

        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
        if extra:
            msg += "\n\t\t" + extra

        self.log.warning(msg)

    def add_symbol(self, what, fname, ln=None, xref=None):
        """Create a reference table describing where each 'what' is located.

        :param what: the ``What:`` string being registered.
        :param fname: file where it was found.
        :param ln: line number; falsy values (None, 0) are not recorded.
        :param xref: key of the matching entry at ``self.data``; falsy
                     values leave any previously stored xref untouched.
        """

        entry = self.what_symbols.setdefault(what, {"file": {}})
        lines = entry["file"].setdefault(fname, [])

        if ln and ln not in lines:
            lines.append(ln)

        if xref:
            entry["xref"] = xref

    def _parse_line(self, fdata, line):
        """Parse a single line of an ABI file.

        ``fdata`` carries the per-file parser state (current tag, current
        key, list of ``What:`` names, indentation of the description, ...)
        and is updated in place; parsed contents go to ``self.data``.
        """

        new_what = False
        new_tag = False
        content = None

        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            # NOTE(review): search() accepts any word *containing* a valid
            # tag name; kept as-is since existing files may rely on it.
            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive "What:" lines share the same entry
                    fdata.what.append(content.strip("\n"))
                else:
                    if fdata.key:
                        # Close the previous entry before starting a new one
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    while fdata.key in self.data:
                        # Append one letter in A-Z / a-z until key is unique
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "path": fdata.ftype,
                            "line_no": fdata.ln,
                        }

                    # From now on fdata.what aliases the list stored at data
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            if fdata.tag and new_tag:
                fdata.tag = new_tag

                # NOTE(review): new_what is only set on the "what" branch
                # above, which always returns, so this block looks
                # unreachable here. Left untouched.
                if new_what:
                    fdata.label = ""

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    if self.debug == AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        sep = sep.replace(":", " ")

                        # Rebuild the line with the tag replaced by spaces,
                        # so that the text column can be measured
                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        if fdata.tag == "description":
            content = line.expandtabs()

            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]

                else:
                    # Line is less indented than the first one: stop
                    # stripping a common prefix from now on
                    fdata.space = ""

            # NOTE(review): the tag is "description" here, so the "what"
            # test below cannot be true. Left untouched.
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += content
            return

        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)

    def parse_readme(self, nametag, fname):
        """Parse ABI README file.

        The whole file is appended to ``nametag["description"]`` (which
        must already exist). Lines that start with a valid tag get a
        blank line and a ``:`` prepended.
        """

        nametag["what"] = ["Introduction"]
        nametag["path"] = "README"
        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                match = self.re_tag.match(line)
                if match:
                    new = match.group(1).lower()

                    match = self.re_valid.search(new)
                    if match:
                        nametag["description"] += "\n:" + line
                        continue

                nametag["description"] += line

    def parse_file(self, fname, path, basename):
        """Parse a single file.

        :param fname: name used to open the file.
        :param path: directory of the file with the ABI dir prefix
                     removed; its first component becomes the entry
                     "path" (file type).
        :param basename: file name without directories.
        """

        ref = f"abi_file_{path}_{basename}"
        ref = self.re_unprintable.sub("_", ref).strip("_")

        # Store per-file state into a namespace variable. This will be used
        # by the per-line parser state machine and by the warning function.
        #
        # This must be an *instance*: assigning the Namespace class itself
        # would store the parser state as class attributes of
        # argparse.Namespace, shared by every user of that class.
        fdata = Namespace()

        fdata.fname = fname
        fdata.name = basename

        pos = fname.find(ABI_DIR)
        if pos > 0:
            f = fname[pos:]
        else:
            f = fname

        fdata.file_ref = (f, ref)
        self.file_refs[f] = ref

        fdata.ln = 0
        fdata.what_ln = 0
        fdata.tag = ""
        fdata.label = ""
        fdata.what = []
        fdata.key = None
        fdata.xrefs = None
        fdata.space = None
        fdata.ftype = path.split("/")[0]

        fdata.nametag = {}
        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
        fdata.nametag["type"] = "File"
        fdata.nametag["path"] = fdata.ftype
        fdata.nametag["file"] = [fdata.file_ref]
        fdata.nametag["line_no"] = 1
        fdata.nametag["description"] = ""
        fdata.nametag["symbols"] = []

        self.data[ref] = fdata.nametag

        if self.debug & AbiDebug.WHAT_OPEN:
            self.log.debug("Opening file %s", fname)

        if basename == "README":
            self.parse_readme(fdata.nametag, fname)
            return

        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                fdata.ln += 1

                self._parse_line(fdata, line)

            if "description" in fdata.nametag:
                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

            # Close the last entry of the file
            if fdata.key:
                if "description" not in self.data.get(fdata.key, {}):
                    self.warn(fdata, f"{fdata.key} doesn't have a description")

                for w in fdata.what:
                    self.add_symbol(what=w, fname=fname, xref=fdata.key)

    def _parse_abi(self, root=None):
        """Internal function to parse documentation ABI recursively.

        Hidden files and files ending with one of ``ignore_suffixes`` are
        skipped; everything else is handed to parse_file().
        """

        if not root:
            root = self.directory

        with os.scandir(root) as obj:
            for entry in obj:
                name = os.path.join(root, entry.name)

                if entry.is_dir():
                    self._parse_abi(name)
                    continue

                if not entry.is_file():
                    continue

                basename = os.path.basename(name)

                if basename.startswith("."):
                    continue

                if basename.endswith(self.ignore_suffixes):
                    continue

                path = self.re_abi_dir.sub("", os.path.dirname(name))

                self.parse_file(name, path, basename)

    def parse_abi(self, root=None):
        """Parse documentation ABI."""

        self._parse_abi(root)

        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
            self.log.debug(pformat(self.data))

    def desc_txt(self, desc):
        """Print description as found inside ABI files.

        Returns ``desc`` stripped of surrounding blanks, followed by an
        empty line.
        """

        desc = desc.strip(" \t\n")

        return desc + "\n\n"

    def xref(self, fname):
        """
        Converts a Documentation/ABI + basename into a ReST cross-reference.

        Returns None when the file is unknown (or its reference is empty).
        """

        return self.file_refs.get(fname) or None

    def desc_rst(self, desc):
        """Enrich ReST output by creating cross-references.

        Title underlines are dropped, ``Documentation/*.rst`` names become
        ``:doc:`` roles, and known ABI files and known ABI symbols become
        ``:ref:`` roles. Returns the new text followed by two newlines.
        """

        # Remove title markups from the description
        # Having titles inside ABI files will only work if extra
        # care would be taken in order to strictly follow the same
        # level order for each markup.
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
        desc = desc.rstrip(" \t\n").lstrip("\n")

        # Python's regex performance for non-compiled expressions is a lot
        # worse than Perl's, as Perl automatically caches them at their
        # first usage. Here, we need to do the same, as otherwise the
        # performance penalty would be high.

        out = []
        for d in desc.split("\n"):
            if d == "":
                out.append("\n")
                continue

            # Use cross-references for doc files where needed
            d = self.re_doc.sub(r":doc:`/\1`", d)

            # Use cross-references for ABI generated docs where needed
            for m in self.re_abi.findall(d):
                abi = m[0] + m[1]

                xref = self.file_refs.get(abi)
                if not xref:
                    # This may happen if ABI is on a separate directory,
                    # like parsing ABI testing and symbol is at stable.
                    # The proper solution is to move this part of the code
                    # for it to be inside sphinx/kernel_abi.py
                    self.log.info("Didn't find ABI reference for '%s'", abi)
                    continue

                new = self.re_escape.sub(r"\\\1", m[1])
                link = f":ref:`{new} <{xref}>`"

                # The name is matched literally, and the replacement is a
                # callable so that its backslashes are never reinterpreted
                # as template escapes.
                d = re.sub(fr"\b{re.escape(abi)}\b",
                           lambda _m, link=link: link, d)

            # Seek for cross reference symbols like /sys/...
            # Need to be careful to avoid doing it on a code block
            if d[0] not in [" ", "\t"]:
                for m in self.re_xref_node.findall(d):
                    # Finding ABI here is more complex due to wildcards
                    xref = self.what_refs.get(m)
                    if not xref:
                        continue

                    new = self.re_escape.sub(r"\\\1", m)
                    link = f":ref:`{new} <{xref}>`"

                    # A "\b" can't be used here: symbols start with "/",
                    # so there is no word boundary right before them when
                    # they follow a space, "(" or the start of the line.
                    # Use instead the same delimiters as XREF, and match
                    # the symbol literally, as it may contain wildcards.
                    pattern = (r"(?<![^\s(])" + re.escape(m) +
                               r"(?![^,.:;)\s])")
                    d = re.sub(pattern, lambda _m, link=link: link, d)

            out.append(d + "\n")

        return "".join(out) + "\n\n"

    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
            filter_path=None):
        """Print ABI at stdout.

        Generator yielding one ``(text, file name, line number)`` tuple
        per entry of ``self.data``, sorted by type and then by names.

        :param output_in_txt: use desc_txt() instead of desc_rst() for
                              descriptions.
        :param show_symbols: include symbol entries.
        :param show_file: include per-file entries.
        :param filter_path: when set, only entries whose "path" is equal
                            to it are produced.
        """

        part = None
        for key, v in sorted(self.data.items(),
                             key=lambda x: (x[1].get("type", ""),
                                            x[1].get("what"))):

            wtype = v.get("type", "Symbol")
            file_ref = v.get("file")
            names = v.get("what", [""])

            if wtype == "File":
                if not show_file:
                    continue
            else:
                if not show_symbols:
                    continue

            if filter_path:
                if v.get("path") != filter_path:
                    continue

            msg = ""

            if wtype != "File":
                cur_part = names[0]
                if cur_part.find("/") >= 0:
                    match = self.re_what.match(cur_part)
                    if match:
                        symbol = match.group(1).rstrip("/")
                        cur_part = "Symbols under " + symbol

                if cur_part and cur_part != part:
                    part = cur_part
                    msg += part + "\n"+ "-" * len(part) +"\n\n"

                msg += f".. _{key}:\n\n"

                # Build the escaped names on a new list: "names" is the
                # list stored at self.data, and changing it in place would
                # escape the names again on every new call of doc().
                names = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                         for name in names]
                max_len = max((len(name) for name in names), default=0)

                msg += "+-" + "-" * max_len + "-+\n"
                for name in names:
                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                    msg += "+-" + "-" * max_len + "-+\n"
                msg += "\n"

            for ref in file_ref:
                if wtype == "File":
                    msg += f".. _{ref[1]}:\n\n"
                else:
                    base = os.path.basename(ref[0])
                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

            if wtype == "File":
                msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"

            desc = v.get("description")
            if not desc and wtype != "File":
                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

            if desc:
                if output_in_txt:
                    msg += self.desc_txt(desc)
                else:
                    msg += self.desc_rst(desc)

            symbols = v.get("symbols")
            if symbols:
                msg += "Has the following ABI:\n\n"

                for w, label in symbols:
                    # Escape special chars from content
                    content = self.re_escape.sub(r"\\\1", w)

                    msg += f"- :ref:`{content} <{label}>`\n\n"

            users = v.get("users")
            if users and users.strip(" \t\n"):
                users = users.strip("\n").replace('\n', '\n\t')
                msg += f"Users:\n\t{users}\n\n"

            ln = v.get("line_no", 1)

            yield (msg, file_ref[0][0], ln)

    def check_issues(self):
        """Warn about duplicated ABI entries.

        A warning is logged for every ``What:`` found on more than one
        file, listing each file and the line(s) where it shows up.
        """

        for what, v in self.what_symbols.items():
            files = v.get("file")
            if not files:
                # Should never happen if the parser works properly
                self.log.warning("%s doesn't have a file associated", what)
                continue

            if len(files) == 1:
                continue

            f = []
            for fname, lines in sorted(files.items()):
                if not lines:
                    f.append(f"{fname}")
                elif len(lines) == 1:
                    f.append(f"{fname}:{lines[0]}")
                else:
                    m = f"{fname} lines "
                    m += ", ".join(str(x) for x in lines)
                    f.append(m)

            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

    def search_symbols(self, expr):
        """Searches for ABI symbols.

        Prints, for each symbol whose name matches the case-insensitive
        regular expression ``expr``, its tags, files and description.
        Entries are visited sorted by their key at ``self.data``.
        """

        regex = re.compile(expr, re.I)

        found_keys = 0
        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            wtype = v.get("type", "")
            if wtype == "File":
                continue

            for what in v.get("what", [""]):
                if not regex.search(what):
                    continue

                found_keys += 1

                kernelversion = v.get("kernelversion", "").strip(" \t\n")
                date = v.get("date", "").strip(" \t\n")
                contact = v.get("contact", "").strip(" \t\n")
                users = v.get("users", "").strip(" \t\n")
                desc = v.get("description", "").strip(" \t\n")

                files = [f[0] for f in v.get("file", ())]

                title = str(found_keys) + ". " + what
                title_tag = "-" * len(title)

                print(f"\n{title}\n{title_tag}\n")

                if kernelversion:
                    print(f"Kernel version:\t\t{kernelversion}")

                if date:
                    print(f"Date:\t\t\t{date}")

                if contact:
                    print(f"Contact:\t\t{contact}")

                if users:
                    print(f"Users:\t\t\t{users}")

                print("Defined on file(s):\t" + ", ".join(files))

                if desc:
                    desc = desc.strip("\n")
                    print(f"\n{desc}\n")

        if not found_keys:
            print(f"Regular expression /{expr}/ not found.")