Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
6
7"""
8Classes and functions related to reading a C language source or header FILE
9and extract embedded documentation comments from it.
10"""
11
12import sys
13import re
14from pprint import pformat
15
16from kdoc.c_lex import CTokenizer, tokenizer_set_log
17from kdoc.kdoc_re import KernRe
18from kdoc.kdoc_item import KdocItem
19
#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# They are declared in lowercase, outside any class, to keep the conversion
# from the Perl script straightforward.
#
# As those are evaluated at the beginning, no need to cache them
#

# Opening of a kernel-doc comment; whitespace is allowed after the "/**".
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

doc_end = KernRe(r'\*/', cache=False)
doc_com = KernRe(r'\s*\*\s*', cache=False)
doc_com_body = KernRe(r'\s*\* ?', cache=False)
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
known_sections = KernRe(known_section_names, flags=re.I)
doc_sect = doc_com + KernRe(
    r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
    flags=re.I, cache=False)

doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(
    r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$',
    cache=False)

export_symbol = KernRe(
    r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*',
    cache=False)
export_symbol_ns = KernRe(
    r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*',
    cache=False)

type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(
    r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)",
    cache=False)
doc_begin_func = KernRe(
    str(doc_com)                     # initial " * "
    + r"(?:\w+\s*\*\s*)?"            # type (not captured)
    + r'(?:define\s+)?'              # possible "define" (not captured)
    + r'(\w+)\s*(?:\(\w*\))?\s*'     # name and optional "(...)"
    + r'(?:[-:].*)?$',               # description (not captured)
    cache=False)
72
73#
74# Ancillary functions
75#
76
# Matches any run of two or more whitespace characters.
multi_space = KernRe(r'\s\s+')


def trim_whitespace(s):
    """
    Return ``s`` with both ends stripped and every run of two or more
    whitespace characters replaced by a single blank.
    """
    stripped = s.strip()
    return multi_space.sub(' ', stripped)
83
def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    This function only feeds ``text`` to ``CTokenizer`` and returns the
    string form of the result; any actual filtering is presumably done by
    the tokenizer itself (NOTE(review): confirm against kdoc.c_lex).
    """
    return str(CTokenizer(text))
91
class state:
    """
    Integer identifiers for the states of the parser's state machine,
    plus a table giving the printable name of each one.
    """

    #: Normal code.
    NORMAL = 0
    #: Looking for function name.
    NAME = 1
    #: We have seen a declaration which might not be done.
    DECLARATION = 2
    #: The body of the comment.
    BODY = 3
    #: Doc section ending with a blank line.
    SPECIAL_SECTION = 4
    #: Scanning prototype.
    PROTO = 5
    #: Documentation block.
    DOCBLOCK = 6
    #: Gathering doc outside main block.
    INLINE_NAME = 7
    #: Reading the body of inline docs.
    INLINE_TEXT = 8

    #: Names for each parser state, indexed by the state value.
    name = [
        "NORMAL", "NAME", "DECLARATION",
        "BODY", "SPECIAL_SECTION", "PROTO",
        "DOCBLOCK", "INLINE_NAME", "INLINE_TEXT",
    ]
120
121
SECTION_DEFAULT = "Description"  #: Default section.


class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    An entry accumulates the text of the section currently being read,
    the finished sections and parameter descriptions, and the warnings
    raised while parsing it.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        :param config: object providing at least ``log`` (used for
                       non-warning messages).
        :param fname:  name of the file being parsed, used in messages.
        :param ln:     line number; the declaration is recorded as
                       starting at ``ln + 1``.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.sections_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # Section being gathered.  Initialised here so that dump_section()
        # and begin_section(dump=True) are safe to call even when no
        # section has been started yet.
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Non-warning messages go straight to ``config.log.info``.  Warnings
        are only stored in ``self.warnings``: output is delegated to the
        output logic, so that warnings are reported only for symbols that
        are actually output.
        """
        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title=SECTION_DEFAULT, dump=False):
        """
        Begin a new section.

        :param line_no: line where the section starts.
        :param title:   section name.
        :param dump:    when true, store the section gathered so far
                        before switching to the new one.
        """
        if dump:
            self.dump_section(start_new=True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections whose name matches ``type_param`` ("@name") are stored as
        parameter descriptions; everything else is stored in
        ``self.sections``.  When ``start_new`` is true the entry is reset
        to an empty default section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.sections_start_lines[name] = self.new_start_line
                self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []
231
232python_warning = False
233
234class KernelDoc:
235 """
236 Read a C language source or header FILE and extract embedded
237 documentation comments.
238 """
239
240 #: Name of context section.
241 section_context = "Context"
242
243 #: Name of return section.
244 section_return = "Return"
245
246 #: String to write when a parameter is not described.
247 undescribed = "-- undescribed --"
248
249 def __init__(self, config, fname, xforms, store_src=False):
250 """Initialize internal variables"""
251
252 self.fname = fname
253 self.config = config
254 self.xforms = xforms
255 self.store_src = store_src
256
257 tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
258
259 # Initial state for the state machines
260 self.state = state.NORMAL
261
262 # Store entry currently being processed
263 self.entry = None
264
265 # Place all potential outputs into an array
266 self.entries = []
267
268 #
269 # We need Python 3.7 for its "dicts remember the insertion
270 # order" guarantee
271 #
272 global python_warning
273 if (not python_warning and
274 sys.version_info.major == 3 and sys.version_info.minor < 7):
275
276 self.emit_msg(0,
277 'Python 3.7 or later is required for correct results')
278 python_warning = True
279
280 def emit_msg(self, ln, msg, *, warning=True):
281 """Emit a message"""
282
283 if self.entry:
284 self.entry.emit_msg(ln, msg, warning=warning)
285 return
286
287 log_msg = f"{self.fname}:{ln} {msg}"
288
289 if warning:
290 self.config.log.warning(log_msg)
291 else:
292 self.config.log.info(log_msg)
293
294 def dump_section(self, start_new=True):
295 """
296 Dump section contents to arrays/hashes intended for that purpose.
297 """
298
299 if self.entry:
300 self.entry.dump_section(start_new)
301
302 # TODO: rename it to store_declaration after removal of kernel-doc.pl
303 def output_declaration(self, dtype, name, **args):
304 """
305 Store the entry into an entry array.
306
307 The actual output and output filters will be handled elsewhere.
308 """
309
310 item = KdocItem(name, self.fname, dtype,
311 self.entry.declaration_start_line, **args)
312 item.warnings = self.entry.warnings
313
314 # Drop empty sections
315 # TODO: improve empty sections logic to emit warnings
316 sections = self.entry.sections
317 for section in ["Description", "Return"]:
318 if section in sections and not sections[section].rstrip():
319 del sections[section]
320 item.set_sections(sections, self.entry.sections_start_lines)
321 item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
322 self.entry.parametertypes,
323 self.entry.parameterdesc_start_lines)
324 self.entries.append(item)
325
326 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
327
328 def emit_unused_warnings(self):
329 """
330 When the parser fails to produce a valid entry, it places some
331 warnings under `entry.warnings` that will be discarded when resetting
332 the state.
333
334 Ensure that those warnings are not lost.
335
336 .. note::
337
338 Because we are calling `config.warning()` here, those
339 warnings are not filtered by the `-W` parameters: they will all
340 be produced even when `-Wreturn`, `-Wshort-desc`, and/or
341 `-Wcontents-before-sections` are used.
342
343 Allowing those warnings to be filtered is complex, because it
344 would require storing them in a buffer and then filtering them
345 during the output step of the code, depending on the
346 selected symbols.
347 """
348 if self.entry and self.entry not in self.entries:
349 for log_msg in self.entry.warnings:
350 self.config.warning(log_msg)
351
352 def reset_state(self, ln):
353 """
354 Ancillary routine to create a new entry. It initializes all
355 variables used by the state machine.
356 """
357
358 self.emit_unused_warnings()
359
360 self.entry = KernelEntry(self.config, self.fname, ln)
361
362 # State flags
363 self.state = state.NORMAL
364
365 def push_parameter(self, ln, decl_type, param, dtype,
366 org_arg, declaration_name):
367 """
368 Store parameters and their descriptions at self.entry.
369 """
370
371 if self.entry.anon_struct_union and dtype == "" and param == "}":
372 return # Ignore the ending }; from anonymous struct/union
373
374 self.entry.anon_struct_union = False
375
376 param = KernRe(r'[\[\)].*').sub('', param, count=1)
377
378 #
379 # Look at various "anonymous type" cases.
380 #
381 if dtype == '':
382 if param.endswith("..."):
383 if len(param) > 3: # there is a name provided, use that
384 param = param[:-3]
385 if not self.entry.parameterdescs.get(param):
386 self.entry.parameterdescs[param] = "variable arguments"
387
388 elif (not param) or param == "void":
389 param = "void"
390 self.entry.parameterdescs[param] = "no arguments"
391
392 elif param in ["struct", "union"]:
393 # Handle unnamed (anonymous) union or struct
394 dtype = param
395 param = "{unnamed_" + param + "}"
396 self.entry.parameterdescs[param] = "anonymous\n"
397 self.entry.anon_struct_union = True
398
399 # Warn if parameter has no description
400 # (but ignore ones starting with # as these are not parameters
401 # but inline preprocessor statements)
402 if param not in self.entry.parameterdescs and not param.startswith("#"):
403 self.entry.parameterdescs[param] = self.undescribed
404
405 if "." not in param:
406 if decl_type == 'function':
407 dname = f"{decl_type} parameter"
408 else:
409 dname = f"{decl_type} member"
410
411 self.emit_msg(ln,
412 f"{dname} '{param}' not described in '{declaration_name}'")
413
414 # Strip spaces from param so that it is one continuous string on
415 # parameterlist. This fixes a problem where check_sections()
416 # cannot find a parameter like "addr[6 + 2]" because it actually
417 # appears as "addr[6", "+", "2]" on the parameter list.
418 # However, it's better to maintain the param string unchanged for
419 # output, so just weaken the string compare in check_sections()
420 # to ignore "[blah" in a parameter string.
421
422 self.entry.parameterlist.append(param)
423 org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
424 self.entry.parametertypes[param] = org_arg
425
426
427 def create_parameter_list(self, ln, decl_type, args,
428 splitter, declaration_name):
429 """
430 Creates a list of parameters, storing them at self.entry.
431 """
432
433 # temporarily replace all commas inside function pointer definition
434 arg_expr = KernRe(r'(\([^\),]+),')
435 while arg_expr.search(args):
436 args = arg_expr.sub(r"\1#", args)
437
438 for arg in args.split(splitter):
439 # Ignore argument attributes
440 arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
441
442 # Strip leading/trailing spaces
443 arg = arg.strip()
444 arg = KernRe(r'\s+').sub(' ', arg, count=1)
445
446 if arg.startswith('#'):
447 # Treat preprocessor directive as a typeless variable just to fill
448 # corresponding data structures "correctly". Catch it later in
449 # output_* subs.
450
451 # Treat preprocessor directive as a typeless variable
452 self.push_parameter(ln, decl_type, arg, "",
453 "", declaration_name)
454 #
455 # The pointer-to-function case.
456 #
457 elif KernRe(r'\(.+\)\s*\(').search(arg):
458 arg = arg.replace('#', ',')
459 r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*"
460 r'([\w\[\].]*)' # Capture the name and possible [array]
461 r'\s*\)') # Make sure the trailing ")" is there
462 if r.match(arg):
463 param = r.group(1)
464 else:
465 self.emit_msg(ln, f"Invalid param: {arg}")
466 param = arg
467 dtype = arg.replace(param, '')
468 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
469 #
470 # The array-of-pointers case. Dig the parameter name out from the middle
471 # of the declaration.
472 #
473 elif KernRe(r'\(.+\)\s*\[').search(arg):
474 r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*"
475 r'([\w.]*?)' # The actual pointer name
476 r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
477 if r.match(arg):
478 param = r.group(1)
479 else:
480 self.emit_msg(ln, f"Invalid param: {arg}")
481 param = arg
482 dtype = arg.replace(param, '')
483 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
484 elif arg:
485 #
486 # Clean up extraneous spaces and split the string at commas; the first
487 # element of the resulting list will also include the type information.
488 #
489 arg = KernRe(r'\s*:\s*').sub(":", arg)
490 arg = KernRe(r'\s*\[').sub('[', arg)
491 args = KernRe(r'\s*,\s*').split(arg)
492 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
493 #
494 # args[0] has a string of "type a". If "a" includes an [array]
495 # declaration, we want to not be fooled by any white space inside
496 # the brackets, so detect and handle that case specially.
497 #
498 r = KernRe(r'^([^[\]]*\s+)(.*)$')
499 if r.match(args[0]):
500 args[0] = r.group(2)
501 dtype = r.group(1)
502 else:
503 # No space in args[0]; this seems wrong but preserves previous behavior
504 dtype = ''
505
506 bitfield_re = KernRe(r'(.*?):(\w+)')
507 for param in args:
508 #
509 # For pointers, shift the star(s) from the variable name to the
510 # type declaration.
511 #
512 r = KernRe(r'^(\*+)\s*(.*)')
513 if r.match(param):
514 self.push_parameter(ln, decl_type, r.group(2),
515 f"{dtype} {r.group(1)}",
516 arg, declaration_name)
517 #
518 # Perform a similar shift for bitfields.
519 #
520 elif bitfield_re.search(param):
521 if dtype != "": # Skip unnamed bit-fields
522 self.push_parameter(ln, decl_type, bitfield_re.group(1),
523 f"{dtype}:{bitfield_re.group(2)}",
524 arg, declaration_name)
525 else:
526 self.push_parameter(ln, decl_type, param, dtype,
527 arg, declaration_name)
528
529 def check_sections(self, ln, decl_name, decl_type):
530 """
531 Check for errors inside sections, emitting warnings if not found
532 parameters are described.
533 """
534 for section in self.entry.sections:
535 if section not in self.entry.parameterlist and \
536 not known_sections.search(section):
537 if decl_type == 'function':
538 dname = f"{decl_type} parameter"
539 else:
540 dname = f"{decl_type} member"
541 self.emit_msg(ln,
542 f"Excess {dname} '{section}' description in '{decl_name}'")
543
544 def check_return_section(self, ln, declaration_name, return_type):
545 """
546 If the function doesn't return void, warns about the lack of a
547 return description.
548 """
549
550 if not self.config.wreturn:
551 return
552
553 # Ignore an empty return type (It's a macro)
554 # Ignore functions with a "void" return type (but not "void *")
555 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
556 return
557
558 if not self.entry.sections.get("Return", None):
559 self.emit_msg(ln,
560 f"No description found for return value of '{declaration_name}'")
561
562 def split_struct_proto(self, proto):
563 """
564 Split apart a structure prototype; returns (struct|union, name,
565 members) or ``None``.
566 """
567
568 type_pattern = r'(struct|union)'
569 qualifiers = [
570 "__attribute__",
571 "__packed",
572 "__aligned",
573 "____cacheline_aligned_in_smp",
574 "____cacheline_aligned",
575 ]
576 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
577
578 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
579 if r.search(proto):
580 return (r.group(1), r.group(2), r.group(3))
581 else:
582 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
583 if r.search(proto):
584 return (r.group(1), r.group(3), r.group(2))
585 return None
586
587 def rewrite_struct_members(self, members):
588 """
589 Process ``struct``/``union`` members from the most deeply nested
590 outward.
591
592 Rewrite the members of a ``struct`` or ``union`` for easier formatting
593 later on. Among other things, this function will turn a member like::
594
595 struct { inner_members; } foo;
596
597 into::
598
599 struct foo; inner_members;
600 """
601
602 #
603 # The trick is in the ``^{`` below - it prevents a match of an outer
604 # ``struct``/``union`` until the inner one has been munged
605 # (removing the ``{`` in the process).
606 #
607 struct_members = KernRe(r'(struct|union)' # 0: declaration type
608 r'([^\{\};]+)' # 1: possible name
609 r'(\{)'
610 r'([^\{\}]*)' # 3: Contents of declaration
611 r'(\})'
612 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration
613 tuples = struct_members.findall(members)
614 while tuples:
615 for t in tuples:
616 newmember = ""
617 oldmember = "".join(t) # Reconstruct the original formatting
618 dtype, name, lbr, content, rbr, rest, semi = t
619 #
620 # Pass through each field name, normalizing the form and formatting.
621 #
622 for s_id in rest.split(','):
623 s_id = s_id.strip()
624 newmember += f"{dtype} {s_id}; "
625 #
626 # Remove bitfield/array/pointer info, getting the bare name.
627 #
628 s_id = KernRe(r'[:\[].*').sub('', s_id)
629 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
630 #
631 # Pass through the members of this inner structure/union.
632 #
633 for arg in content.split(';'):
634 arg = arg.strip()
635 #
636 # Look for (type)(*name)(args) - pointer to function
637 #
638 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
639 if r.match(arg):
640 dtype, name, extra = r.group(1), r.group(2), r.group(3)
641 # Pointer-to-function
642 if not s_id:
643 # Anonymous struct/union
644 newmember += f"{dtype}{name}{extra}; "
645 else:
646 newmember += f"{dtype}{s_id}.{name}{extra}; "
647 #
648 # Otherwise a non-function member.
649 #
650 else:
651 #
652 # Remove bitmap and array portions and spaces around commas
653 #
654 arg = KernRe(r':\s*\d+\s*').sub('', arg)
655 arg = KernRe(r'\[.*\]').sub('', arg)
656 arg = KernRe(r'\s*,\s*').sub(',', arg)
657 #
658 # Look for a normal decl - "type name[,name...]"
659 #
660 r = KernRe(r'(.*)\s+([\S+,]+)')
661 if r.search(arg):
662 for name in r.group(2).split(','):
663 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
664 if not s_id:
665 # Anonymous struct/union
666 newmember += f"{r.group(1)} {name}; "
667 else:
668 newmember += f"{r.group(1)} {s_id}.{name}; "
669 else:
670 newmember += f"{arg}; "
671 #
672 # At the end of the s_id loop, replace the original declaration with
673 # the munged version.
674 #
675 members = members.replace(oldmember, newmember)
676 #
677 # End of the tuple loop - search again and see if there are outer members
678 # that now turn up.
679 #
680 tuples = struct_members.findall(members)
681 return members
682
683 def format_struct_decl(self, declaration):
684 """
685 Format the ``struct`` declaration into a standard form for inclusion
686 in the resulting docs.
687 """
688
689 #
690 # Insert newlines, get rid of extra spaces.
691 #
692 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
693 declaration = KernRe(r'\}\s+;').sub('};', declaration)
694 #
695 # Format inline enums with each member on its own line.
696 #
697 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
698 while r.search(declaration):
699 declaration = r.sub(r'\1,\n\2', declaration)
700 #
701 # Now go through and supply the right number of tabs
702 # for each line.
703 #
704 def_args = declaration.split('\n')
705 level = 1
706 declaration = ""
707 for clause in def_args:
708 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
709 if clause:
710 if '}' in clause and level > 1:
711 level -= 1
712 if not clause.startswith('#'):
713 declaration += "\t" * level
714 declaration += "\t" + clause + "\n"
715 if "{" in clause and "}" not in clause:
716 level += 1
717 return declaration
718
719
720 def dump_struct(self, ln, proto, source):
721 """
722 Store an entry for a ``struct`` or ``union``
723 """
724 #
725 # Do the basic parse to get the pieces of the declaration.
726 #
727 source = source
728 proto = trim_private_members(proto)
729 struct_parts = self.split_struct_proto(proto)
730 if not struct_parts:
731 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
732 return
733 decl_type, declaration_name, members = struct_parts
734
735 if self.entry.identifier != declaration_name:
736 self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
737 f"Prototype was for {decl_type} {declaration_name} instead\n")
738 return
739 #
740 # Go through the list of members applying all of our transformations.
741 #
742 members = self.xforms.apply("struct", members)
743
744 #
745 # Deal with embedded struct and union members, and drop enums entirely.
746 #
747 declaration = members
748 members = self.rewrite_struct_members(members)
749 members = re.sub(r'(\{[^\{\}]*\})', '', members)
750 #
751 # Output the result and we are done.
752 #
753 self.create_parameter_list(ln, decl_type, members, ';',
754 declaration_name)
755 self.check_sections(ln, declaration_name, decl_type)
756 self.output_declaration(decl_type, declaration_name,
757 source=source,
758 definition=self.format_struct_decl(declaration),
759 purpose=self.entry.declaration_purpose)
760
761 def dump_enum(self, ln, proto, source):
762 """
763 Store an ``enum`` inside self.entries array.
764 """
765 #
766 # Strip preprocessor directives. Note that this depends on the
767 # trailing semicolon we added in process_proto_type().
768 #
769 source = source
770 proto = trim_private_members(proto)
771 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
772 #
773 # Parse out the name and members of the enum. Typedef form first.
774 #
775 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
776 if r.search(proto):
777 declaration_name = r.group(2)
778 members = r.group(1)
779 #
780 # Failing that, look for a straight enum
781 #
782 else:
783 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
784 if r.match(proto):
785 declaration_name = r.group(1)
786 members = r.group(2)
787 #
788 # OK, this isn't going to work.
789 #
790 else:
791 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
792 return
793 #
794 # Make sure we found what we were expecting.
795 #
796 if self.entry.identifier != declaration_name:
797 if self.entry.identifier == "":
798 self.emit_msg(ln,
799 f"{proto}: wrong kernel-doc identifier on prototype")
800 else:
801 self.emit_msg(ln,
802 f"expecting prototype for enum {self.entry.identifier}. "
803 f"Prototype was for enum {declaration_name} instead")
804 return
805
806 if not declaration_name:
807 declaration_name = "(anonymous)"
808 #
809 # Parse out the name of each enum member, and verify that we
810 # have a description for it.
811 #
812 member_set = set()
813 members = KernRe(r'\([^;)]*\)').sub('', members)
814 for arg in members.split(','):
815 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
816 if not arg.strip():
817 continue
818
819 self.entry.parameterlist.append(arg)
820 if arg not in self.entry.parameterdescs:
821 self.entry.parameterdescs[arg] = self.undescribed
822 self.emit_msg(ln,
823 f"Enum value '{arg}' not described in enum '{declaration_name}'")
824 member_set.add(arg)
825 #
826 # Ensure that every described member actually exists in the enum.
827 #
828 for k in self.entry.parameterdescs:
829 if k not in member_set:
830 self.emit_msg(ln,
831 f"Excess enum value '@{k}' description in '{declaration_name}'")
832
833 self.output_declaration('enum', declaration_name,
834 source=source,
835 purpose=self.entry.declaration_purpose)
836
837 def dump_var(self, ln, proto, source):
838 """
839 Store variables that are part of kAPI.
840 """
841 VAR_ATTRIBS = [
842 "extern",
843 "const",
844 ]
845 OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
846
847 #
848 # Store the full prototype before modifying it
849 #
850 source = source
851 full_proto = proto
852 declaration_name = None
853
854 #
855 # Handle macro definitions
856 #
857 macro_prefixes = [
858 KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
859 ]
860
861 for r in macro_prefixes:
862 match = r.search(proto)
863 if match:
864 declaration_name = match.group(1)
865 break
866
867 #
868 # Drop comments and macros to have a pure C prototype
869 #
870 if not declaration_name:
871 proto = self.xforms.apply("var", proto)
872
873 proto = proto.rstrip()
874
875 #
876 # Variable name is at the end of the declaration
877 #
878
879 default_val = None
880
881 r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
882 if r.match(proto):
883 if not declaration_name:
884 declaration_name = r.group(1)
885
886 default_val = r.group(2)
887 else:
888 r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
889
890 if r.match(proto):
891 default_val = r.group(1)
892 if not declaration_name:
893 self.emit_msg(ln,f"{proto}: can't parse variable")
894 return
895
896 if default_val:
897 default_val = default_val.lstrip("=").strip()
898
899 self.output_declaration("var", declaration_name,
900 source=source,
901 full_proto=full_proto,
902 default_val=default_val,
903 purpose=self.entry.declaration_purpose)
904
905 def dump_declaration(self, ln, prototype, source):
906 """
907 Store a data declaration inside self.entries array.
908 """
909
910 if self.entry.decl_type == "enum":
911 self.dump_enum(ln, prototype, source)
912 elif self.entry.decl_type == "typedef":
913 self.dump_typedef(ln, prototype, source)
914 elif self.entry.decl_type in ["union", "struct"]:
915 self.dump_struct(ln, prototype, source)
916 elif self.entry.decl_type == "var":
917 self.dump_var(ln, prototype, source)
918 else:
919 # This would be a bug
920 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
921
922 def dump_function(self, ln, prototype, source):
923 """
924 Store a function or function macro inside self.entries array.
925 """
926
927 source = source
928 found = func_macro = False
929 return_type = ''
930 decl_type = 'function'
931
932 #
933 # If we have a macro, remove the "#define" at the front.
934 #
935 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
936 if new_proto != prototype:
937 prototype = new_proto
938 #
939 # Dispense with the simple "#define A B" case here; the key
940 # is the space after the name of the symbol being defined.
941 # NOTE that the seemingly misnamed "func_macro" indicates a
942 # macro *without* arguments.
943 #
944 r = KernRe(r'^(\w+)\s+')
945 if r.search(prototype):
946 return_type = ''
947 declaration_name = r.group(1)
948 func_macro = True
949 found = True
950 else:
951 #
952 # Apply the initial transformations.
953 #
954 prototype = self.xforms.apply("func", prototype)
955
956 # Yes, this truly is vile. We are looking for:
957 # 1. Return type (may be nothing if we're looking at a macro)
958 # 2. Function name
959 # 3. Function parameters.
960 #
961 # All the while we have to watch out for function pointer parameters
962 # (which IIRC is what the two sections are for), C types (these
963 # regexps don't even start to express all the possibilities), and
964 # so on.
965 #
966 # If you mess with these regexps, it's a good idea to check that
967 # the following functions' documentation still comes out right:
968 # - parport_register_device (function pointer parameters)
969 # - atomic_set (macro)
970 # - pci_match_device, __copy_to_user (long return type)
971
972 name = r'\w+'
973 type1 = r'(?:[\w\s]+)?'
974 type2 = r'(?:[\w\s]+\*+)+'
975 #
976 # Attempt to match first on (args) with no internal parentheses; this
977 # lets us easily filter out __acquires() and other post-args stuff. If
978 # that fails, just grab the rest of the line to the last closing
979 # parenthesis.
980 #
981 proto_args = r'\(([^\(]*|.*)\)'
982 #
983 # (Except for the simple macro case) attempt to split up the prototype
984 # in the various ways we understand.
985 #
986 if not found:
987 patterns = [
988 rf'^()({name})\s*{proto_args}',
989 rf'^({type1})\s+({name})\s*{proto_args}',
990 rf'^({type2})\s*({name})\s*{proto_args}',
991 ]
992
993 for p in patterns:
994 r = KernRe(p)
995 if r.match(prototype):
996 return_type = r.group(1)
997 declaration_name = r.group(2)
998 args = r.group(3)
999 self.create_parameter_list(ln, decl_type, args, ',',
1000 declaration_name)
1001 found = True
1002 break
1003 #
1004 # Parsing done; make sure that things are as we expect.
1005 #
1006 if not found:
1007 self.emit_msg(ln,
1008 f"cannot understand function prototype: '{prototype}'")
1009 return
1010 if self.entry.identifier != declaration_name:
1011 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
1012 f"Prototype was for {declaration_name}() instead")
1013 return
1014 self.check_sections(ln, declaration_name, "function")
1015 self.check_return_section(ln, declaration_name, return_type)
1016 #
1017 # Store the result.
1018 #
1019 self.output_declaration(decl_type, declaration_name,
1020 source=source,
1021 typedef=('typedef' in return_type),
1022 functiontype=return_type,
1023 purpose=self.entry.declaration_purpose,
1024 func_macro=func_macro)
1025
1026
def dump_typedef(self, ln, proto, source):
    """
    Store a ``typedef`` inside self.entries array.

    Function typedefs are tried first, both with the name wrapped in
    parentheses and without; a match is stored as a ``function``
    declaration flagged with ``typedef=True``. Anything else is tried as
    a simple ``typedef ... name;``.

    A message is emitted, and nothing is stored, when the parsed name
    differs from the documented identifier or when no pattern matches.
    """
    #
    # We start by looking for function typedefs.
    #
    typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
    typedef_ident = r'\*?\s*(\w\S+)\s*'
    typedef_args = r'\s*\((.*)\);'

    # First form has the name inside parentheses, the second one has not.
    typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
    typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)

    # Parse function typedef prototypes
    for r in [typedef1, typedef2]:
        if not r.match(proto):
            continue

        return_type = r.group(1).strip()
        declaration_name = r.group(2)
        args = r.group(3)

        # The comment must document the typedef that follows it.
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        self.create_parameter_list(ln, 'function', args, ',', declaration_name)

        self.output_declaration('function', declaration_name,
                                source=source,
                                typedef=True,
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose)
        return
    #
    # Not a function, try to parse a simple typedef.
    #
    r = KernRe(r'typedef.*\s+(\w+)\s*;')
    if r.match(proto):
        declaration_name = r.group(1)

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        self.output_declaration('typedef', declaration_name,
                                source=source,
                                purpose=self.entry.declaration_purpose)
        return

    # None of the known typedef shapes matched.
    self.emit_msg(ln, "error: Cannot parse typedef!")
1083
@staticmethod
def process_export(function_set, line):
    """
    process ``EXPORT_SYMBOL*`` tags

    Looks for one export on ``line``; when found, the exported symbol
    (minus any special suffix) is added to ``function_set`` and True is
    returned. Returns False when the line has no export.

    No instance state is used, hence the staticmethod decorator.
    """

    #
    # Only one EXPORT_SYMBOL* is accepted per line, as having several
    # of them on one line would violate Kernel coding style. The plain
    # form is tried before the namespace one.
    #
    for matcher in (export_symbol, export_symbol_ns):
        if matcher.search(line):
            exported = matcher.group(2)
            break
    else:
        return False

    #
    # Some exported symbols are documented under a different name.
    # A horrible hack: strip the known suffixes. Slicing is used to stay
    # compatible with Python < 3.9.
    #
    for tail in ('_noprof',):
        if exported.endswith(tail):
            exported = exported[:-len(tail)]

    function_set.add(exported)
    return True
1115
def process_normal(self, ln, line, source):
    """
    STATE_NORMAL: wait for the ``/**`` line which opens a comment.

    Lines that don't match are ignored. ``source`` is not used here.
    """
    if doc_start.match(line):
        # Begin a fresh entry; the name line is expected to come next.
        self.reset_state(ln)
        self.state = state.NAME
1129
def process_name(self, ln, line, source):
    """
    STATE_NAME: parse the "name - description" line of a comment.

    A ``DOC:`` line switches to STATE_DOCBLOCK. A data or function
    declaration line sets the entry identifier, type and short
    description, then moves on to STATE_DECLARATION or STATE_BODY.
    Lines that can't be understood emit a message; ``source`` is unused.
    """
    #
    # DOC: blocks are handled specially; untitled ones are named
    # "Introduction".
    #
    if doc_block.search(line):
        title = doc_block.group(1) or "Introduction"
        self.entry.begin_section(ln, title)

        self.entry.identifier = self.entry.section
        self.state = state.DOCBLOCK
        return
    #
    # Anything else has to look like a kerneldoc declaration line.
    #
    if not doc_decl.search(line):
        self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
        return

    self.entry.identifier = doc_decl.group(1)

    if doc_begin_data.search(line):
        # Data declaration
        self.entry.decl_type = doc_begin_data.group(1)
        self.entry.identifier = doc_begin_data.group(2)
    elif doc_begin_func.search(line):
        # Function description
        self.entry.identifier = doc_begin_func.group(1)
        self.entry.decl_type = "function"
    else:
        # Neither of them: give up on this comment.
        self.emit_msg(ln,
                      f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
        self.state = state.NORMAL
        return
    #
    # Set up the new kerneldoc entry. The default section is started
    # here in case there are no @param blocks.
    #
    self.state = state.BODY
    self.entry.identifier = self.entry.identifier.strip(" ")
    self.entry.begin_section(ln + 1)
    #
    # The short description follows the first "-" or ":". It *should*
    # be there, but isn't always.
    #
    purpose = KernRe("[-:](.*)")
    if purpose.search(line):
        self.entry.declaration_purpose = trim_whitespace(purpose.group(1))
        self.state = state.DECLARATION
    else:
        self.entry.declaration_purpose = ""

    if not self.entry.declaration_purpose and self.config.wshort_desc:
        self.emit_msg(ln,
                      f"missing initial short description on line:\n{line}")

    # An empty identifier is tolerated only for enums.
    if not self.entry.identifier and self.entry.decl_type != "enum":
        self.emit_msg(ln,
                      f"wrong kernel-doc identifier on line:\n{line}")
        self.state = state.NORMAL

    if self.config.verbose:
        self.emit_msg(ln,
                      f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
                      warning=False)
1207
def is_new_section(self, ln, line):
    """
    Check whether ``line`` starts a new section and, if so, open it.

    On a match the current section is dumped, the new one is started
    with whatever text follows the section name, and the parser state
    is updated. Returns True when a section was started, False otherwise.
    """
    if not doc_sect.search(line):
        return False

    self.state = state.BODY
    #
    # Normalize the section name. Context, Return and @param sections
    # are "special" ones; Description and the others are plain body.
    #
    name = doc_sect.group(1)
    lowered = name.lower()
    if lowered == 'description':
        name = 'Description'
    elif lowered == 'context':
        name = 'Context'
        self.state = state.SPECIAL_SECTION
    elif lowered in ["@return", "@returns", "return", "returns"]:
        name = "Return"
        self.state = state.SPECIAL_SECTION
    elif name[0] == '@':
        self.state = state.SPECIAL_SECTION
    #
    # Close the previous section and seed the new one with the rest
    # of the line, if any.
    #
    text = doc_sect.group(2) or ""
    self.dump_section()
    self.entry.begin_section(ln, name)
    self.entry.leading_space = None

    self.entry.add_text(text.lstrip())
    return True
1242
def is_comment_end(self, ln, line):
    """
    Check whether ``line`` closes the kerneldoc comment and, if so,
    dump the current section and switch to STATE_PROTO.

    Returns True when the comment ended on this line, False otherwise.
    """
    if not doc_end.search(line):
        return False

    self.dump_section()

    # Text glued to the comment terminator (doc_com + <text> + doc_end)
    # is suspicious.
    if KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/').match(line):
        self.emit_msg(ln, f"suspicious ending line: {line}")

    # The prototype is collected starting from the next line.
    self.entry.prototype = ""
    self.entry.new_start_line = ln + 1

    self.state = state.PROTO
    return True
1261
1262
def process_decl(self, ln, line, source):
    """
    STATE_DECLARATION: collect the rest of the short description.

    The declaration part ends at a new section, at the end of the
    comment or at a blank comment line. ``source`` is unused.
    """
    if self.is_new_section(ln, line):
        return
    if self.is_comment_end(ln, line):
        return
    #
    # Only lines with the " * " beginning are understood here.
    #
    if not doc_content.search(line):
        self.emit_msg(ln, f"bad line: {line}")
        return

    text = doc_content.group(1)
    if text == "":
        #
        # Blank line: the declaration part is over, without any
        # "special section" parameter descriptions.
        #
        self.state = state.BODY
        return
    #
    # More of the declaration section to soak up.
    #
    joined = self.entry.declaration_purpose + ' ' + text
    self.entry.declaration_purpose = trim_whitespace(joined)
1290
1291
def process_special(self, ln, line, source):
    """
    STATE_SPECIAL_SECTION: a section which ends at a blank line.

    Continuation lines have their common leading white space removed
    before being added to the section. ``source`` is unused.
    """
    #
    # A line with only the " * " marker finishes the section.
    #
    if KernRe(r"\s*\*\s*$").match(line):
        self.entry.begin_section(ln, dump=True)
        self.state = state.BODY
        return
    #
    # A new section or the end of the comment finish it as well.
    #
    if self.is_new_section(ln, line):
        return
    if self.is_comment_end(ln, line):
        return
    #
    # What remains should be more text for the current section.
    #
    if not doc_content.search(line):
        self.emit_msg(ln, f"bad line: {line}")
        return

    text = doc_content.group(1)
    #
    # Leading white space on the lines after the first one has to go,
    # or Sphinx will get confused. The second line of the section (the
    # None case) tells how much of it there is.
    #
    if self.entry.leading_space is None:
        indent = KernRe(r'^(\s+)')
        self.entry.leading_space = len(indent.group(1)) if indent.match(text) else 0
    #
    # Never trim anything but blanks: shrink the amount down to the
    # first non-blank char found within it.
    #
    for pos in range(self.entry.leading_space):
        if text[pos] != " ":
            self.entry.leading_space = pos
            break

    self.entry.add_text(text[self.entry.leading_space:])
1342
def process_body(self, ln, line, source):
    """
    STATE_BODY: the bulk of a kerneldoc comment.

    Comment text is appended to the current section until a new section
    or the end of the comment is found. ``source`` is unused.
    """
    if self.is_new_section(ln, line):
        return
    if self.is_comment_end(ln, line):
        return

    if not doc_content.search(line):
        # Not a comment line: report it and move on.
        self.emit_msg(ln, f"bad line: {line}")
        return

    self.entry.add_text(doc_content.group(1))
1356
def process_inline_name(self, ln, line, source):
    """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""

    # "@name: text" starts an inline section.
    if doc_inline_sect.search(line):
        self.entry.begin_section(ln, doc_inline_sect.group(1))
        self.entry.add_text(doc_inline_sect.group(2).lstrip())
        self.state = state.INLINE_TEXT
        return

    # The comment got closed before any section was named.
    if doc_inline_end.search(line):
        self.dump_section()
        self.state = state.PROTO
        return

    # Lines matching none of the patterns are silently left alone.
    if not doc_content.search(line):
        return

    self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
    self.state = state.PROTO
    #
    # Don't let it add partial comments at the code, as breaks the
    # logic meant to remove comments from prototypes. So, hand over
    # the line together with the comment start marker.
    #
    self.process_proto_type(ln, "/**\n" + line, source)
1377
def process_inline_text(self, ln, line, source):
    """STATE_INLINE_TEXT: docbook comments within a prototype."""

    # End of the inline comment: store it and go back to the prototype.
    if doc_inline_end.search(line):
        self.dump_section()
        self.state = state.PROTO
        return

    # Otherwise keep collecting text; unrecognized lines are ignored.
    if doc_content.search(line):
        self.entry.add_text(doc_content.group(1))
1387
def syscall_munge(self, ln, proto):         # pylint: disable=W0613
    """
    Handle syscall definitions.

    Turns a ``SYSCALL_DEFINE*()`` prototype into a ``long sys_name(...)``
    one and returns it. ``ln`` is not used.
    """

    # Put the prototype on a single line
    proto = re.sub(r'[\r\n]+', ' ', proto)

    # SYSCALL_DEFINE0 has no arguments at all
    is_void = 'SYSCALL_DEFINE0' in proto

    # Replace SYSCALL_DEFINE with correct return type & function name
    proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)

    # The comma after the syscall name opens the argument list
    if KernRe(r'long\s+(sys_.*?),').search(proto):
        proto = KernRe(',').sub('(', proto, count=1)
    elif is_void:
        proto = KernRe(r'\)').sub('(void)', proto, count=1)

    if is_void:
        return proto

    #
    # Arguments come as "type, name" pairs: blank out the odd-numbered
    # commas, so that argument types & names don't have a comma between
    # them.
    #
    chars = list(proto)
    commas = 0
    for pos, char in enumerate(chars):
        if char != ',':
            continue
        commas += 1
        if commas % 2 == 1:
            chars[pos] = ' '

    return ''.join(chars)
1426
def tracepoint_munge(self, ln, proto):
    """
    Handle tracepoint definitions.

    When both the tracepoint name and its ``TP_PROTO()`` arguments are
    found, returns a ``static inline void trace_<name>(<args>)``
    prototype and prefixes the entry identifier with ``trace_``.
    Otherwise emits a message and returns ``proto`` unchanged.
    """

    #
    # Patterns carrying the tracepoint name, and the group which holds
    # it. All of them are tried: the last one to match wins.
    #
    name_patterns = (
        (r'TRACE_EVENT\((.*?),', 1),
        (r'DEFINE_SINGLE_EVENT\((.*?),', 1),
        (r'DEFINE_EVENT\((.*?),(.*?),', 2),
    )

    name = None
    for pattern, group in name_patterns:
        matcher = KernRe(pattern)
        if matcher.search(proto):
            name = matcher.group(group)

    if name:
        name = name.lstrip()

    args = None
    tp_proto = KernRe(r'TP_PROTO\((.*?)\)')
    if tp_proto.search(proto):
        args = tp_proto.group(1)

    if name and args:
        proto = f"static inline void trace_{name}({args})"
        self.entry.identifier = f"trace_{self.entry.identifier}"
    else:
        self.emit_msg(ln,
                      f"Unrecognized tracepoint format:\n{proto}\n")

    return proto
1463
def process_proto_function(self, ln, line, source):
    """
    Ancillary routine to process a function prototype.

    Prototype text is accumulated line by line. Once a line has ``{``
    or ``;``, or is a ``#define``, the prototype is cleaned up, handed
    to dump_function() and the parser state is reset.
    """

    # strip C99-style comments to end of line
    # NOTE(review): re.S is passed positionally; confirm it lands on
    # the flags parameter of KernRe.
    line = KernRe(r"//.*$", re.S).sub('', line)

    is_define = KernRe(r'\s*#\s*define').match(line)
    #
    # A #define is the whole prototype; other preprocessor lines are
    # skipped; anything else contributes its text up to the first {.
    #
    if is_define:
        self.entry.prototype = line
    elif not line.startswith('#'):
        head = KernRe(r'([^\{]*)')
        if head.match(line):
            self.entry.prototype += head.group(1) + " "
    #
    # Keep collecting until the prototype is complete.
    #
    if not ('{' in line or ';' in line or is_define):
        return

    # strip comments and surrounding spaces
    self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
    #
    # Function pointers like:
    #       int (*pcs_config)(struct foo)
    # are turned into:
    #       int pcs_config(struct foo)
    #
    func_ptr = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
    self.entry.prototype = func_ptr.sub(r'\1\2', self.entry.prototype)
    #
    # Syscalls and tracepoints have declaration syntaxes of their own.
    #
    if 'SYSCALL_DEFINE' in self.entry.prototype:
        self.entry.prototype = self.syscall_munge(ln,
                                                  self.entry.prototype)
    elif KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT').search(self.entry.prototype):
        self.entry.prototype = self.tracepoint_munge(ln,
                                                     self.entry.prototype)
    #
    # All set: store the function and start over.
    #
    self.dump_function(ln, self.entry.prototype, source)
    self.reset_state(ln)
1508
def process_proto_type(self, ln, line, source):
    """
    Ancillary routine to process a type.

    Declaration text is accumulated while tracking the brace depth.
    A semicolon found outside any braces completes the declaration,
    which is handed to dump_declaration() before resetting the state.
    """

    # Strip C99-style comments and surrounding whitespace
    # NOTE(review): re.S is passed positionally; confirm it lands on
    # the flags parameter of KernRe.
    line = KernRe(r"//.*$", re.S).sub('', line).strip()
    if not line:
        return  # nothing to see here

    # To distinguish preprocessor directive from regular declaration later.
    if line.startswith('#'):
        line += ";"
    #
    # Split the declaration on any of { } or ;, and accumulate pieces
    # until we hit a semicolon while not inside {brackets}
    #
    splitter = KernRe(r'(.*?)([{};])')
    for piece in splitter.split(line):
        if not piece:
            continue            # Ignore empty matches

        self.entry.prototype += piece

        if piece == '{':
            self.entry.brcount += 1
        elif piece == '}':
            self.entry.brcount -= 1
        elif piece == ';' and self.entry.brcount <= 0:
            self.dump_declaration(ln, self.entry.prototype, source)
            self.reset_state(ln)
            return
    #
    # The line is over but the declaration isn't: a space stands for
    # the newline.
    #
    self.entry.prototype += ' '
1547
def process_proto(self, ln, line, source):
    """STATE_PROTO: reading a function/whatever prototype."""

    # A whole inline comment on a single line: store it right away.
    if doc_inline_oneline.search(line):
        self.entry.begin_section(ln, doc_inline_oneline.group(1))
        self.entry.add_text(doc_inline_oneline.group(2))
        self.dump_section()
        return

    # Start of a multi-line inline comment.
    if doc_inline_start.search(line):
        self.state = state.INLINE_NAME
        return

    # Plain prototype text: functions and types are handled apart.
    if self.entry.decl_type == 'function':
        handler = self.process_proto_function
    else:
        handler = self.process_proto_type

    handler(ln, line, source)
1564
def process_docblock(self, ln, line, source):
    """STATE_DOCBLOCK: within a ``DOC:`` block."""

    # Keep collecting text until the comment gets closed; lines which
    # match neither pattern are ignored.
    if not doc_end.search(line):
        if doc_content.search(line):
            self.entry.add_text(doc_content.group(1))
        return

    # End of the block: store it as a "doc" entry and start over.
    self.dump_section()
    self.output_declaration("doc", self.entry.identifier,
                            source=source)
    self.reset_state(ln)
1576
def parse_export(self):
    """
    Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.

    Returns the set filled by process_export() for each line of
    ``self.fname``, or None if the file can't be read.
    """

    symbols = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as handle:
            for text in handle:
                self.process_export(symbols, text)
    except IOError:
        return None

    return symbols
1595
#: Maps each parser state to the handler which processes a line while
#: in that state. The values are plain functions, so the caller passes
#: the instance explicitly as first argument.
state_actions = {
    # Outside of any kernel-doc comment
    state.NORMAL:               process_normal,
    # Inside a kernel-doc comment
    state.NAME:                 process_name,
    state.BODY:                 process_body,
    state.DECLARATION:          process_decl,
    state.SPECIAL_SECTION:      process_special,
    # Comments embedded in a prototype
    state.INLINE_NAME:          process_inline_name,
    state.INLINE_TEXT:          process_inline_text,
    # The prototype which follows the comment
    state.PROTO:                process_proto,
    # ``DOC:`` blocks
    state.DOCBLOCK:             process_docblock,
}
1608
def parse_kdoc(self):
    """
    Open and process each line of a C source file.
    The parsing is controlled via a state machine, and the line is passed
    to a different process function depending on the state. The process
    function may update the state as needed.

    Besides parsing kernel-doc tags, it also parses export symbols.

    Returns a tuple with the set of exported symbols and self.entries.
    An OSError is logged instead of being propagated; whatever has been
    collected so far is still returned.
    """

    prev = ""               # Text of pending continuation lines
    prev_ln = None          # Line where the pending continuation started
    export_table = set()
    self.state = state.NORMAL
    source = ""

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:
            for ln, line in enumerate(fp):

                line = line.expandtabs().strip("\n")

                # Group continuation lines on prototypes
                if self.state == state.PROTO:
                    if line.endswith("\\"):
                        prev += line.rstrip("\\")
                        #
                        # Line numbers start at zero and the collected
                        # text may be empty, so None is the only
                        # reliable "nothing pending" marker.
                        #
                        if prev_ln is None:
                            prev_ln = ln
                        continue

                    if prev_ln is not None:
                        # Report the joined line at its first line
                        ln = prev_ln
                        line = prev + line
                        prev = ""
                        prev_ln = None

                self.config.log.debug("%d %s: %s",
                                      ln, state.name[self.state],
                                      line)

                #
                # Drop the source collected for the previous entry once
                # back to the normal state; otherwise keep collecting.
                #
                if self.store_src:
                    if source and self.state == state.NORMAL:
                        source = ""
                    elif self.state != state.NORMAL:
                        source += line + "\n"

                # This is an optimization over the original script.
                # There, when export_file was used for the same file,
                # it was read twice. Here, we use the already-existing
                # loop to parse exported symbols as well.
                #
                if (self.state != state.NORMAL) or \
                   not self.process_export(export_table, line):
                    prev_state = self.state
                    # Hand this line to the appropriate state handler
                    self.state_actions[self.state](self, ln, line, source)
                    # The line which opens a comment belongs to the source
                    if prev_state == state.NORMAL and self.state != state.NORMAL:
                        source += line + "\n"

        self.emit_unused_warnings()

    except OSError:
        self.config.log.error(f"Error: Cannot open file {self.fname}")

    return export_table, self.entries