Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

docs: kdoc: better handle source when producing YAML output

The current logic was storing the symbols' source code in a list,
not linked to the actual KdocItem. While this works fine when
kernel-doc markups are OK, in places where there is a "/**"
without a valid kernel-doc markup, the 1:1 match between source
code and KdocItem ends up not happening, causing problems when
generating the YAML output.

Fix it by storing the source code directly into the KdocItem
structure.

This shouldn't affect performance or memory footprint, except
when the --yaml option is used.

While here, add a __repr__() function for KdocItem, as it
helps with debugging.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <77902dafabb5c3250486aa2dc1568d5fafa95c5b.1774256269.git.mchehab+huawei@kernel.org>

authored by

Mauro Carvalho Chehab and committed by
Jonathan Corbet
99ec67a9 8326e4a2

+79 -72
+1 -7
tools/lib/python/kdoc/kdoc_files.py
··· 203 203 204 204 self.results[fname] = entries 205 205 206 - source = doc.get_source() 207 - if source: 208 - self.source[fname] = source 209 - 210 206 def process_export_file(self, fname): 211 207 """ 212 208 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. ··· 290 294 291 295 self.errors = 0 292 296 self.results = {} 293 - self.source = {} 294 297 295 298 self.files = set() 296 299 self.export_files = set() ··· 359 364 function_table, enable_lineno, 360 365 no_doc_sections) 361 366 362 - self.test_file.output_symbols(fname, symbols, 363 - self.source.get(fname)) 367 + self.test_file.output_symbols(fname, symbols) 364 368 365 369 continue 366 370
+5 -1
tools/lib/python/kdoc/kdoc_item.py
··· 14 14 then pass into the output modules. 15 15 """ 16 16 17 - def __init__(self, name, fname, type, start_line, **other_stuff): 17 + def __init__(self, name, fname, type, start_line, 18 + **other_stuff): 18 19 self.name = name 19 20 self.fname = fname 20 21 self.type = type ··· 60 59 61 60 def __getitem__(self, key): 62 61 return self.get(key) 62 + 63 + def __repr__(self): 64 + return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})" 63 65 64 66 @classmethod 65 67 def from_dict(cls, d):
+50 -50
tools/lib/python/kdoc/kdoc_parser.py
··· 265 265 # Place all potential outputs into an array 266 266 self.entries = [] 267 267 268 - # When store_src is true, the kernel-doc source content is stored here 269 - self.source = None 270 - 271 268 # 272 269 # We need Python 3.7 for its "dicts remember the insertion 273 270 # order" guarantee ··· 717 720 return declaration 718 721 719 722 720 - def dump_struct(self, ln, proto): 723 + def dump_struct(self, ln, proto, source): 721 724 """ 722 725 Store an entry for a ``struct`` or ``union`` 723 726 """ 724 727 # 725 728 # Do the basic parse to get the pieces of the declaration. 726 729 # 730 + source = source 727 731 proto = trim_private_members(proto) 728 732 struct_parts = self.split_struct_proto(proto) 729 733 if not struct_parts: ··· 754 756 declaration_name) 755 757 self.check_sections(ln, declaration_name, decl_type) 756 758 self.output_declaration(decl_type, declaration_name, 759 + source=source, 757 760 definition=self.format_struct_decl(declaration), 758 761 purpose=self.entry.declaration_purpose) 759 762 760 - def dump_enum(self, ln, proto): 763 + def dump_enum(self, ln, proto, source): 761 764 """ 762 765 Store an ``enum`` inside self.entries array. 763 766 """ ··· 766 767 # Strip preprocessor directives. Note that this depends on the 767 768 # trailing semicolon we added in process_proto_type(). 768 769 # 770 + source = source 769 771 proto = trim_private_members(proto) 770 772 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 771 773 # ··· 831 831 f"Excess enum value '@{k}' description in '{declaration_name}'") 832 832 833 833 self.output_declaration('enum', declaration_name, 834 + source=source, 834 835 purpose=self.entry.declaration_purpose) 835 836 836 - def dump_var(self, ln, proto): 837 + def dump_var(self, ln, proto, source): 837 838 """ 838 839 Store variables that are part of kAPI. 
839 840 """ ··· 847 846 # 848 847 # Store the full prototype before modifying it 849 848 # 849 + source = source 850 850 full_proto = proto 851 851 declaration_name = None 852 852 ··· 897 895 default_val = default_val.lstrip("=").strip() 898 896 899 897 self.output_declaration("var", declaration_name, 898 + source=source, 900 899 full_proto=full_proto, 901 900 default_val=default_val, 902 901 purpose=self.entry.declaration_purpose) 903 902 904 - def dump_declaration(self, ln, prototype): 903 + def dump_declaration(self, ln, prototype, source): 905 904 """ 906 905 Store a data declaration inside self.entries array. 907 906 """ 908 907 909 908 if self.entry.decl_type == "enum": 910 - self.dump_enum(ln, prototype) 909 + self.dump_enum(ln, prototype, source) 911 910 elif self.entry.decl_type == "typedef": 912 - self.dump_typedef(ln, prototype) 911 + self.dump_typedef(ln, prototype, source) 913 912 elif self.entry.decl_type in ["union", "struct"]: 914 - self.dump_struct(ln, prototype) 913 + self.dump_struct(ln, prototype, source) 915 914 elif self.entry.decl_type == "var": 916 - self.dump_var(ln, prototype) 915 + self.dump_var(ln, prototype, source) 917 916 else: 918 917 # This would be a bug 919 918 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 920 919 921 - def dump_function(self, ln, prototype): 920 + def dump_function(self, ln, prototype, source): 922 921 """ 923 922 Store a function or function macro inside self.entries array. 924 923 """ 925 924 925 + source = source 926 926 found = func_macro = False 927 927 return_type = '' 928 928 decl_type = 'function' ··· 1017 1013 # Store the result. 
1018 1014 # 1019 1015 self.output_declaration(decl_type, declaration_name, 1016 + source=source, 1020 1017 typedef=('typedef' in return_type), 1021 1018 functiontype=return_type, 1022 1019 purpose=self.entry.declaration_purpose, 1023 1020 func_macro=func_macro) 1024 1021 1025 1022 1026 - def dump_typedef(self, ln, proto): 1023 + def dump_typedef(self, ln, proto, source): 1027 1024 """ 1028 1025 Store a ``typedef`` inside self.entries array. 1029 1026 """ ··· 1034 1029 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1035 1030 typedef_ident = r'\*?\s*(\w\S+)\s*' 1036 1031 typedef_args = r'\s*\((.*)\);' 1032 + 1033 + source = source 1037 1034 1038 1035 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1039 1036 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) ··· 1057 1050 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1058 1051 1059 1052 self.output_declaration('function', declaration_name, 1053 + source=source, 1060 1054 typedef=True, 1061 1055 functiontype=return_type, 1062 1056 purpose=self.entry.declaration_purpose) ··· 1075 1067 return 1076 1068 1077 1069 self.output_declaration('typedef', declaration_name, 1070 + source=source, 1078 1071 purpose=self.entry.declaration_purpose) 1079 1072 return 1080 1073 ··· 1113 1104 function_set.add(symbol) 1114 1105 return True 1115 1106 1116 - def process_normal(self, ln, line): 1107 + def process_normal(self, ln, line, source): 1117 1108 """ 1118 1109 STATE_NORMAL: looking for the ``/**`` to begin everything. 
1119 1110 """ ··· 1127 1118 # next line is always the function name 1128 1119 self.state = state.NAME 1129 1120 1130 - def process_name(self, ln, line): 1121 + def process_name(self, ln, line, source): 1131 1122 """ 1132 1123 STATE_NAME: Looking for the "name - description" line 1133 1124 """ ··· 1260 1251 return False 1261 1252 1262 1253 1263 - def process_decl(self, ln, line): 1254 + def process_decl(self, ln, line, source): 1264 1255 """ 1265 1256 STATE_DECLARATION: We've seen the beginning of a declaration. 1266 1257 """ ··· 1289 1280 self.emit_msg(ln, f"bad line: {line}") 1290 1281 1291 1282 1292 - def process_special(self, ln, line): 1283 + def process_special(self, ln, line, source): 1293 1284 """ 1294 1285 STATE_SPECIAL_SECTION: a section ending with a blank line. 1295 1286 """ ··· 1340 1331 # Unknown line, ignore 1341 1332 self.emit_msg(ln, f"bad line: {line}") 1342 1333 1343 - def process_body(self, ln, line): 1334 + def process_body(self, ln, line, source): 1344 1335 """ 1345 1336 STATE_BODY: the bulk of a kerneldoc comment. 1346 1337 """ ··· 1354 1345 # Unknown line, ignore 1355 1346 self.emit_msg(ln, f"bad line: {line}") 1356 1347 1357 - def process_inline_name(self, ln, line): 1348 + def process_inline_name(self, ln, line, source): 1358 1349 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1359 1350 1360 1351 if doc_inline_sect.search(line): ··· 1372 1363 # Don't let it add partial comments at the code, as breaks the 1373 1364 # logic meant to remove comments from prototypes. 1374 1365 # 1375 - self.process_proto_type(ln, "/**\n" + line) 1366 + self.process_proto_type(ln, "/**\n" + line, source) 1376 1367 # else ... ?? 
1377 1368 1378 - def process_inline_text(self, ln, line): 1369 + def process_inline_text(self, ln, line, source): 1379 1370 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1380 1371 1381 1372 if doc_inline_end.search(line): ··· 1461 1452 1462 1453 return proto 1463 1454 1464 - def process_proto_function(self, ln, line): 1455 + def process_proto_function(self, ln, line, source): 1465 1456 """Ancillary routine to process a function prototype.""" 1466 1457 1467 1458 # strip C99-style comments to end of line ··· 1503 1494 # 1504 1495 # ... and we're done 1505 1496 # 1506 - self.dump_function(ln, self.entry.prototype) 1497 + self.dump_function(ln, self.entry.prototype, source) 1507 1498 self.reset_state(ln) 1508 1499 1509 - def process_proto_type(self, ln, line): 1500 + def process_proto_type(self, ln, line, source): 1510 1501 """ 1511 1502 Ancillary routine to process a type. 1512 1503 """ ··· 1536 1527 elif chunk == '}': 1537 1528 self.entry.brcount -= 1 1538 1529 elif chunk == ';' and self.entry.brcount <= 0: 1539 - self.dump_declaration(ln, self.entry.prototype) 1530 + self.dump_declaration(ln, self.entry.prototype, source) 1540 1531 self.reset_state(ln) 1541 1532 return 1542 1533 # ··· 1545 1536 # 1546 1537 self.entry.prototype += ' ' 1547 1538 1548 - def process_proto(self, ln, line): 1539 + def process_proto(self, ln, line, source): 1549 1540 """STATE_PROTO: reading a function/whatever prototype.""" 1550 1541 1551 1542 if doc_inline_oneline.search(line): ··· 1557 1548 self.state = state.INLINE_NAME 1558 1549 1559 1550 elif self.entry.decl_type == 'function': 1560 - self.process_proto_function(ln, line) 1551 + self.process_proto_function(ln, line, source) 1561 1552 1562 1553 else: 1563 - self.process_proto_type(ln, line) 1554 + self.process_proto_type(ln, line, source) 1564 1555 1565 - def process_docblock(self, ln, line): 1556 + def process_docblock(self, ln, line, source): 1566 1557 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1567 1558 1568 1559 
if doc_end.search(line): 1569 1560 self.dump_section() 1570 - self.output_declaration("doc", self.entry.identifier) 1561 + self.output_declaration("doc", self.entry.identifier, 1562 + source=source) 1571 1563 self.reset_state(ln) 1572 1564 1573 1565 elif doc_content.search(line): ··· 1606 1596 state.DOCBLOCK: process_docblock, 1607 1597 } 1608 1598 1609 - def get_source(self): 1610 - """ 1611 - Return the file content of the lines handled by kernel-doc at the 1612 - latest parse_kdoc() run. 1613 - 1614 - Returns none if KernelDoc() was not initialized with store_src, 1615 - """ 1616 - return self.source 1617 - 1618 1599 def parse_kdoc(self): 1619 1600 """ 1620 1601 Open and process each line of a C source file. ··· 1619 1618 prev = "" 1620 1619 prev_ln = None 1621 1620 export_table = set() 1622 - self.source = [] 1623 1621 self.state = state.NORMAL 1622 + source = "" 1624 1623 1625 1624 try: 1626 1625 with open(self.fname, "r", encoding="utf8", ··· 1647 1646 ln, state.name[self.state], 1648 1647 line) 1649 1648 1650 - prev_state = self.state 1649 + if self.store_src: 1650 + if source and self.state == state.NORMAL: 1651 + source = "" 1652 + elif self.state != state.NORMAL: 1653 + source += line + "\n" 1651 1654 1652 1655 # This is an optimization over the original script. 1653 1656 # There, when export_file was used for the same file, ··· 1660 1655 # 1661 1656 if (self.state != state.NORMAL) or \ 1662 1657 not self.process_export(export_table, line): 1658 + prev_state = self.state 1663 1659 # Hand this line to the appropriate state handler 1664 - self.state_actions[self.state](self, ln, line) 1665 - 1666 - if self.store_src and prev_state != self.state or self.state != state.NORMAL: 1667 - if self.state == state.NAME: 1668 - # A "/**" was detected. 
Add a new source element 1669 - self.source.append({"ln": ln, "data": line + "\n"}) 1670 - else: 1671 - # Append to the existing one 1672 - self.source[-1]["data"] += line + "\n" 1660 + self.state_actions[self.state](self, ln, line, source) 1661 + if prev_state == state.NORMAL and self.state != state.NORMAL: 1662 + source += line + "\n" 1673 1663 1674 1664 self.emit_unused_warnings() 1675 1665
+14 -14
tools/lib/python/kdoc/kdoc_yaml_file.py
··· 85 85 86 86 return d 87 87 88 - def output_symbols(self, fname, symbols, source): 88 + def output_symbols(self, fname, symbols): 89 89 """ 90 90 Store source, symbols and output strings at self.tests. 91 91 """ ··· 96 96 kdoc_item = [] 97 97 expected = [] 98 98 99 - if not symbols and not source: 100 - return 101 - 102 - if not source or len(symbols) != len(source): 103 - print(f"Warning: lengths are different. Ignoring {fname}") 104 - 105 - # Folding without line numbers is too hard. 106 - # The right thing to do here to proceed would be to delete 107 - # not-handled source blocks, as len(source) should be bigger 108 - # than len(symbols) 99 + # 100 + # Source code didn't produce any symbol 101 + # 102 + if not symbols: 109 103 return 110 104 111 105 base_name = "test_" + fname.replace(".", "_").replace("/", "_") ··· 109 115 for i in range(0, len(symbols)): 110 116 arg = symbols[i] 111 117 112 - if "KdocItem" in self.yaml_content: 118 + source = arg.get("source", "") 119 + 120 + if arg and "KdocItem" in self.yaml_content: 113 121 msg = self.get_kdoc_item(arg) 122 + 123 + other_stuff = msg.get("other_stuff", {}) 124 + if "source" in other_stuff: 125 + del other_stuff["source"] 114 126 115 127 expected_dict["kdoc_item"] = msg 116 128 ··· 132 132 133 133 test = { 134 134 "name": name, 135 - "description": f"{fname} line {source[i]["ln"]}", 135 + "description": f"{fname} line {arg.declaration_start_line}", 136 136 "fname": fname, 137 - "source": source[i]["data"], 137 + "source": source, 138 138 "expected": [expected_dict] 139 139 } 140 140
+9
tools/unittests/test_kdoc_parser.py
··· 167 167 self.assertIsInstance(entry, KdocItem) 168 168 169 169 d = vars(entry) 170 + 171 + other_stuff = d.get("other_stuff", {}) 172 + if "source" in other_stuff: 173 + del other_stuff["source"] 174 + 170 175 for key, value in expected.items(): 176 + if key == "other_stuff": 177 + if "source" in value: 178 + del value["source"] 179 + 171 180 result = clean_whitespc(d[key], relax_whitespace) 172 181 value = clean_whitespc(value, relax_whitespace) 173 182