Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# xxpylint: disable=R0903
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Convert ABI what into regular expressions
8"""
9
10import re
11import sys
12
13from pprint import pformat
14
15from abi.abi_parser import AbiParser
16from abi.helpers import AbiDebug
17
class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    Each sysfs ``What:`` entry is converted into a regular expression by
    applying, in order, the substitutions listed at ``re_whats``.

    There are some optimizations here to allow a quick symbol search:
    instead of placing all expressions on a single list and doing a linear
    search, which is very time consuming, create a tree with one depth,
    grouping expressions by a path component that has no wildcards.

    Yet, sometimes a full search will be needed, so there is a special
    branch on such group tree (``leave_others``) where expressions without
    a usable path component are placed.
    """

    #: Escape only ASCII visible characters.
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    #: Special group for other nodes.
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    #
    # The order matters: several rules replace a construct by a placeholder
    # character so that later rules won't touch it, and the placeholders are
    # converted back at the end:
    #   \xf4 and \xf5 -> "[" and "]"
    #   \xf6          -> escaped dot
    #   \xf7          -> "+"
    #   \xff          -> "\d+"
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."), "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"), "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"), "\xff"),
        (re.compile(r"\[0:\d+\]"), "\xff"),
        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The backreferences must reach re.sub() as "\1" and "\2": on a
        # non-raw string, a bare "\1" is the control character 0x01.
        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        # NOTE(review): a repeated capture group keeps only its last
        # iteration, so "{a,b,c}" is converted into "(a|c)".
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"), "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        # What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which a parenthesis.
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        # A sequence of ".*" is equivalent to a single one; replacing it
        # by an empty string would drop the wildcard altogether.
        (re.compile(r"(?:\.\*){2,}"), ".*"),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]

    #: Regex to check if the symbol name has a number on it.
    re_has_num = re.compile(r"\\d")

    #: Symbol name after escape_chars that are considered a devnode basename.
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    #: Drops the backslash from an escaped character, used to convert a
    #: name accepted by re_symbol_name back into a plain path component.
    _re_unescape = re.compile(r"\\(.)")

    #: List of popular group names to be skipped to minimize regex group size
    #: Use AbiDebug.SUBGROUP_SIZE to detect those.
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Compile a regular expression and store it on a search group.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming
        exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The group name needs to be something that can be obtained both
        from the sysfs symbol and from the regular expression. So, path
        components are checked from the last to the first one, discarding
        empty ones, the ones at ``skip_names`` and the ones with wildcards.
        The first remaining component is used, without escape characters,
        as this is how get_regexes() will look for it.

        If it can't obtain a subgroup, place the regular expression inside
        a special group (self.leave_others).

        ``what`` is the original What string, used only on messages.
        ``new`` is the regular expression produced from it.

        Expressions that fail to compile are ignored with a warning.
        """

        # Default used when no path component qualifies as a group name
        search_group = self.leave_others

        for name in reversed(new.split("/")):
            if not name or name in self.skip_names:
                continue

            if self.re_symbol_name.match(name):
                # Dict keys are compared against plain path components,
                # so store "foo-bar" instead of "foo\-bar"
                search_group = self._re_unescape.sub(r"\1", name)
                break

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # re.error is available on all Python versions. Since 3.13 it is
        # an alias for re.PatternError.
        try:
            compiled = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             " '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(compiled)

        if self.search_string and what.find(self.search_string) >= 0:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        Groups are picked from the last path component to the first one,
        with the expressions from ``leave_others`` placed at the end.
        """

        candidates = what.split("/")[::-1]
        candidates.append(self.leave_others)

        re_list = []
        for name in candidates:
            if name in self.regex_group:
                re_list.extend(self.regex_group[name])

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to get the ``search_string`` argument.

        ``search_string`` is removed from kwargs before calling the parent
        class constructor. If it is not empty, it is also compiled and
        stored at ``self.re_string``.

        Raises ValueError if search_string is not a valid regular
        expression.
        """

        self.regex_group = None
        self.re_string = None
        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def _what_to_regex(self, what):
        """
        Convert a What string into a regular expression string by applying
        all ``re_whats`` substitutions in order.
        """

        new = what
        for r, s in self.re_whats:
            try:
                new = r.sub(s, new)
            except re.error as e:
                # Help debugging troubles with new regexes
                raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

        return new

    def parse_abi(self, *args, **kwargs):
        """
        Parse the ABI using the parent class and then convert the What
        fields that start with /sys into regular expressions.

        The expressions are stored as strings at the "regex" key of each
        symbol and, compiled, at ``self.regex_group``. Entries whose type
        is "File" are skipped.
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = self._what_to_regex(what)

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggestd_keys = sorted(self.regex_group.keys(),
                                   key=lambda k: len(self.regex_group[k]),
                                   reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggestd_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)