Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit as of the translation commit
(ordered by author date) and the latest English commit from HEAD. If
they differ, it reports the file and the commits that need to be updated.
10
11The usage is as follows:
12- tools/docs/checktransupdate.py -l zh_CN
13This will print all the files that need to be updated or translated in the zh_CN locale.
14- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
15This will only print the status of the specified file.
16- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools
17This will print the status of all files under the directory.
18
19The output is something like:
20Documentation/dev-tools/kfence.rst
21No translation in the locale of zh_CN
22
23Documentation/translations/zh_CN/dev-tools/testing-overview.rst
24commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
251 commits needs resolving in total
26"""
27
import logging
import os
import re
import subprocess
import time
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime
34
35
def get_origin_path(file_path):
    """Map a translation path to the path of the original document.

    The "translations" component and the locale component right after it
    are removed; every other "/"-separated component is kept in order.
    Raises ValueError if the path has no "translations" component.
    """
    parts = file_path.split("/")
    cut = parts.index("translations")
    # keep what precedes "translations" and what follows the locale
    return "/".join(parts[:cut] + parts[cut + 2:])
43
44
def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file.

    Runs ``git log -1`` starting at ``commit`` and limited to ``file_path``.

    Args:
        file_path: path of the file, passed to git after ``--``.
        commit: revision to start from, e.g. "HEAD" or "<hash>^".

    Returns:
        None when git cannot be run or prints no commit, otherwise a dict:
        "hash" (full commit hash), "author_date" and "commit_date"
        (timezone-aware datetime objects) and "message" (the commit message
        as a list of lines).
    """
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    command = [
        "git",
        "log",
        "--pretty=format:%H%n%aD%n%cD%n%n%B",
        commit,
        "-1",
        "--",
        file_path,
    ]
    logging.debug(" ".join(command))
    try:
        # stderr is left alone so git errors still reach the terminal;
        # undecodable bytes in old commit messages are replaced, not fatal.
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return None

    result = proc.stdout.split("\n")
    # hash, author date and commit date occupy the first three lines
    if len(result) < 3:
        return None

    logging.debug("Result: %s", result[0])

    date_format = "%a, %d %b %Y %H:%M:%S %z"
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], date_format),
        "commit_date": datetime.strptime(result[2], date_format),
        # line 3 is the blank separator emitted by the extra %n
        "message": result[4:],
    }
63
64
def get_origin_from_trans(origin_path, t_from_head):
    """Find the origin commit the translation commit was based on.

    Starting at the translation commit, walk back through the history of
    origin_path until a commit is found whose author date is not later
    than the translation's author date. Returns that commit dict, or None
    when the history runs out.
    """
    candidate = get_latest_commit_from(origin_path, t_from_head["hash"])
    while True:
        if candidate is None:
            break
        if candidate["author_date"] <= t_from_head["author_date"]:
            break
        # too new: continue from the parent of this origin commit
        candidate = get_latest_commit_from(origin_path, candidate["hash"] + "^")

    if candidate is not None:
        logging.debug("tracked origin commit id: %s", candidate["hash"])
    return candidate
73
74
def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Find the origin commit named in the translation commit message.

    Two message formats are recognised:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    The first message line matching either format decides the hash; within
    a line, format (1) is tried before format (2). Returns the commit dict
    for that hash on origin_path, or None when no line matches or the
    lookup finds nothing.
    """
    # capture group for a 12-character abbreviated commit hash
    hash_re = r'([0-9a-f]{12})'
    patterns = (
        re.compile(rf'update to commit {hash_re}'),
        re.compile(rf'Update the translation through commit {hash_re}'),
    )

    def hash_in(line):
        """Return the hash referenced by line, or None if it has none."""
        for pattern in patterns:
            found = pattern.search(line)
            if found:
                return found.group(1)
        return None

    candidates = map(hash_in, t_from_head["message"])
    tracked_hash = next((h for h in candidates if h is not None), None)
    if tracked_hash is None:
        return None

    origin_commit = get_latest_commit_from(origin_path, tracked_hash)
    if origin_commit is not None:
        logging.debug("tracked origin commit id: %s", origin_commit["hash"])
    return origin_commit
105
106
def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file.

    Runs ``git log`` over the three-dot range ``commit1...commit2`` limited
    to ``opath``.

    Args:
        opath: path of the file, passed to git after ``--``.
        commit1: first end of the range.
        commit2: second end of the range.

    Returns:
        A list of full commit hashes in the order git prints them (despite
        the function name, the list itself is returned, not its length).
        The list is empty when git cannot be run or prints nothing.
    """
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    command = [
        "git",
        "log",
        "--pretty=format:%H",
        f"{commit1}...{commit2}",
        "--",
        opath,
    ]
    logging.debug(" ".join(command))
    try:
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return []
    # drop the empty strings produced by blank output or trailing newlines
    return [line for line in proc.stdout.split("\n") if line != ""]
116
117
def pretty_output(commit):
    """Return the one-line summary of a commit.

    Args:
        commit: revision to describe.

    Returns:
        The text printed by ``git log -1`` with the format
        ``%h ("%s")``, i.e. abbreviated hash followed by the quoted subject.
        An empty string is returned when git cannot be run or prints nothing.
    """
    # An argument list needs no shell quoting around the format string.
    command = ["git", "log", '--pretty=format:%h ("%s")', "-1", commit]
    logging.debug(" ".join(command))
    try:
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return ""
    return proc.stdout
124
125
def valid_commit(commit):
    """Tell whether a commit should be counted.

    A commit is rejected when its one-line summary contains "Merge tag".
    """
    summary = pretty_output(commit)
    is_merge_tag = "Merge tag" in summary
    return not is_merge_tag
130
def check_per_file(file_path):
    """Report the origin commits a translated file has not caught up with.

    Resolves the original document for file_path, determines which origin
    commit the latest translation commit corresponds to, and logs every
    later origin commit (skipping those rejected by valid_commit) together
    with a total. Problems are logged as errors and end the check early.
    """
    origin = get_origin_path(file_path)
    if not os.path.isfile(origin):
        logging.error("Cannot find the origin path for %s", file_path)
        return

    origin_head = get_latest_commit_from(origin, "HEAD")
    trans_head = get_latest_commit_from(file_path, "HEAD")
    if any(found is None for found in (origin_head, trans_head)):
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    # the hash written in the commit message is preferred over the
    # author-date heuristic, which is only the fallback
    tracked = get_origin_from_trans_smartly(origin, trans_head)
    if tracked is None:
        tracked = get_origin_from_trans(origin, trans_head)
    if tracked is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if tracked["hash"] == origin_head["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    pending = get_commits_count_between(origin, tracked["hash"], origin_head["hash"])
    unresolved = 0
    for sha in pending:
        if not valid_commit(sha):
            continue
        logging.info("commit %s", pretty_output(sha))
        unresolved += 1
    logging.info("%d commits needs resolving in total\n", unresolved)
168
169
def valid_locales(locale):
    """Check if the locale is valid or not.

    Intended as an argparse ``type=`` callable. A locale is valid when
    ``Documentation/translations/<locale>`` is a directory two levels above
    the directory containing this script.

    Args:
        locale: locale name, e.g. "zh_CN".

    Returns:
        The locale unchanged when it is valid.

    Raises:
        ArgumentTypeError: when the locale directory does not exist. The
            message names the rejected locale.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        # f-string so the offending value is shown, not the literal "{locale}"
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
177
178
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """Collect files under folder whose path ends with include_suffix.

    Directories are visited from an explicit stack. Any directory whose
    base name appears in exclude_folders (the starting folder included) is
    skipped together with everything below it.
    """
    matched = []
    pending = [folder]

    while pending:
        current = pending.pop()
        if os.path.basename(current) in exclude_folders:
            # excluded directory: do not descend
            continue
        children = (os.path.join(current, name) for name in os.listdir(current))
        for child in children:
            if os.path.isdir(child):
                pending.append(child)
            elif child.endswith(include_suffix):
                matched.append(child)

    return matched
199
200
class DmesgFormatter(logging.Formatter):
    """Log formatter producing dmesg-like "[timestamp] message" lines"""

    def format(self, record):
        # the stamp is the current time.time() value taken while formatting,
        # right-aligned to at least 10 characters with 6 decimals
        return f"[{time.time():>10.6f}] {record.getMessage()}"
208
209
def config_logging(log_level, log_file="checktransupdate.log"):
    """Set up the root logger to write to the console and to log_file.

    The root logger and both handlers are set to log_level, and both
    handlers format records with DmesgFormatter.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)

    dmesg_format = DmesgFormatter()

    # console first, then file: the same order the handlers get attached in
    handlers = (logging.StreamHandler(), logging.FileHandler(log_file))
    for handler in handlers:
        handler.setLevel(log_level)
        handler.setFormatter(dmesg_format)
    for handler in handlers:
        root_logger.addHandler(handler)
232
233
def main():
    """Main function of the script.

    Parses the command line, configures logging, builds the list of
    translation files to examine and runs check_per_file() on each of them
    from the root of the kernel tree (two levels above this script).

    Without positional arguments every ".rst" file under Documentation
    (outside the "translations" and "output" folders) is mapped to its
    counterpart in the chosen locale; missing counterparts are reported
    unless --no-print-missing-translations is given. With positional
    arguments, files are checked directly and directories are searched
    recursively for ".rst" files.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files or directories to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        doc_root = os.path.join(linux_path, "Documentation")
        trans_root = os.path.join(doc_root, "translations", args.locale)
        official_files = list_files_with_excluding_folders(
            doc_root, ["translations", "output"], "rst"
        )

        for file in official_files:
            # Build the translated path relative to the Documentation root,
            # so an ancestor directory that happens to be called
            # "Documentation" cannot confuse the mapping.
            new_file = os.path.join(trans_root, os.path.relpath(file, doc_root))
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)
    else:
        # check if the files are directories or files
        new_files = []
        for file in files:
            if os.path.isfile(file):
                new_files.append(file)
            elif os.path.isdir(file):
                # for directories, list all files in the directory and its subfolders
                new_files.extend(list_files_with_excluding_folders(file, [], "rst"))
            else:
                # tell the user instead of dropping the argument silently
                logging.warning("Skipping %s: not a file or directory", file)
        files = new_files

    # paths are resolved against the current directory before it changes
    files = [os.path.relpath(os.path.abspath(path), linux_path) for path in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
316
317
# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()