Skip to content

Commit a30356b

Browse files
QGraintarasmadan
authored andcommitted
tools/check_translation_update.py: check if the translations are up to date
Check whether the translations in docs/translations/LANG/FILES are up to date with docs/FILES, with detailed output. Translations should be committed with "Update to commit HASH (TITLE)".
1 parent b396b4b commit a30356b

File tree

1 file changed

+223
-0
lines changed

1 file changed

+223
-0
lines changed

tools/check_translation_update.py

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
#!/usr/bin/env python
2+
# Copyright 2025 syzkaller project authors. All rights reserved.
3+
# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
4+
# Contributed by QGrain <zhiyuzhang999@gmail.com>
5+
6+
# Intro: Due to the continuous updates of the docs, we would like to know if the translations are up to
7+
# date with the source docs. This script checks the translation files in docs/translations/ by tracking
8+
# the commit hash of the source file, which requires the formatted line "Update to commit HASH (TITLE)"
9+
# to be present in the commit message of the translation file.
10+
11+
# Usage: python tools/check_translation_update.py
12+
13+
import os
14+
import re
15+
import sys
16+
import argparse
17+
import subprocess
18+
19+
def get_git_repo_root(path):
    """Get the root path of the Git repository that contains `path`.

    Args:
        path: Directory to run git in (None means the current working directory).
    Returns:
        str: Output of `git rev-parse --show-toplevel` with surrounding whitespace
        stripped, or None if the directory does not exist, is not inside a Git
        repository, or git could not be run. An error line is printed in every
        None case.
    """
    # subprocess.run raises FileNotFoundError for a missing `cwd` as well as for a
    # missing executable, so rule out the former first to keep the messages accurate.
    if path is not None and not os.path.isdir(path):
        print(f"Error: directory {path} does not exist.")
        return None

    try:
        # The command is passed as a list (no shell) to avoid potential shell injection.
        result = subprocess.run(
            ['git', 'rev-parse', '--show-toplevel'],
            cwd=path,
            capture_output=True,
            text=True,
            check=True
        )
        return result.stdout.strip()
    except subprocess.CalledProcessError:
        print(f"Error: current work directory {path} is not in a Git repo.")
    except FileNotFoundError:
        print("Error: 'git' command not found.")
    except Exception as e:
        print(f"Error: {e}")
    return None
40+
41+
def get_commit_date(repo_root, commit_hash):
    """Look up the commit date of `commit_hash`.

    Args:
        repo_root: Git repository root path (used as the working directory for git)
        commit_hash: Hash of the commit to inspect
    Returns:
        The '%ci' date reported by `git show`, shortened by extract_compact_date(),
        or None if anything goes wrong (an error line is printed in that case).
    """
    git_cmd = ['git', 'show', '-s', '--format=%ci', commit_hash]
    try:
        completed = subprocess.run(git_cmd, cwd=repo_root, capture_output=True, text=True, check=True)
        return extract_compact_date(completed.stdout.strip())
    except Exception as e:
        print(f"Error in getting commit time of {commit_hash}: {e}")
        return None
56+
57+
def get_latest_commit_info(repo_root, file_path):
    """Fetch hash, date and message of the most recent commit that touched a file.

    Args:
        repo_root: Git repository root path (used as the working directory for git)
        file_path: Path to the file
    Returns:
        tuple: (commit_hash, commit_date, commit_message), or (None, None, None) when
        git fails or prints fewer than three lines
    """
    try:
        git_log = subprocess.run(
            ['git', 'log', '-1', '--format=%H%n%ci%n%B', '--', file_path],
            cwd=repo_root,
            capture_output=True,
            text=True,
            check=True,
        )
        output_lines = git_log.stdout.splitlines()
        if len(output_lines) < 3:
            return None, None, None

        # The format string puts the hash on line 1, the date on line 2 and the
        # full commit message on all remaining lines.
        commit_hash, raw_date, *message_lines = output_lines
        return commit_hash, extract_compact_date(raw_date), '\n'.join(message_lines)
    except Exception as e:
        print(f"Fail to get latest commit info of {file_path}: {e}")
        return None, None, None
85+
86+
def extract_source_commit_info(repo_root, file_path):
    """Extract the source commit hash and date that this translation is based on.

    The latest commit message of the translation file is scanned line by line for
    'Update to commit HASH ("TITLE")'; the first line that matches wins.

    Args:
        repo_root: Git repository root path
        file_path: Path to the translation file
    Returns:
        tuple: (source_commit_hash, source_commit_date) or (None, None) if the file
        has no commit message or no line in it matches the expected format. The date
        is None when the hash cannot be resolved by get_commit_date().
    """
    _, _, translation_commit_message = get_latest_commit_info(repo_root, file_path)
    # get_latest_commit_info() yields None when the lookup failed; there is nothing to parse then.
    if not translation_commit_message:
        return None, None

    # Accept anything from a 7-digit abbreviation up to a full 40-digit SHA-1 hash.
    update_pattern = re.compile(r"Update to commit ([0-9a-fA-F]{7,40}) \(\"(.+?)\"\)")
    for line in translation_commit_message.splitlines():
        match = update_pattern.search(line)
        if match:
            source_commit_hash = match.group(1)
            return source_commit_hash, get_commit_date(repo_root, source_commit_hash)

    return None, None
115+
116+
def extract_translation_language(file_path):
    """Return the LANG component of a '.../docs/translations/LANG/...' path, or None if absent."""
    found = re.search(r'docs/translations/([^/]+)/', file_path)
    return found.group(1) if found else None
122+
123+
def check_translation_update(repo_root, translation_file_path):
    """Check if the translation file is up to date with the source file.

    Args:
        repo_root: Git repository root path
        translation_file_path: Path to the translation file
    Returns:
        tuple: (is_translation, support_update_check, is_update)
            is_translation: the file exists, lives under docs/translations/LANG/
                and is not that language's README.md
            support_update_check: its latest commit message carries a parsable
                'Update to commit HASH ("TITLE")' line
            is_update: that hash matches the latest commit of the source file
    """
    # 1. Check that it is a valid translation file that needs to be checked.
    if not os.path.exists(translation_file_path):
        return False, False, False
    language = extract_translation_language(translation_file_path)
    if language is None or f"docs/translations/{language}/README.md" in translation_file_path:
        return False, False, False

    # 2. Extract commit info of the translated source file.
    translated_source_commit_hash, translated_source_commit_date = extract_source_commit_info(
        repo_root, translation_file_path)
    if not translated_source_commit_hash:
        print(f"File {translation_file_path} does not have a formatted update commit message, skip it.")
        return True, False, False

    # 3. Get the latest commit info of the source file:
    # given the translation file syzkaller/docs/translations/LANGUAGE/PATH/ORIG.md
    # then the source file should be syzkaller/docs/PATH/ORIG.md
    relative_path = os.path.relpath(translation_file_path, repo_root)
    if "docs/translations/" not in relative_path:
        print(f"File '{translation_file_path}' is not a translation, skip it.")
        return False, False, False

    # Only the first occurrence is the translation prefix; leave the rest of the path alone.
    source_file_path = relative_path.replace(f"docs/translations/{language}/", "docs/", 1)
    source_file_abs_path = os.path.join(repo_root, source_file_path)
    if not os.path.exists(source_file_abs_path):
        print(f"Source file '{source_file_abs_path}' does not exist, skip it.")
        return True, True, False

    source_commit_hash, source_commit_date, _ = get_latest_commit_info(repo_root, source_file_abs_path)
    # The lookup returns None when git fails or the file has no history (e.g. it is
    # untracked); slicing None below would crash, so report it as not up to date.
    if not source_commit_hash:
        print(f"Fail to get the latest commit of source file '{source_file_abs_path}', skip it.")
        return True, True, False

    # 4. Compare the commit hashes between the translated source and latest source.
    # Only the 7-character prefixes are compared, as the translated hash may be abbreviated.
    if translated_source_commit_hash[:7] != source_commit_hash[:7]:
        print(f"{translation_file_path} is based on {translated_source_commit_hash[:7]} ({translated_source_commit_date}), "
              f"while the latest source is {source_commit_hash[:7]} ({source_commit_date}).")
        return True, True, False

    return True, True, True
165+
166+
def extract_compact_date(raw_date_str):
    """Shorten a git '%ci' date by keeping only its first two space-separated fields.

    Arg:
        raw_date_str: Raw date string output by '%ci' format: 'YYYY-MM-DD hh:mm:ss ZONE'
    Return:
        Compact date string in format 'YYYY-MM-DD hh:mm:ss'. If the input cannot be
        split into at least two fields, a message is printed and the input is
        returned unchanged.
    """
    try:
        fields = raw_date_str.split(' ')
        return ' '.join((fields[0], fields[1]))
    except Exception as e:
        print(f"Fail to extract compact date from {raw_date_str}: {e}")
        return raw_date_str
180+
181+
def _is_inside_repo(repo_root, abs_file_path):
    """Return True if abs_file_path is repo_root itself or is located below it."""
    try:
        # Compare whole path components: a plain startswith() would also accept
        # sibling directories such as '/repo-old' for the root '/repo'.
        return os.path.commonpath([repo_root, abs_file_path]) == os.path.normpath(repo_root)
    except ValueError:
        # commonpath() rejects mixes it cannot compare (e.g. different drives).
        return False


def _iter_translation_candidates(repo_root, files):
    """Yield absolute paths to check: the given files, or everything under docs/translations."""
    if files:
        for file_path in files:
            abs_file_path = os.path.abspath(file_path)
            if not _is_inside_repo(repo_root, abs_file_path):
                print(f"File '{file_path}' is not in '{repo_root}', skip it.")
                continue
            yield abs_file_path
        return

    translation_dir = os.path.join(repo_root, 'docs', 'translations')
    for root, dirs, names in os.walk(translation_dir):
        # Sort in place so the walk (and therefore the report) has a stable order.
        dirs.sort()
        for name in sorted(names):
            yield os.path.join(root, name)


def main():
    """Parse the command line, check every candidate translation file and print a summary.

    Without --files, all files below <repo root>/docs/translations are checked.
    Returns without output summary if the repository root cannot be determined;
    otherwise always exits with status 0.
    """
    parser = argparse.ArgumentParser(description="Check the update of translation files in syzkaller/docs/translations/.")
    parser.add_argument("-f", "--files", nargs="+", help="one or multiple paths of translation files (test only)")
    parser.add_argument("-r", "--repo-root", default=".", help="root directory of syzkaller (default: current directory)")
    args = parser.parse_args()

    repo_root = get_git_repo_root(args.repo_root)
    if not repo_root:
        return

    total_cnt, support_update_check_cnt, is_update_cnt = 0, 0, 0
    for candidate_path in _iter_translation_candidates(repo_root, args.files):
        is_translation, support_update_check, is_update = check_translation_update(repo_root, candidate_path)
        total_cnt += int(is_translation)
        support_update_check_cnt += int(support_update_check)
        is_update_cnt += int(is_update)

    print(f"Summary: {support_update_check_cnt}/{total_cnt} translation files have formatted commit message that support update check, "
          f"{is_update_cnt}/{support_update_check_cnt} are up to date.")
    # We will add other exit code once all the previous translation commit messages are unified with the new format.
    sys.exit(0)
221+
222+
# Run the checker only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)