-
-
Notifications
You must be signed in to change notification settings - Fork 129
Expand file tree
/
Copy pathtranslator.py
More file actions
127 lines (105 loc) · 3.75 KB
/
Copy pathtranslator.py
File metadata and controls
127 lines (105 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import asyncio
import json
import os
from googletrans import Translator
from tqdm import tqdm
# Directory holding the reference file and every generated translation file.
TRANSLATION_DIR = "custom_components/llmvision/translations/"

# File inside TRANSLATION_DIR that serves as the source of all translations.
REFERENCE = "en.json"

# When True, an existing target file is kept and only keys it lacks are
# translated; when False, the whole reference is translated again.
MISSING_ONLY = True

# Language codes to generate; each one becomes "<code>.json" in TRANSLATION_DIR
# and is also passed to the translator as the destination language.
GENERATE_LANGUAGES = [
    "bg",
    "ca",
    # "zh-cn",  # disabled; original note: rename from cn to zh-cn
    "cs",
    "da",
    "de",
    "el",
    "fr",
    "hu",
    "it",
    "ja",
    "lt",
    "lv",
    "nl",
    "pl",
    "pt",
    "tr",
    "sk",
    "sl",
    "sv",
]
def load_reference_data():
    """Read and parse the reference translation file.

    The file is located by joining TRANSLATION_DIR and REFERENCE and is
    decoded as UTF-8.

    Returns:
        The parsed JSON content of the reference file.
    """
    source_file = os.path.join(TRANSLATION_DIR, REFERENCE)
    with open(source_file, encoding="utf-8") as handle:
        return json.loads(handle.read())
def write_translation_file(file_path, data):
    """Serialise *data* as JSON into *file_path*, replacing any old content.

    The output is UTF-8, indented by four spaces, keeps non-ASCII characters
    unescaped, and ends with a single trailing newline.

    Args:
        file_path: Destination path of the translation file.
        data: JSON-serialisable translation data.
    """
    with open(file_path, "w", encoding="utf-8") as out:
        payload = json.dumps(data, ensure_ascii=False, indent=4)
        out.write(payload + "\n")
def count_strings(value):
    """Count the string leaves contained in a nested dict/list structure.

    A string counts as one; dict values and list items are counted
    recursively; anything else contributes zero. Dict keys are not counted.

    Args:
        value: A string, dict, list, or any other object.

    Returns:
        The number of strings found.
    """
    if isinstance(value, str):
        return 1
    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, list):
        children = value
    else:
        return 0
    return sum(map(count_strings, children))
def get_missing(reference, existing):
    """Collect the parts of *reference* that *existing* does not yet have.

    A key absent from *existing* is reported with its full reference value.
    A key present on both sides is descended into only when both values are
    dicts, and is reported only if that nested comparison finds something.
    Any other present key is treated as already covered.

    Args:
        reference: The reference data; returned unchanged if not a dict.
        existing: The data already available for the target.

    Returns:
        A dict shaped like *reference* that holds only the missing entries
        (empty when nothing is missing).
    """
    if not isinstance(reference, dict):
        return reference

    absent = {}
    for key, ref_value in reference.items():
        if key in existing:
            current = existing[key]
            if isinstance(ref_value, dict) and isinstance(current, dict):
                nested = get_missing(ref_value, current)
                if nested:
                    absent[key] = nested
            continue
        absent[key] = ref_value
    return absent
def merge(existing, additions):
    """Combine *additions* with *existing* without modifying either input.

    When both arguments are dicts, the result starts as a shallow copy of
    *existing*; each key from *additions* is merged recursively if the
    current value under that key is a dict, and otherwise overwrites it.
    When either argument is not a dict, *additions* wins outright.

    Args:
        existing: The data to keep.
        additions: The data to layer on top.

    Returns:
        The merged structure.
    """
    if not (isinstance(existing, dict) and isinstance(additions, dict)):
        return additions

    combined = {**existing}
    for key, incoming in additions.items():
        current = combined.get(key)
        if isinstance(current, dict):
            combined[key] = merge(current, incoming)
        else:
            combined[key] = incoming
    return combined
async def translate_file(reference_data, file_path, target_language, translator):
    """Translate the reference data and write the result to *file_path*.

    If MISSING_ONLY is set and the target file already exists, its content
    is loaded and only the entries it lacks are translated; the translated
    entries are then merged into the loaded content. Otherwise the complete
    reference is translated. When there is nothing to translate, a message
    is printed and the file is left untouched.

    Args:
        reference_data: Parsed reference translations.
        file_path: Path of the target translation file.
        target_language: Language code passed to the translator.
        translator: Translator object forwarded to translate_value.
    """
    # Start from "translate everything" and narrow it down if allowed.
    previous = {}
    pending = reference_data
    if MISSING_ONLY and os.path.exists(file_path):
        with open(file_path, encoding="utf-8") as handle:
            previous = json.load(handle)
        pending = get_missing(reference_data, previous)

    label = os.path.basename(file_path)
    if not pending:
        print(f"{label}: nothing to translate")
        return

    # One progress tick per string that will be translated.
    string_count = count_strings(pending)
    with tqdm(total=string_count, desc=label, unit="str") as progress:
        fresh = await translate_value(pending, target_language, translator, progress)

    write_translation_file(file_path, merge(previous, fresh))
async def translate_value(value, target_language, translator, bar):
    """Recursively translate every string inside a JSON-like value.

    Dicts keep their keys and have their values translated; lists have each
    item translated in order; values that are neither str, dict nor list are
    returned unchanged.

    Args:
        value: A string, dict, list, or any other JSON value.
        target_language: Destination language code, passed as ``dest``.
        translator: Object with an awaitable ``translate(text, dest=...)``
            whose result exposes the translation as ``.text``.
        bar: Progress object; ``update(1)`` is called once per string.

    Returns:
        A structure shaped like *value* with its strings translated.
    """
    if isinstance(value, str):
        # Empty or whitespace-only text has nothing to translate; sending it
        # to the service wastes a request and may make the backend fail.
        if value.strip():
            result = await translator.translate(value, dest=target_language)
            value = result.text
        # Tick for skipped strings too, so the bar still reaches a total
        # that counts every string.
        bar.update(1)
        return value
    if isinstance(value, dict):
        return {
            key: await translate_value(item, target_language, translator, bar)
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [
            await translate_value(item, target_language, translator, bar)
            for item in value
        ]
    return value
async def main():
    """Generate a translation file for every language in GENERATE_LANGUAGES.

    The reference data is loaded once, and a single Translator instance is
    shared by all languages. Languages are processed one after another.
    """
    async with Translator() as translator:
        reference_data = load_reference_data()
        os.makedirs(TRANSLATION_DIR, exist_ok=True)
        for lang in GENERATE_LANGUAGES:
            # Join path components instead of concatenating strings so the
            # result does not depend on TRANSLATION_DIR ending in a slash.
            file_path = os.path.join(TRANSLATION_DIR, f"{lang}.json")
            await translate_file(reference_data, file_path, lang, translator)


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())