Skip to content

Commit f3a72f0

Browse files
committed
Updated scripts.
Updated translations.
1 parent a07a465 commit f3a72f0

File tree

6 files changed

+637
-832
lines changed

6 files changed

+637
-832
lines changed

bin/python/po_translate.py

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,60 @@
11
#!/usr/bin/env python
22

3+
import argparse
34
from collections import Counter
45
import gc
56
import glob
67
import os
78
import polib
89
import re
910
import sys
10-
import torch
11-
from transformers import MarianMTModel, MarianTokenizer
12-
from translate import Translator
13-
14-
15-
if len(sys.argv) < 2:
16-
print(sys.argv[0], " <language>")
17-
exit(1)
18-
19-
lang = sys.argv[1]
2011

12+
#
13+
# Script version
14+
#
15+
VERSION = 1.0
2116

17+
#
18+
# Supported languages
19+
#
2220
LANGUAGES = [
2321
'de',
2422
'hi_IN',
2523
'zh-CN',
2624
]
2725

26+
#
27+
# Script description
28+
#
29+
description=f"""
30+
po_translate v{VERSION}
31+
32+
A program to translate a .po file with AI and with
33+
Google's translate engine.
34+
"""
35+
36+
parser = argparse.ArgumentParser(
37+
formatter_class=argparse.RawDescriptionHelpFormatter,
38+
description=description)
39+
40+
parser.add_argument('language', type=str,
41+
help='Language code to translate, like "en" or "zh-CN".')
42+
parser.add_argument('-g', '--google', action='store_true',
43+
help='Use google')
44+
45+
args = parser.parse_args()
46+
lang = args.language
47+
use_google = args.google
48+
2849
if not lang in LANGUAGES:
2950
print(f'Invalid language "{lang}"')
3051
print(f'Valid ones are:\n\t{", ".join(LANGUAGES)}')
3152
exit(1)
3253

3354

34-
55+
#
56+
# Text that we should not translate.
57+
#
3558
DONT_TRANSLATE = [
3659
'%d Hz.',
3760
'1:2',
@@ -89,12 +112,24 @@
89112
code = lang[0:2]
90113

91114

92-
LANGUAGES = {
115+
#
116+
# Google language names, keyed by their two-letter code
117+
#
118+
GOOGLE_LANGUAGES = {
119+
'en' : 'English',
120+
'es' : 'Spanish',
121+
'it' : 'Italian',
93122
'de' : 'German',
94123
'hi' : 'Hindi',
95124
'zh' : 'Chinese (Simplified)',
96125
}
97126

127+
#
128+
# Load the heavy imports
129+
#
130+
import torch
131+
from transformers import MarianMTModel, MarianTokenizer
132+
from translate import Translator
98133

99134
# Load the model and tokenizer for English to the selected target language
100135
model_name = f"Helsinki-NLP/opus-mt-en-{code}"
@@ -104,15 +139,15 @@
104139
class POTranslator:
105140

106141

107-
def __init__(self, po_file):
108-
self.reached_google_limit = False
142+
def __init__(self, po_file, use_google):
143+
self.reached_google_limit = not use_google
109144
self.have_seen = {}
110145
self.tokenizer = MarianTokenizer.from_pretrained(model_name,
111146
clean_up_tokenization_spaces=True)
112147
self.model = MarianMTModel.from_pretrained(model_name)
113148

114149
# Initialize Google translator
115-
self.translator = Translator(to_lang=LANGUAGES[code])
150+
self.translator = Translator(to_lang=GOOGLE_LANGUAGES[code])
116151

117152
# Initialize po translation
118153
self.translate_po(po_file)
@@ -156,7 +191,6 @@ def is_translation_invalid(self, text, threshold=3):
156191
return (False, text)
157192

158193
def translate_text_with_google(self, english):
159-
return english
160194
if len(english) < 4:
161195
return english
162196

@@ -253,7 +287,7 @@ def translate_po(self, f):
253287
for entry in po:
254288
if entry.msgid in DONT_TRANSLATE:
255289
entry.msgstr = entry.msgid
256-
elif entry.msgid == entry.msgstr:
290+
elif 'GOOGLE' == entry.msgstr:
257291
translated = self.translate_text_with_google(entry.msgid)
258292
entry.msgstr = translated
259293
elif entry.msgid and not entry.msgstr:
@@ -284,7 +318,7 @@ def __del__(self):
284318
del self.tokenizer
285319

286320
main_po = f'mrv2/po/{lang}'
287-
POTranslator(main_po)
321+
POTranslator(main_po, use_google)
288322

289323

290324
cwd = os.getcwd()
@@ -295,7 +329,7 @@ def __del__(self):
295329
plugin = plugin[:-3]
296330
plugin_po = f'mrv2/po/python/plug-ins/locale/{lang}/LC_MESSAGES/{plugin}'
297331
print('Translating plugin',plugin)
298-
POTranslator(plugin_po)
332+
POTranslator(plugin_po, use_google)
299333

300334

301335
# Clear cached data in PyTorch

0 commit comments

Comments
 (0)