11#!/usr/bin/env python
22
3+ import argparse
34from collections import Counter
45import gc
56import glob
67import os
78import polib
89import re
910import sys
10- import torch
11- from transformers import MarianMTModel , MarianTokenizer
12- from translate import Translator
13-
14-
15- if len (sys .argv ) < 2 :
16- print (sys .argv [0 ], " <language>" )
17- exit (1 )
18-
19- lang = sys .argv [1 ]
2011
12+ #
13+ # Script version
14+ #
15+ VERSION = 1.0
2116
17+ #
18+ # Supported languages
19+ #
2220LANGUAGES = [
2321 'de' ,
2422 'hi_IN' ,
2523 'zh-CN' ,
2624]
2725
26+ #
27+ # Script description
28+ #
29+ description = f"""
30+ po_translate v{ VERSION }
31+
32+ A program to translate a .po file with AI and with
33+ Google's translate engine.
34+ """
35+
36+ parser = argparse .ArgumentParser (
37+ formatter_class = argparse .RawDescriptionHelpFormatter ,
38+ description = description )
39+
40+ parser .add_argument ('language' , type = str ,
41+ help = 'Language code to translate, like "en" or "zh-CN".' )
42+ parser .add_argument ('-g' , '--google' , action = 'store_true' ,
43+ help = 'Use google' )
44+
45+ args = parser .parse_args ()
46+ lang = args .language
47+ use_google = args .google
48+
2849if not lang in LANGUAGES :
2950 print (f'Invalid language "{ lang } "' )
3051 print (f'Valid ones are:\n \t { ", " .join (LANGUAGES )} ' )
3152 exit (1 )
3253
3354
34-
55+ #
56+ # Text that we should not translate.
57+ #
3558DONT_TRANSLATE = [
3659 '%d Hz.' ,
3760 '1:2' ,
89112code = lang [0 :2 ]
90113
91114
92- LANGUAGES = {
115+ #
116+ # Google Languages from their two letter code
117+ #
118+ GOOGLE_LANGUAGES = {
119+ 'en' : 'English' ,
120+ 'es' : 'Spanish' ,
121+ 'it' : 'Italian' ,
93122 'de' : 'German' ,
94123 'hi' : 'Hindi' ,
95124 'zh' : 'Chinese (Simplified)' ,
96125}
97126
127+ #
128+ # Load the heavy imports
129+ #
130+ import torch
131+ from transformers import MarianMTModel , MarianTokenizer
132+ from translate import Translator
98133
99134# Load the model and tokenizer for English to the target language (see `code`)
100135model_name = f"Helsinki-NLP/opus-mt-en-{ code } "
104139class POTranslator :
105140
106141
107- def __init__ (self , po_file ):
108- self .reached_google_limit = False
142+ def __init__ (self , po_file , use_google ):
143+ self .reached_google_limit = not use_google
109144 self .have_seen = {}
110145 self .tokenizer = MarianTokenizer .from_pretrained (model_name ,
111146 clean_up_tokenization_spaces = True )
112147 self .model = MarianMTModel .from_pretrained (model_name )
113148
114149 # Initialize Google translator
115- self .translator = Translator (to_lang = LANGUAGES [code ])
150+ self .translator = Translator (to_lang = GOOGLE_LANGUAGES [code ])
116151
117152 # Initialize po translation
118153 self .translate_po (po_file )
@@ -156,7 +191,6 @@ def is_translation_invalid(self, text, threshold=3):
156191 return (False , text )
157192
158193 def translate_text_with_google (self , english ):
159- return english
160194 if len (english ) < 4 :
161195 return english
162196
@@ -253,7 +287,7 @@ def translate_po(self, f):
253287 for entry in po :
254288 if entry .msgid in DONT_TRANSLATE :
255289 entry .msgstr = entry .msgid
256- elif entry . msgid == entry .msgstr :
290+ elif 'GOOGLE' == entry .msgstr :
257291 translated = self .translate_text_with_google (entry .msgid )
258292 entry .msgstr = translated
259293 elif entry .msgid and not entry .msgstr :
@@ -284,7 +318,7 @@ def __del__(self):
284318 del self .tokenizer
285319
286320main_po = f'mrv2/po/{ lang } '
287- POTranslator (main_po )
321+ POTranslator (main_po , use_google )
288322
289323
290324cwd = os .getcwd ()
@@ -295,7 +329,7 @@ def __del__(self):
295329 plugin = plugin [:- 3 ]
296330 plugin_po = f'mrv2/po/python/plug-ins/locale/{ lang } /LC_MESSAGES/{ plugin } '
297331 print ('Translating plugin' ,plugin )
298- POTranslator (plugin_po )
332+ POTranslator (plugin_po , use_google )
299333
300334
301335# Clear cached data in PyTorch
0 commit comments