Skip to content

Commit 75f50db

Browse files
authored
Add files via upload
1 parent 4540621 commit 75f50db

File tree

1 file changed

+141
-156
lines changed

1 file changed

+141
-156
lines changed

translate_ai.py

Lines changed: 141 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -1,156 +1,141 @@
1-
import os
2-
import json
3-
from googletrans import Translator
4-
from bs4 import BeautifulSoup
5-
import time
6-
7-
# Shared googletrans client, reused for every translation request below.
translator = Translator()

# Directory holding the locale files; each language lives in
# "<language-code>.json" inside it.
json_dir = './locales'
10-
11-
# Maps a target language code to the existing locale file used as its base.
# Codes are grouped by base file; any code missing here falls back to
# 'en.json' in get_base_file.
base_language_map = {
    **dict.fromkeys(("quz", "ayc", "gug"), "es.json"),
    **dict.fromkeys(("bn", "ne", "pa"), "hi.json"),
    **dict.fromkeys(("ht", "wo", "ln"), "fr.json"),
    **dict.fromkeys(("pt_br", "pt_ao"), "pt.json"),
    **dict.fromkeys(("so", "ha", "ps", "fa"), "ar.json"),
    **dict.fromkeys(("zh_cn",), "zh-cn.json"),
    **dict.fromkeys(("zh_tw",), "zh-tw.json"),
}
30-
31-
32-
def get_base_file(target_lang):
    """Return the path of the locale file used as the base for *target_lang*.

    Args:
        target_lang: Language code of the file about to be generated.

    Returns:
        ``json_dir`` joined with the mapped base file name, or with
        ``'en.json'`` when the code has no entry in ``base_language_map``.
    """
    base_name = base_language_map.get(target_lang)
    if base_name is None:
        # The map is keyed with underscores ("pt_br", "zh_cn") while the
        # prompt in this script rewrites underscores to hyphens, so retry
        # with the underscore spelling before falling back to English.
        base_name = base_language_map.get(target_lang.replace("-", "_"), 'en.json')
    return os.path.join(json_dir, base_name)
34-
35-
# Abort early when the locale directory is missing: everything below reads
# from or writes to it.
print(f"Checking directory: {os.path.abspath(json_dir)}")
if not os.path.isdir(json_dir):
    print(f"Directory does not exist: {json_dir}")
    # The bare exit() helper is installed by the `site` module for
    # interactive use and would report success (status 0); raise SystemExit
    # with a non-zero status so callers can detect the failure.
    raise SystemExit(1)
39-
40-
def extract_text(html):
    """Return the text content of *html*, or *html* itself when that text is blank.

    Args:
        html: String that may contain HTML markup.

    Returns:
        The text BeautifulSoup extracts from *html*; if that text is empty
        or whitespace-only, the original *html* string unchanged.
    """
    plain_text = BeautifulSoup(html, "html.parser").get_text()
    if plain_text.strip():
        return plain_text
    return html
43-
44-
def rebuild_html(original_html, translated_text):
    """Put *translated_text* back into the markup of *original_html*.

    Args:
        original_html: The source string, possibly containing HTML.
        translated_text: The translated text to insert.

    Returns:
        The re-serialised document. The text is only swapped when the parsed
        document exposes a non-empty ``.string``; otherwise the markup is
        returned as BeautifulSoup serialises it, without the translation.
    """
    document = BeautifulSoup(original_html, "html.parser")
    text_node = document.string
    if text_node:
        text_node.replace_with(translated_text)
    return str(document)
49-
50-
def translate_with_backoff(text, target_lang, max_retries=5):
    """Translate *text* from English to *target_lang*, retrying with backoff.

    Args:
        text: The English text to translate.
        target_lang: Destination language code passed to the translator.
        max_retries: Maximum number of translation attempts.

    Returns:
        The translated text, or *text* unchanged when it is blank or when
        every attempt failed (callers treat an unchanged result as a failure).
    """
    # Blank input can never yield a non-blank translation, so skip the
    # network calls and the retry delays entirely.
    if not text or not text.strip():
        return text

    delay = 1
    for attempt in range(max_retries):
        try:
            translation = translator.translate(text, src='en', dest=target_lang)
            if translation and hasattr(translation, 'text') and translation.text.strip():
                return translation.text
            print(f"Attempt {attempt + 1}: Translation failed for text: {text}")
        except Exception as e:
            # Best effort: any translator/network error just triggers a retry.
            print(f"Attempt {attempt + 1}: Error for text '{text}': {e}")
        # Only wait when another attempt follows; sleeping after the last
        # failure would just delay the caller.
        if attempt < max_retries - 1:
            time.sleep(delay)
            delay *= 2
    return text
63-
64-
def create_new_language_file(target_lang):
    """Create ``<target_lang>.json`` by translating every key of the base file.

    The base file chosen by ``get_base_file`` supplies the keys. Each key's
    text is translated and stored under the original key, the result is
    written to ``json_dir`` and keys whose translation came back unchanged
    are listed as failures.

    Args:
        target_lang: Language code to translate into; also the output file stem.

    Returns:
        None. Returns early, after printing the error, when the base file
        cannot be read.
    """
    base_file = get_base_file(target_lang)
    print(f"Creating new file for language: {target_lang} using base file: {base_file}")
    try:
        with open(base_file, 'r', encoding='utf-8') as source:
            base_entries = json.load(source)
    except Exception as e:
        print(f"Error reading base file: {e}")
        return

    keys = list(base_entries.keys())
    key_count = len(keys)
    translated_data = {}
    untranslated = []

    position = 0
    for key in keys:
        position += 1
        plain = extract_text(key)
        result = translate_with_backoff(plain, target_lang)
        # An unchanged result is how translate_with_backoff signals failure.
        if result == plain:
            untranslated.append(key)
        translated_data[key] = rebuild_html(key, result)
        print(f"Progress: {position}/{key_count} ({(position / key_count) * 100:.2f}%)")

    new_file_path = os.path.join(json_dir, f'{target_lang}.json')
    with open(new_file_path, 'w', encoding='utf-8') as target:
        json.dump(translated_data, target, indent=4, ensure_ascii=False)
    print(f"Created new language file: {new_file_path}")

    if not untranslated:
        print("\nAll keys were translated successfully!")
        return
    print("\nFailed to translate the following keys:")
    for failed_key in untranslated:
        print(f"- {failed_key}")
99-
100-
def translate_missing(file_path, target_lang):
    """Fill in the untranslated entries of an existing locale file in place.

    An entry counts as missing when its key is not blank and its value is
    empty, equal to the key, or equal to the key's extracted text.

    Args:
        file_path: Path of the locale JSON file to update.
        target_lang: Language code to translate into.

    Returns:
        None. Returns early when the file cannot be read or nothing is missing.
    """
    print(f"Translating missing keys for {file_path}")
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            data = json.load(source)
    except Exception as e:
        print(f"Error reading file: {e}")
        return

    def _is_missing(key, value):
        # Same evaluation order as a single boolean expression: the value
        # checks run first, the blank-key check last.
        stale = not value or key == value or value == extract_text(key)
        return stale and key.strip() != ""

    pending = [key for key, value in data.items() if _is_missing(key, value)]
    if len(pending) == 0:
        print(f"No missing translations in {file_path}")
        return

    pending_count = len(pending)
    untranslated = []

    position = 0
    for key in pending:
        position += 1
        plain = extract_text(key)
        result = translate_with_backoff(plain, target_lang)
        # An unchanged result is how translate_with_backoff signals failure.
        if result == plain:
            untranslated.append(key)
        data[key] = rebuild_html(key, result)
        print(f"Progress: {position}/{pending_count} ({(position / pending_count) * 100:.2f}%)")

    with open(file_path, 'w', encoding='utf-8') as target:
        json.dump(data, target, indent=4, ensure_ascii=False)
    print(f"Updated missing translations in {file_path}")

    if not untranslated:
        print("\nAll missing keys were translated successfully!")
        return
    print("\nFailed to translate the following keys:")
    for failed_key in untranslated:
        print(f"- {failed_key}")
140-
141-
# Script entry: list the locale files, ask for a target language, then either
# create its file from scratch or fill in the missing entries of an existing one.
existing_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
print(f"Existing JSON files: {existing_files}")

if not existing_files:
    print("No JSON files found in the directory.")

language_codes = [f.split('.')[0] for f in existing_files]

print("Prompting for target language...")
# Strip stray whitespace so "it " does not produce a file named "it .json".
# Underscores become hyphens to match the locale file naming ("zh-cn.json").
target_language = input("Enter the target language code (e.g., 'it', 'ko'): ").strip().replace("_", "-")

if not target_language:
    # An empty code would otherwise create a file literally named ".json".
    print("No target language code entered.")
    raise SystemExit(1)

if f"{target_language}.json" not in existing_files:
    create_new_language_file(target_language)
else:
    file_path = os.path.join(json_dir, f"{target_language}.json")
    translate_missing(file_path, target_language)
1+
import os
2+
import json
3+
from googletrans import Translator
4+
from bs4 import BeautifulSoup
5+
import time
6+
7+
# Shared googletrans client, reused for every translation request below.
translator = Translator()

# Directory holding the locale files; each language lives in
# "<language-code>.json" inside it.
json_dir = './locales_new'
10+
11+
# Maps a target language code to the existing locale file used as its base.
# Codes are grouped by base file; any code missing here falls back to
# 'en.json' in get_base_file.
base_language_map = {
    **dict.fromkeys(("quz", "ayc", "gug"), "es.json"),
    **dict.fromkeys(("bn", "ne", "pa"), "hi.json"),
    **dict.fromkeys(("ht", "wo", "ln"), "fr.json"),
    **dict.fromkeys(("pt_br", "pt_ao"), "pt.json"),
    **dict.fromkeys(("so", "ha", "ps", "fa"), "ar.json"),
    **dict.fromkeys(("zh_cn",), "zh-cn.json"),
    **dict.fromkeys(("zh_tw",), "zh-tw.json"),
}

# Maps a target language code to the language code of its base file, i.e. the
# base file name without the ".json" suffix.
source_language_map = {
    code: base_name[:-len(".json")]
    for code, base_name in base_language_map.items()
}
50+
51+
52+
def get_base_file(target_lang):
    """Return the path of the locale file used as the base for *target_lang*.

    Args:
        target_lang: Language code looked up in ``base_language_map``.

    Returns:
        ``json_dir`` joined with the mapped base file name, or with
        ``'en.json'`` when the code has no entry in the map.
    """
    base_name = base_language_map.get(target_lang, 'en.json')
    base_path = os.path.join(json_dir, base_name)
    return base_path
54+
55+
# Abort early when the locale directory is missing: everything below reads
# from or writes to it.
print(f"Checking directory: {os.path.abspath(json_dir)}")
if not os.path.isdir(json_dir):
    print(f"Directory does not exist: {json_dir}")
    # The bare exit() helper is installed by the `site` module for
    # interactive use and would report success (status 0); raise SystemExit
    # with a non-zero status so callers can detect the failure.
    raise SystemExit(1)
59+
60+
61+
def extract_text(html):
    """Return the text content of *html*, or *html* itself when that text is blank.

    Args:
        html: String that may contain HTML markup.

    Returns:
        The text BeautifulSoup extracts from *html*; if that text is empty
        or whitespace-only, the original *html* string unchanged.
    """
    plain_text = BeautifulSoup(html, "html.parser").get_text()
    if plain_text.strip():
        return plain_text
    return html
64+
65+
66+
def rebuild_html(original_html, translated_text):
    """Put *translated_text* back into the markup of *original_html*.

    Args:
        original_html: The source string, possibly containing HTML.
        translated_text: The translated text to insert.

    Returns:
        The re-serialised document. The text is only swapped when the parsed
        document exposes a non-empty ``.string``; otherwise the markup is
        returned as BeautifulSoup serialises it, without the translation.
    """
    document = BeautifulSoup(original_html, "html.parser")
    text_node = document.string
    if text_node:
        text_node.replace_with(translated_text)
    return str(document)
71+
72+
73+
def translate_with_backoff(text, source_lang, target_lang, max_retries=5):
    """Translate *text* from *source_lang* to *target_lang*, retrying with backoff.

    Args:
        text: The text to translate.
        source_lang: Language code of *text*, passed to the translator as ``src``.
        target_lang: Destination language code, passed as ``dest``.
        max_retries: Maximum number of translation attempts.

    Returns:
        The translated text, or *text* unchanged when it is blank or when
        every attempt failed (callers treat an unchanged result as a failure).
    """
    # Blank input can never yield a non-blank translation, so skip the
    # network calls and the retry delays entirely.
    if not text or not text.strip():
        return text

    delay = 1
    for attempt in range(max_retries):
        try:
            translation = translator.translate(text, src=source_lang, dest=target_lang)
            if translation and hasattr(translation, 'text') and translation.text.strip():
                return translation.text
            print(f"Attempt {attempt + 1}: Translation failed for text: {text}")
        except Exception as e:
            # Best effort: any translator/network error just triggers a retry.
            print(f"Attempt {attempt + 1}: Error for text '{text}': {e}")
        # Only wait when another attempt follows; sleeping after the last
        # failure would just delay the caller.
        if attempt < max_retries - 1:
            time.sleep(delay)
            delay *= 2
    print(f"Final attempt failed for: {text}")
    return text
87+
88+
89+
def create_new_language_file(target_lang):
    """Create ``<target_lang>.json`` by machine-translating the base locale file.

    The base file is read and, for every entry, the value is translated
    (falling back to the key when the value is empty or identical to the
    key). Results are written to ``json_dir`` under the original keys, and
    entries whose translation came back unchanged are listed as failures.

    Args:
        target_lang: Language code to translate into; also the output file stem.

    Returns:
        None. Returns early, after printing the error, when the base file
        cannot be read.
    """
    # Both lookup tables are keyed with underscores ("pt_br", "zh_cn") while
    # the prompt in this script rewrites underscores to hyphens. Resolve one
    # map key and use it for both lookups so the base file and the source
    # language always agree.
    map_key = target_lang
    if map_key not in base_language_map:
        map_key = target_lang.replace("-", "_")
    base_file = get_base_file(map_key)
    source_lang = source_language_map.get(map_key, 'en')
    print(f"Creating new file for language: {target_lang} using base file: {base_file}")
    try:
        with open(base_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error reading base file: {e}")
        return

    translated_data = {}
    total_keys = len(data)
    failed_translations = []

    for idx, (key, value) in enumerate(data.items(), start=1):
        # Prefer the base file's value; use the key when the value is empty
        # or was never translated (value == key).
        # NOTE(review): a key used as fallback may not be written in
        # source_lang - confirm which language the keys are in.
        text_to_translate = value if value and value != key else key
        translation = translate_with_backoff(text_to_translate, source_lang, target_lang)
        print(f"Original: {text_to_translate} -> Translated: {translation}")  # Debug print
        # An unchanged result is how translate_with_backoff signals failure.
        if translation == text_to_translate:
            failed_translations.append(key)
        translated_data[key] = rebuild_html(text_to_translate, translation)
        progress = (idx / total_keys) * 100
        print(f"Progress: {progress:.2f}%")

    new_file_path = os.path.join(json_dir, f'{target_lang}.json')

    with open(new_file_path, 'w', encoding='utf-8') as f:
        json.dump(translated_data, f, indent=4, ensure_ascii=False)

    print(f"Created new language file: {new_file_path}")

    if failed_translations:
        print("\nFailed to translate the following keys:")
        for key in failed_translations:
            print(f"- {key}")
    else:
        print("\nAll keys were translated successfully!")
127+
128+
129+
# Script entry: list the locale files, ask for a target language and create
# its locale file when it does not exist yet.
existing_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
print(f"Existing JSON files: {existing_files}")

if not existing_files:
    print("No JSON files found in the directory.")

language_codes = [f.split('.')[0] for f in existing_files]

print("Prompting for target language...")
# Strip stray whitespace so "it " does not produce a file named "it .json".
# Underscores become hyphens to match the locale file naming ("zh-cn.json").
target_language = input("Enter the target language code (e.g., 'it', 'ko'): ").strip().replace("_", "-")

if not target_language:
    # An empty code would otherwise create a file literally named ".json".
    print("No target language code entered.")
    raise SystemExit(1)

if f"{target_language}.json" not in existing_files:
    create_new_language_file(target_language)
else:
    # Say so instead of exiting silently when there is nothing to create.
    print(f"Language file already exists: {os.path.join(json_dir, target_language + '.json')}")

0 commit comments

Comments
 (0)