Skip to content

Commit 77e8f4c

Browse files
committed
connected text translation function to API endpoint
1 parent 1f1a4a9 commit 77e8f4c

File tree

3 files changed

+117
-1
lines changed

3 files changed

+117
-1
lines changed

fastapi/app/ai/translation.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
from functools import lru_cache

import torch
from transformers import MarianMTModel, MarianTokenizer
3+
4+
# Target language code -> MarianMT checkpoint name.
# Every non-English target maps to an English-source checkpoint; 'en' maps to
# the multi-source ROMANCE checkpoint.
_TRANSLATION_MODELS = {
    # English to other languages
    'es': 'Helsinki-NLP/opus-mt-en-es',
    'fr': 'Helsinki-NLP/opus-mt-en-fr',
    'de': 'Helsinki-NLP/opus-mt-en-de',
    'it': 'Helsinki-NLP/opus-mt-en-it',
    'pt': 'Helsinki-NLP/opus-mt-en-pt',
    'nl': 'Helsinki-NLP/opus-mt-en-nl',
    'pl': 'Helsinki-NLP/opus-mt-en-pl',
    'ru': 'Helsinki-NLP/opus-mt-en-ru',
    'zh': 'Helsinki-NLP/opus-mt-en-zh',
    'ja': 'Helsinki-NLP/opus-mt-en-jap',
    'ko': 'Helsinki-NLP/opus-mt-en-ko',
    'ar': 'Helsinki-NLP/opus-mt-en-ar',
    'hi': 'Helsinki-NLP/opus-mt-en-hi',
    'tr': 'Helsinki-NLP/opus-mt-en-tr',
    # Romance languages to English (generic multi-source checkpoint)
    'en': 'Helsinki-NLP/opus-mt-ROMANCE-en',
}


# Bounded so that only a few checkpoints are kept in memory at once; the
# least recently used pair is evicted when a fifth checkpoint is requested.
@lru_cache(maxsize=4)
def _load_translation_model(model_name: str):
    """Load the tokenizer/model pair for ``model_name``, memoizing the result."""
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(input_text: str, target_lang: str):
    """
    Translate text into the specified target language.

    The source language is not detected. The checkpoint is chosen from the
    target language alone:

    - Any target other than 'en' uses an English-source checkpoint, so the
      input is expected to be English. Supported targets: es, fr, de, it, pt,
      nl, pl, ru, zh, ja, ko, ar, hi, tr.
    - Target 'en' uses the generic 'ROMANCE-en' checkpoint, so the input is
      expected to be in a Romance language (Spanish, French, Italian,
      Portuguese, ...).

    Args:
        input_text: Text to translate.
        target_lang: Target language code. Surrounding whitespace and letter
            case are ignored.

    Returns:
        The translated text. Blank input yields an empty string without
        loading a model. If loading or running the model fails, the string
        "Translation error: <details>" is returned instead of raising.

    Raises:
        ValueError: If ``target_lang`` is not a supported language code.
    """
    target_lang = target_lang.strip().lower()

    model_name = _TRANSLATION_MODELS.get(target_lang)
    if model_name is None:
        available_langs = ', '.join(_TRANSLATION_MODELS)
        raise ValueError(f"Target language '{target_lang}' not supported. Available: {available_langs}")

    # Nothing to translate: skip the (expensive) model load and generation.
    if not input_text.strip():
        return ""

    try:
        # Reuses an already-loaded checkpoint instead of reloading per call.
        tokenizer, model = _load_translation_model(model_name)

        # NOTE(review): the text is passed as a single sequence with no
        # truncation or chunking argument; confirm how long inputs should be
        # handled.
        inputs = tokenizer(input_text, return_tensors="pt", padding=True)
        with torch.no_grad():
            translated = model.generate(**inputs)

        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate best-effort: callers receive the failure as text.
        return f"Translation error: {str(e)}"
98+
99+
# Google Colab interactive input
100+
# try:
101+
# user_input = input("Enter text and target language (separated by comma): ")
102+
# if ',' not in user_input:
103+
# print("Please use format: 'your text,language_code' (e.g., 'Hello world,es')")
104+
# else:
105+
# text_part, lang_part = user_input.rsplit(',', 1) # Split on last comma to handle text with commas
106+
# result = translate_text(text_part.strip(), lang_part.strip())
107+
# print(f"\nTranslated text: {result}")
108+
#
109+
# except KeyboardInterrupt:
110+
# print("\nOperation cancelled by user.")
111+
# except Exception as e:
112+
# print(f"Error: {e}")

fastapi/app/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def translate_article(
7373
target_language: str = Query(...),
7474
text: str = Query(...)
7575
):
76-
pass
76+
return {"response": server.text_translate(text, target_language)}
7777

7878
@app.get("/comparison/semantic_comparison", response_model=ComparisonResponse)
7979
def compare_articles(

fastapi/app/models/server_model.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import json
77
from pathlib import Path
88
from ..ai.semantic_comparison import semantic_compare
9+
from ..ai.translation import translate_text
910
from huggingface_hub import model_info
1011

1112
def model_exists(model_name: str) -> bool:
@@ -277,3 +278,6 @@ def perform_semantic_comparison(
277278
sim_threshold,
278279
self.selected_comparison_model
279280
)
281+
282+
def text_translate(self, target_text: str, target_language: str):
    """Translate ``target_text`` into ``target_language``.

    Delegates to ``translate_text`` and returns its result unchanged.
    """
    translated = translate_text(target_text, target_language)
    return translated

0 commit comments

Comments
 (0)