-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathpost_translate.py
More file actions
157 lines (132 loc) · 5.8 KB
/
post_translate.py
File metadata and controls
157 lines (132 loc) · 5.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import openai
import os
import frontmatter
import sys
from dotenv import load_dotenv
# Import prompt templates
from prompts import DEFAULT_PROMPT, get_prompt_by_extension
def get_translation(llm_type, messages):
    """Send ``messages`` to the chat-completions API and return the reply text.

    ``llm_type`` selects the model name: ``"openai"`` maps to ``gpt-4o`` and
    any other value maps to ``deepseek-chat``. The request is issued through
    the module-level ``client`` object, which must exist before this is called.
    """
    if llm_type == "openai":
        model_name = "gpt-4o"
    else:
        model_name = "deepseek-chat"

    completion = client.chat.completions.create(model=model_name, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content
def translate_text(text, llm_type, content=DEFAULT_PROMPT, *, chunk_size=1000):
    """Translate ``text`` chunk by chunk and return the concatenated result.

    Args:
        text: The text to translate.
        llm_type: Backend selector forwarded to ``get_translation``.
        content: System prompt sent along with every chunk.
        chunk_size: Maximum number of characters sent per request.

    Returns:
        The translated chunks joined in their original order; an empty string
        when ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    total_length = len(text)
    print("Start translating the main text...")

    # NOTE(review): chunks are cut at fixed character offsets, so a boundary
    # can fall in the middle of a sentence or a Markdown construct.
    translated_chunks = []
    for start in range(0, total_length, chunk_size):
        end = start + chunk_size
        translated_chunks.append(
            get_translation(
                llm_type,
                [
                    {"role": "system", "content": content},
                    {"role": "user", "content": text[start:end]},
                ],
            )
        )
        # ``end`` can overshoot the text length on the final chunk, so cap the
        # reported progress at 100%.
        progress = min(end / total_length * 100, 100)
        print(f"Text translation progress: {progress:.2f}%")

    return "".join(translated_chunks)
def translate_title(text, llm_type, content=DEFAULT_PROMPT):
    """Translate a post title with a single LLM request.

    ``content`` is sent as the system prompt and ``text`` as the user message;
    whatever ``get_translation`` returns is passed back unchanged.
    """
    print("Start translating the title...")
    system_message = {"role": "system", "content": content}
    user_message = {"role": "user", "content": text}
    return get_translation(llm_type, [system_message, user_message])
def translate_sumary(text, llm_type, content=DEFAULT_PROMPT):
    """Translate a post summary with a single LLM request.

    ``content`` is sent as the system prompt and ``text`` as the user message;
    whatever ``get_translation`` returns is passed back unchanged.
    """
    print("Start translating the summary...")
    system_message = {"role": "system", "content": content}
    user_message = {"role": "user", "content": text}
    return get_translation(llm_type, [system_message, user_message])
def process_hugo_post(file_path, llm_type, prompt=None):
    """Translate a Hugo post and write the English version next to it.

    The front matter ``title`` and ``summary`` (each only when present) and
    the post body are translated through the LLM helpers; every other
    metadata entry is copied as-is.

    Args:
        file_path: Path to the source post, normally ending in ``.zh.md``.
        llm_type: Backend selector forwarded to the translate helpers.
        prompt: System prompt to use. When ``None``, the prompt is chosen by
            ``get_prompt_by_extension(file_path)``.

    The output path is ``file_path`` with a trailing ``.zh.md`` replaced by
    ``.en.md``. For any other file name, ``.en`` is inserted before the
    extension so the source file is never overwritten.
    """
    # If no prompt is provided, use the appropriate one based on file extension
    if prompt is None:
        prompt = get_prompt_by_extension(file_path)

    with open(file_path, "r", encoding="utf-8") as f:
        post = frontmatter.load(f)

    # Work on a copy so the loaded post's own metadata is left untouched.
    new_metadata = dict(post.metadata)
    if "title" in new_metadata:
        new_metadata["title"] = translate_title(new_metadata["title"], llm_type, prompt)
    if "summary" in new_metadata:
        new_metadata["summary"] = translate_sumary(new_metadata["summary"], llm_type, prompt)

    # Extract main text and translate
    translated_content = translate_text(post.content, llm_type, prompt)

    # Generate Hugo English version file
    new_post = frontmatter.Post(translated_content, **new_metadata)

    # Only rewrite the trailing suffix: a plain str.replace would also touch
    # ".zh.md" inside directory names, and would return the input path
    # unchanged (overwriting the source) when the suffix is absent.
    source_suffix = ".zh.md"
    if file_path.endswith(source_suffix):
        en_file_path = file_path[:-len(source_suffix)] + ".en.md"
    else:
        root, ext = os.path.splitext(file_path)
        en_file_path = f"{root}.en{ext}"

    with open(en_file_path, "w", encoding="utf-8") as f:
        f.write(frontmatter.dumps(new_post, sort_keys=False))

    print(f"✅ Translation completed: {file_path} -> {en_file_path}")
if __name__ == "__main__":

    def _abort(message):
        """Print ``message`` and terminate the script with exit status 1."""
        print(message)
        sys.exit(1)

    # Load environment variables from the .env file
    load_dotenv(override=True)

    # The script insists on a .env file in the current working directory
    if not os.path.exists('.env'):
        _abort("Error: .env file does not exist, please ensure it is in the current directory.")

    # POST_DIR holds the path of the post file to translate
    post_path = os.getenv("POST_DIR")
    if not post_path:
        _abort("Error: POST_DIR environment variable is not set, please create and modify the .env file.")

    # The post must exist and carry the .zh.md extension
    if not os.path.exists(post_path):
        _abort(f"Error: Post file does not exist at path: {post_path}")
    if not post_path.endswith(".zh.md"):
        _abort(f"Error: Post file should have .zh.md extension: {post_path}")

    # LLM backend comes from LLM_TYPE and defaults to OpenAI
    llm_type = os.getenv("LLM_TYPE", "openai").lower()
    if llm_type not in ("openai", "deepseek"):
        _abort(f"Error: Invalid LLM_TYPE '{llm_type}'. Must be 'openai' or 'deepseek'.")

    # Both backends read their key from OPENAI_API_KEY
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        _abort("Error: OPENAI_API_KEY environment variable is not set, unable to initialize LLM client.")

    # Build the module-level client used by get_translation
    if llm_type == "deepseek":
        client = openai.OpenAI(
            api_key=api_key,
            base_url=os.getenv("DEEPSEEK_API_BASE", "https://api.deepseek.com/v1"),
        )
        print("Using Deepseek for translation...")
    else:
        client = openai.OpenAI(api_key=api_key)
        print("Using OpenAI for translation...")

    # Look up the default prompt for this file and label it with the extension
    file_extension = os.path.splitext(post_path)[1].lower()
    default_prompt = get_prompt_by_extension(post_path)
    if file_extension:
        file_type = file_extension[1:].upper()
    else:
        file_type = "default"

    separator = "-" * 80
    print(f"\n默认提示词 ({file_type} template):")
    print(separator)
    print(default_prompt)
    print(separator)

    # Let the user keep the default prompt or type a replacement
    answer = input("\n是否使用默认提示词? (y/n): ").lower().strip()
    if answer in ("y", "yes"):
        final_prompt = default_prompt
    else:
        print("\n请输入您的自定义提示词 (Enter your custom prompt):")
        custom_prompt = input("> ").strip()
        if custom_prompt:
            final_prompt = custom_prompt
        else:
            # An empty answer falls back to the default prompt
            print("输入为空,将使用默认提示词。")
            final_prompt = default_prompt

    # Run the translation with the selected prompt
    process_hugo_post(post_path, llm_type, final_prompt)