Skip to content

Commit 0f204ce

Browse files
committed
Merge pull request #1 from MethosGR/MethosGR-patch-1
Add files via upload
1 parent 7245115 commit 0f204ce

File tree

2 files changed

+132
-0
lines changed

2 files changed

+132
-0
lines changed

Multilingual Excel conversion.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import pandas as pd
2+
import argparse
3+
from tqdm import tqdm
4+
from collections import defaultdict
5+
import warnings
6+
import json
7+
8+
def detect_columns(df, source_lang, target_langs):
    """
    Identify the source and target language columns of a DataFrame.

    A column matches a language when the language code occurs anywhere in the
    column label, compared case-insensitively (substring match). If several
    columns match the source language, the last one wins. If several columns
    match the same target language, the second and later ones receive a
    numeric suffix ("en", "en_2", ...). A column that matches more than one
    target language is assigned to the last matching language.

    Args:
        df: DataFrame whose column labels are inspected. Labels may be of any
            type; they are converted to text for matching only.
        source_lang: Source language code, e.g. "de".
        target_langs: Iterable of target language codes, e.g. ["en", "pl"].
            Surrounding whitespace is ignored; blank and repeated codes are
            dropped.

    Returns:
        A tuple ``(source_col, target_cols)`` where ``source_col`` is the
        original label of the source column and ``target_cols`` maps original
        column labels to their output names, or ``(None, None)`` if no source
        column or no target column is found.
    """
    source_key = source_lang.strip().lower()

    # Normalise the target codes once. Blank codes (e.g. from a trailing comma
    # in "en,pl,") must be dropped because an empty string is a substring of
    # every label and would turn every column into a target. Repeated codes
    # are dropped so one column is not counted twice for the same language.
    stripped = (str(lang).strip() for lang in target_langs)
    unique_langs = dict.fromkeys(lang for lang in stripped if lang)
    target_keys = [(lang, lang.lower()) for lang in unique_langs]

    source_col = None
    target_cols = {}
    target_counts = defaultdict(int)  # Occurrences per target language

    for col in df.columns:
        # Labels are not guaranteed to be strings (numeric headers are
        # common), so convert before lower-casing.
        col_lower = str(col).lower()

        if source_key and source_key in col_lower:
            source_col = col

        for lang, lang_key in target_keys:
            if lang_key in col_lower:
                target_counts[lang] += 1
                count = target_counts[lang]
                suffix = f"_{count}" if count > 1 else ""
                target_cols[col] = f"{lang}{suffix}"

    # Compare against None explicitly: a label such as 0 is falsy but valid.
    if source_col is None or not target_cols:
        return None, None

    return source_col, target_cols
33+
34+
def convert_to_multilingual_excel(input_path, output_path, source_lang, target_langs, metadata_fields):
    """
    Convert an Excel workbook to a multilingual format for Phrase TMS.

    Every sheet of the input workbook is inspected. For each sheet in which a
    source column and at least one target column are detected, the metadata,
    source and target columns are kept, the language columns are renamed to
    their language codes, and the result is written to a sheet of the same
    name in the output workbook. Sheets without matching columns are skipped
    with a warning.

    Args:
        input_path: Path to the input Excel file.
        output_path: Path of the Excel file to write.
        source_lang: Source language code; also used as the output column
            name of the source column.
        target_langs: Iterable of target language codes.
        metadata_fields: Iterable of column names to carry over unchanged
            when present in a sheet. ``None`` is treated as empty.

    Raises:
        ValueError: If no sheet contains both a source and a target column,
            so there is nothing to write.
        Exception: Any error raised while reading or writing is printed and
            then re-raised unchanged.
    """
    try:
        metadata_fields = list(metadata_fields or [])

        # Dictionary to store processed data from all sheets
        processed_sheets = {}

        # The context manager closes the workbook handle even when parsing a
        # sheet fails, so the input file is not left open.
        with pd.ExcelFile(input_path) as xls:
            for sheet_name in tqdm(xls.sheet_names, desc="Processing sheets"):
                df = xls.parse(sheet_name=sheet_name)

                # Detect source and target columns dynamically
                source_col, target_cols = detect_columns(df, source_lang, target_langs)
                if source_col is None or not target_cols:
                    warnings.warn(f"Skipping sheet '{sheet_name}': No valid source or target columns found.")
                    continue

                # Select relevant columns (metadata + source + target columns).
                # A label can qualify in more than one role (e.g. a metadata
                # field that also matches a language code); de-duplicate while
                # keeping order so the column is not emitted twice.
                existing_metadata = [col for col in metadata_fields if col in df.columns]
                selected_columns = list(
                    dict.fromkeys(existing_metadata + [source_col] + list(target_cols))
                )

                # Rename columns for Phrase TMS compatibility
                processed_sheets[sheet_name] = df[selected_columns].rename(
                    columns={source_col: source_lang, **target_cols}
                )

        # Fail early with a clear message instead of asking the Excel writer
        # to save a workbook that has no sheets.
        if not processed_sheets:
            raise ValueError(
                "No sheet contains both a source and a target language column; nothing to write."
            )

        # Save the processed file with multiple sheets
        with pd.ExcelWriter(output_path) as writer:
            for sheet, sheet_df in processed_sheets.items():
                sheet_df.to_excel(writer, sheet_name=sheet, index=False)

        print(f"Multilingual Excel file with multiple sheets saved to: {output_path}")

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
82+
83+
if __name__ == "__main__":
    def _split_csv(value):
        """Split a comma-separated option into stripped, non-empty items."""
        if not value:
            return []
        return [item.strip() for item in value.split(",") if item.strip()]

    # Set up argument parser. The language options are not marked as required
    # here because they may come from the JSON config file instead; they are
    # validated after the config has been merged.
    parser = argparse.ArgumentParser(description="Convert Excel file to Phrase TMS multilingual format with multiple sheets dynamically.")
    parser.add_argument("input", help="Path to the input Excel file")
    parser.add_argument("output", help="Path to the output Excel file")
    parser.add_argument("--source", help="Source language (e.g., 'de' for German); required unless set in --config")
    parser.add_argument("--targets", help="Comma-separated list of target languages (e.g., 'en,pl,cs'); required unless set in --config")
    parser.add_argument("--metadata", help="Comma-separated list of metadata fields (e.g., 'Teaserart,Überschrift,Reitername')")
    parser.add_argument("--config", help="Path to a JSON config file (optional)")

    args = parser.parse_args()

    # Load configuration from JSON file if provided. JSON is read as UTF-8 so
    # non-ASCII field names are decoded the same way on every platform.
    config = {}
    if args.config:
        with open(args.config, "r", encoding="utf-8") as f:
            config = json.load(f)

    # Values from the config file take precedence over command-line options.
    source_lang = config.get("source_lang", args.source)
    target_langs = config.get("target_langs", _split_csv(args.targets))
    metadata_fields = config.get("metadata_fields", _split_csv(args.metadata))

    if not source_lang:
        parser.error("a source language is required: pass --source or set 'source_lang' in the config file")
    if not target_langs:
        parser.error("at least one target language is required: pass --targets or set 'target_langs' in the config file")

    # Warn about processing untrusted files
    warnings.warn("Ensure the input file is from a trusted source to avoid security risks.")

    # Convert the Excel file
    convert_to_multilingual_excel(args.input, args.output, source_lang, target_langs, metadata_fields)

Script execution instructions.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Example Usage
2+
Command-Line Arguments
3+
bash
4+
Copy
5+
python script.py input.xlsx output.xlsx --source de --targets en,pl,cs --metadata Teaserart,Überschrift,Reitername
6+
JSON Config File
7+
Create a config.json file:
8+
9+
json
10+
Copy
11+
{
12+
"source_lang": "de",
13+
"target_langs": ["en", "pl", "cs"],
14+
"metadata_fields": ["Teaserart", "Überschrift", "Reitername"]
15+
}
16+
Run the script with the config file:
17+
18+
bash
19+
Copy
20+
python script.py input.xlsx output.xlsx --config config.json
21+
Output

0 commit comments

Comments
 (0)