import argparse
import json
import re
import warnings
from collections import defaultdict

import pandas as pd
from tqdm import tqdm


def detect_columns(df, source_lang, target_langs):
    """
    Identify the source-language column and the target-language columns of a sheet.

    Matching is case-insensitive and is done per language in two passes:

    1. Whole-token match: the language code must be delimited by
       non-alphanumeric characters (or the ends of the header), so ``en``
       matches ``Content_en`` and ``EN (review)`` but not ``Content_de``.
    2. Only if a language has no whole-token match at all, fall back to a
       plain substring match (so ``de`` still finds ``Deutsch``).

    Args:
        df: Object exposing ``.columns`` (a pandas DataFrame). Header labels
            need not be strings; they are compared on their ``str()`` form.
        source_lang: Source language code or name, e.g. ``"de"``.
        target_langs: Iterable of target language codes. Blank and repeated
            entries are ignored.

    Returns:
        ``(source_col, target_cols)`` where ``source_col`` is the original
        header of the source column and ``target_cols`` maps original target
        headers to their new names (``"en"``, then ``"en_2"``, ``"en_3"`` ...
        for further columns of the same language). ``(None, None)`` when no
        source column or no target column is found.
    """
    column_labels = list(df.columns)
    # Headers may be ints, timestamps, etc. -- compare on their text form
    # instead of calling .lower() on the raw label.
    lowered = [str(label).lower() for label in column_labels]

    def find_positions(key):
        """Return column positions matching ``key`` (token matches preferred)."""
        # [^\W_] is "any alphanumeric character"; the lookarounds require that
        # the code is not glued to other letters/digits on either side.
        token = re.compile(rf"(?<![^\W_]){re.escape(key)}(?![^\W_])")
        exact = [pos for pos, name in enumerate(lowered) if token.search(name)]
        return exact or [pos for pos, name in enumerate(lowered) if key in name]

    source_key = str(source_lang).strip().lower()
    # An empty key would match every header, so treat it as "no source".
    source_hits = find_positions(source_key) if source_key else []
    if not source_hits:
        return None, None
    # When several columns qualify, the last one wins (as it always has).
    source_pos = source_hits[-1]

    # Normalised key -> label used in the output header; preserves the order
    # given by the caller and drops blanks/duplicates.
    labels = {}
    for lang in target_langs:
        label = str(lang).strip()
        if label:
            labels.setdefault(label.lower(), label)

    hits = {key: set(find_positions(key)) for key in labels}

    target_cols = {}
    target_counts = defaultdict(int)  # columns seen so far per language
    for pos, column in enumerate(column_labels):
        if pos == source_pos:
            # The source column must never be renamed as a target as well.
            continue
        candidates = [key for key in labels if pos in hits[key]]
        if not candidates:
            continue
        # Prefer the most specific code (e.g. "en-us" over "en").
        key = max(candidates, key=len)
        target_counts[key] += 1
        suffix = f"_{target_counts[key]}" if target_counts[key] > 1 else ""
        target_cols[column] = f"{labels[key]}{suffix}"

    if not target_cols:
        return None, None

    return column_labels[source_pos], target_cols


def convert_to_multilingual_excel(input_path, output_path, source_lang, target_langs, metadata_fields):
    """
    Convert an Excel workbook to a multilingual layout for Phrase TMS.

    Every sheet of ``input_path`` is read; for each sheet the source and
    target language columns are located with ``detect_columns``. Sheets
    without a usable source/target pair are skipped with a warning. For the
    remaining sheets the metadata columns that exist in the sheet, the source
    column and the target columns are kept (in that order), the language
    columns are renamed, and the result is written to ``output_path`` under
    the original sheet name.

    Args:
        input_path: Path of the workbook to read.
        output_path: Path of the workbook to write.
        source_lang: Source language; also the new header of the source column.
        target_langs: Iterable of target languages.
        metadata_fields: Iterable of column headers to carry over unchanged
            when present in a sheet.

    Raises:
        ValueError: If no sheet yields any data to write (the output file is
            not created in that case).
        Exception: Any error from reading or writing is printed and re-raised
            unchanged.
    """
    try:
        processed_sheets = {}

        # The context manager releases the workbook handle even when parsing
        # a sheet fails half-way through.
        with pd.ExcelFile(input_path) as xls:
            for sheet_name in tqdm(xls.sheet_names, desc="Processing sheets"):
                df = xls.parse(sheet_name=sheet_name)

                source_col, target_cols = detect_columns(df, source_lang, target_langs)

                # Compare against None: a legitimate header such as 0 is falsy.
                renames = {}
                if source_col is not None and target_cols:
                    # A column picked as the source must not also be renamed
                    # as a target, otherwise the two renames would conflict.
                    renames = {col: name for col, name in target_cols.items() if col != source_col}
                if not renames:
                    warnings.warn(f"Skipping sheet '{sheet_name}': No valid source or target columns found.")
                    continue

                # Metadata first, then source, then targets; dict.fromkeys
                # drops repeats so a column listed twice is not selected twice.
                existing_metadata = [col for col in metadata_fields if col in df.columns]
                selected_columns = list(dict.fromkeys([*existing_metadata, source_col, *renames]))

                renames[source_col] = source_lang
                processed_sheets[sheet_name] = df[selected_columns].rename(columns=renames)

        if not processed_sheets:
            # Fail before touching the output path instead of letting the
            # Excel writer choke on a workbook without sheets.
            raise ValueError("No sheet contained both a source and a target language column; nothing to write.")

        with pd.ExcelWriter(output_path) as writer:
            for sheet, sheet_df in processed_sheets.items():
                sheet_df.to_excel(writer, sheet_name=sheet, index=False)

        print(f"Multilingual Excel file with multiple sheets saved to: {output_path}")

    except Exception as e:
        # Top-level boundary: report, then let the caller see the real error.
        print(f"An error occurred: {e}")
        raise


def _split_csv(value):
    """Split a comma-separated string into stripped, non-empty items."""
    return [item.strip() for item in value.split(",") if item.strip()]


def _as_list(value):
    """Normalise a comma-separated string or a JSON list into clean strings."""
    if isinstance(value, str):
        # Without this a string from the config would be iterated per character.
        return _split_csv(value)
    return [text for text in (str(item).strip() for item in value) if text]


def parse_settings(argv=None):
    """
    Parse the command line (and the optional JSON config) into run settings.

    Values found in the config file (keys ``source_lang``, ``target_langs``,
    ``metadata_fields``) take precedence over ``--source``, ``--targets`` and
    ``--metadata``; each setting must be supplied by at least one of the two.

    Args:
        argv: Argument list without the program name; ``None`` means
            ``sys.argv[1:]`` (argparse default).

    Returns:
        Tuple ``(input_path, output_path, source_lang, target_langs,
        metadata_fields)`` with the two lists stripped of whitespace and
        empty entries.

    Raises:
        SystemExit: Via ``parser.error`` when a setting is missing or empty,
            or when the config file is not a JSON object.
    """
    parser = argparse.ArgumentParser(description="Convert Excel file to Phrase TMS multilingual format with multiple sheets dynamically.")
    parser.add_argument("input", help="Path to the input Excel file")
    parser.add_argument("output", help="Path to the output Excel file")
    # Not marked required: the config file may provide these instead.
    parser.add_argument("--source", help="Source language (e.g., 'de' for German)")
    parser.add_argument("--targets", help="Comma-separated list of target languages (e.g., 'en,pl,cs')")
    parser.add_argument("--metadata", help="Comma-separated list of metadata fields (e.g., 'Teaserart,Überschrift,Reitername')")
    parser.add_argument("--config", help="Path to a JSON config file (optional)")

    args = parser.parse_args(argv)

    config = {}
    if args.config:
        # Explicit UTF-8: field names may be non-ASCII and the platform
        # default encoding is not reliable for JSON.
        with open(args.config, "r", encoding="utf-8") as f:
            config = json.load(f)
        if not isinstance(config, dict):
            parser.error("--config must point to a JSON object")

    def pick(key, cli_value):
        """Config value if present and not null, otherwise the CLI value."""
        value = config.get(key)
        return cli_value if value is None else value

    source_lang = pick("source_lang", args.source)
    target_langs = pick("target_langs", args.targets)
    metadata_fields = pick("metadata_fields", args.metadata)

    missing = [
        flag
        for flag, value in (
            ("--source", source_lang),
            ("--targets", target_langs),
            ("--metadata", metadata_fields),
        )
        if value is None
    ]
    if missing:
        parser.error(f"missing {', '.join(missing)} (give it on the command line or in --config)")

    source_lang = str(source_lang).strip()
    target_langs = _as_list(target_langs)
    metadata_fields = _as_list(metadata_fields)

    if not source_lang:
        parser.error("the source language must not be empty")
    if not target_langs:
        parser.error("at least one target language is required")

    return args.input, args.output, source_lang, target_langs, metadata_fields


def main(argv=None):
    """Command-line entry point: resolve the settings and run the conversion."""
    input_path, output_path, source_lang, target_langs, metadata_fields = parse_settings(argv)

    # Warn about processing untrusted files
    warnings.warn("Ensure the input file is from a trusted source to avoid security risks.")

    convert_to_multilingual_excel(input_path, output_path, source_lang, target_langs, metadata_fields)


if __name__ == "__main__":
    main()