1+ """
2+ Phrase TMS Bulk Import Script with Progress Tracking
3+ - Features memory-safe streaming CSV processing
4+ - Real-time progress statistics
5+ - Enterprise-grade error handling
6+ """

import os
import csv
import logging
from time import sleep
from dotenv import load_dotenv
from typing import Dict, Any, Generator
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from tqdm import tqdm

# Configuration
load_dotenv()
BASE_URL = "https://cloud.memsource.com/web/api/v1"  # Verified correct version
MAX_RETRIES = 3
BACKOFF_FACTOR = 1
TIMEOUT = 30
CSV_FIELDS = {
    'domain': ['name', 'timezone'],
    'subdomain': ['name', 'parent_domain_id'],
    'client': ['name'],
    'business_unit': ['name', 'client_id']
}
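# Illustrative CSV layout (the 'type' column selects the entity; the remaining
# columns are the per-type required fields above, left empty where not used):
#
#   type,name,timezone,parent_domain_id,client_id
#   domain,Acme EMEA,Europe/Prague,,
#   subdomain,Acme DE,,12345,
#   client,Acme Corp,,,
#   business_unit,Acme Legal,,67890,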

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('bulk_import.log'),
        logging.StreamHandler()
    ]
)


class PhraseTMSClient:
    """Enhanced API client with connection pooling and smart retries"""

    def __init__(self):
        self.session = requests.Session()
        retry = Retry(
            total=MAX_RETRIES,
            backoff_factor=BACKOFF_FACTOR,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=['POST', 'PUT', 'GET', 'DELETE']
        )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('https://', adapter)
        self.token = self._authenticate()

    def _authenticate(self) -> str:
        """Secure credential handling with environment variables"""
        credentials = {
            'userName': os.getenv('PHRASE_USER'),
            'password': os.getenv('PHRASE_PASSWORD')
        }
        response = self.session.post(
            f"{BASE_URL}/auth/login",
            json=credentials,
            timeout=TIMEOUT
        )
        response.raise_for_status()
        return response.json()['token']

    def create_entity(self, entity_type: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """Generic entity creation with conflict detection"""
        endpoints = {
            'domain': '/domains',
            'subdomain': lambda d: f"/domains/{d['parent_domain_id']}/subDomains",
            'client': '/clients',
            'business_unit': '/businessUnits'
        }

        url = BASE_URL + (
            endpoints[entity_type](data) if callable(endpoints[entity_type])
            else endpoints[entity_type]
        )

        response = self.session.post(
            url,
            json=data,
            headers={'Authorization': f'ApiToken {self.token}'},
            timeout=TIMEOUT
        )

        if response.status_code == 409:
            logging.debug(f"Entity conflict: {data.get('name')}")
            return {'status': 'conflict'}

        response.raise_for_status()
        return response.json()

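# Standalone usage sketch for the client (entity name is illustrative):
#
#   client = PhraseTMSClient()
#   client.create_entity('client', {'name': 'Acme Corp'})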

def validate_row(entity_type: str, row: Dict[str, str]) -> bool:
    """Structural validation of CSV rows"""
    required = CSV_FIELDS[entity_type]
    missing = [field for field in required if not row.get(field)]
    if missing:
        logging.warning(f"Missing fields: {missing} in {row.get('name')}")
        return False
    return True


def count_csv_rows(file_path: str, delimiter: str) -> int:
    """Memory-efficient row counting"""
    with open(file_path, 'r', encoding='utf-8') as f:
        reader = csv.reader(f, delimiter=delimiter)
        next(reader, None)  # Skip header
        return sum(1 for _ in reader)


def process_csv(file_path: str, delimiter: str) -> Generator[Dict[str, str], None, None]:
    """Streaming CSV parser with normalization"""
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        for row in reader:
            # Guard against None keys/values produced by short or overlong rows
            yield {k.strip().lower(): (v or '').strip()
                   for k, v in row.items() if k is not None}
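# For reference, normalization lowercases keys but only strips values, e.g.
# {' Name ': ' Acme ', 'Type': 'Domain'} becomes {'name': 'Acme', 'type': 'Domain'};
# the 'type' value itself is lowercased later, in bulk_import().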


def bulk_import(file_path: str, delimiter: str, dry_run: bool = False):
    """Main import workflow with progress tracking"""
    client = PhraseTMSClient()
    stats = {'success': 0, 'errors': 0, 'skipped': 0}

    total_rows = count_csv_rows(file_path, delimiter)

    with tqdm(
        total=total_rows,
        desc="🚀 Importing",
        unit="row",
        bar_format="{l_bar}{bar:20}{r_bar}",
        dynamic_ncols=True
    ) as pbar:
        for row in process_csv(file_path, delimiter):
            try:
                entity_type = row.get('type', '').lower()
                if not entity_type or entity_type not in CSV_FIELDS:
                    stats['errors'] += 1
                    logging.error(f"Invalid type: {row.get('type')}")
                    continue

                if not validate_row(entity_type, row):
                    stats['errors'] += 1
                    continue

                if dry_run:
                    stats['success'] += 1
                    continue

                result = client.create_entity(entity_type, row)
                if result.get('status') == 'conflict':
                    stats['skipped'] += 1
                elif result:
                    stats['success'] += 1
                else:
                    stats['skipped'] += 1

            except Exception as e:
                stats['errors'] += 1
                logging.error(f"Row error: {e}")
                sleep(0.5)  # Error cooldown

            finally:
                pbar.update(1)
                pbar.set_postfix(
                    success=stats['success'],
                    errors=stats['errors'],
                    skipped=stats['skipped'],
                    refresh=False
                )

    logging.info("\n🔥 Final Statistics:")
    logging.info(f"✅ Success: {stats['success']}")
    logging.info(f"⚠️ Skipped: {stats['skipped']}")
    logging.info(f"❌ Errors: {stats['errors']}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Phrase TMS Bulk Import Tool')
    parser.add_argument('file', help='CSV file path')
    parser.add_argument('--delimiter', default=',', help='CSV delimiter')
    parser.add_argument('--dry-run', action='store_true', help='Simulate import')
    args = parser.parse_args()

    try:
        bulk_import(
            args.file,
            args.delimiter,
            args.dry_run
        )
    except KeyboardInterrupt:
        logging.info("\n🛑 Operation cancelled by user")
    except Exception as e:
        logging.error(f"💥 Catastrophic failure: {e}")