1+ """ 
2+ Phrase TMS Bulk Import Script with Progress Tracking 
3+ - Features memory-safe streaming CSV processing 
4+ - Real-time progress statistics 
5+ - Enterprise-grade error handling 
6+ """ 

import csv
import logging
import os
from time import sleep
from typing import Any, Dict, Generator

import requests
from dotenv import load_dotenv
from requests.adapters import HTTPAdapter
from tqdm import tqdm
from urllib3.util.retry import Retry

# Configuration
load_dotenv()
BASE_URL = "https://cloud.memsource.com/web/api/v1"  # Phrase TMS (Memsource) API base URL
MAX_RETRIES = 3
BACKOFF_FACTOR = 1
TIMEOUT = 30
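# Required credentials, read from the environment (typically via a .env file):
#   PHRASE_USER     - Phrase TMS user name
#   PHRASE_PASSWORD - Phrase TMS password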
CSV_FIELDS = {
    'domain': ['name', 'timezone'],
    'subdomain': ['name', 'parent_domain_id'],
    'client': ['name'],
    'business_unit': ['name', 'client_id']
}
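# Example input rows (assumed layout: a 'type' column selects the entity kind and
# the remaining columns map to CSV_FIELDS above; values are illustrative):
#
#   type,name,timezone,parent_domain_id,client_id
#   domain,Acme EMEA,Europe/Prague,,
#   client,Acme Corp,,,
#   business_unit,Acme Legal,,,12345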

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('bulk_import.log'),
        logging.StreamHandler()
    ]
)


class PhraseTMSClient:
    """Enhanced API client with connection pooling and smart retries"""
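
    # Minimal usage sketch (assumes PHRASE_USER / PHRASE_PASSWORD are set via .env;
    # the entity name below is illustrative):
    #   client = PhraseTMSClient()
    #   client.create_entity('client', {'name': 'Acme Corp'})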

    def __init__(self):
        self.session = requests.Session()
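        # Retry transient 5xx responses for both idempotent and non-idempotent
        # verbs, backing off exponentially between attempts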
        retry = Retry(
            total=MAX_RETRIES,
            backoff_factor=BACKOFF_FACTOR,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=['POST', 'PUT', 'GET', 'DELETE']
        )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('https://', adapter)
        self.token = self._authenticate()

    def _authenticate(self) -> str:
        """Secure credential handling with environment variables"""
        credentials = {
            'userName': os.getenv('PHRASE_USER'),
            'password': os.getenv('PHRASE_PASSWORD')
        }
        response = self.session.post(
            f"{BASE_URL}/auth/login",  # assumed login endpoint for this API version
            json=credentials,
            timeout=TIMEOUT
        )
        response.raise_for_status()
        return response.json()['token']

    def create_entity(self, entity_type: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """Generic entity creation with conflict detection"""
        endpoints = {
            'domain': '/domains',
            # Assumed nested sub-resource path; adjust to your API's subdomain endpoint
            'subdomain': lambda d: f"/domains/{d['parent_domain_id']}/subDomains",
            'client': '/clients',
            'business_unit': '/businessUnits'
        }

        url = BASE_URL + (
            endpoints[entity_type](data) if callable(endpoints[entity_type])
            else endpoints[entity_type]
        )

        response = self.session.post(
            url,
            json=data,
            headers={'Authorization': f'ApiToken {self.token}'},
            timeout=TIMEOUT
        )

        if response.status_code == 409:
            logging.debug(f"Entity conflict: {data.get('name')}")
            return {'status': 'conflict'}

        response.raise_for_status()
        return response.json()


def validate_row(entity_type: str, row: Dict[str, str]) -> bool:
    """Structural validation of CSV rows"""
    required = CSV_FIELDS[entity_type]
    missing = [field for field in required if not row.get(field)]
    if missing:
        logging.warning(f"Missing fields {missing} in row: {row.get('name')}")
        return False
    return True


def count_csv_rows(file_path: str, delimiter: str) -> int:
    """Memory-efficient row counting"""
    with open(file_path, 'r', encoding='utf-8') as f:
        reader = csv.reader(f, delimiter=delimiter)
        next(reader, None)  # Skip header
        return sum(1 for _ in reader)


def process_csv(file_path: str, delimiter: str) -> Generator[Dict[str, str], None, None]:
    """Streaming CSV parser with normalization"""
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        for row in reader:
            # Normalize headers and values; guard against short rows (None values)
            yield {k.strip().lower(): (v or '').strip() for k, v in row.items()}


def bulk_import(file_path: str, delimiter: str, dry_run: bool = False):
    """Main import workflow with progress tracking"""
    client = PhraseTMSClient()
    stats = {'success': 0, 'errors': 0, 'skipped': 0}

    total_rows = count_csv_rows(file_path, delimiter)

    with tqdm(
            total=total_rows,
            desc="🚀 Importing",
            unit="row",
            bar_format="{l_bar}{bar:20}{r_bar}",
            dynamic_ncols=True
    ) as pbar:
        for row in process_csv(file_path, delimiter):
            try:
                entity_type = row.get('type', '').lower()
                if not entity_type or entity_type not in CSV_FIELDS:
                    stats['errors'] += 1
                    logging.error(f"Invalid type: {row.get('type')}")
                    continue

                if not validate_row(entity_type, row):
                    stats['errors'] += 1
                    continue

                if dry_run:
                    stats['success'] += 1
                    continue

                result = client.create_entity(entity_type, row)
                if result.get('status') == 'conflict':
                    stats['skipped'] += 1
                elif result:
                    stats['success'] += 1
                else:
                    stats['skipped'] += 1

            except Exception as e:
                stats['errors'] += 1
                logging.error(f"Row error: {e}")
                sleep(0.5)  # Error cooldown

            finally:
                pbar.update(1)
                pbar.set_postfix(
                    success=stats['success'],
                    errors=stats['errors'],
                    skipped=stats['skipped'],
                    refresh=False
                )

            del row  # Memory management

    logging.info("\n🔥 Final Statistics:")
    logging.info(f"✅ Success: {stats['success']}")
    logging.info(f"⚠️  Skipped: {stats['skipped']}")
    logging.info(f"❌ Errors: {stats['errors']}")


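# Example invocations (script and file names are illustrative):
#   python bulk_import.py entities.csv --dry-run
#   python bulk_import.py entities.csv --delimiter ';'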
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Phrase TMS Bulk Import Tool')
    parser.add_argument('file', help='CSV file path')
    parser.add_argument('--delimiter', default=',', help='CSV delimiter')
    parser.add_argument('--dry-run', action='store_true', help='Simulate import')
    args = parser.parse_args()

    try:
        bulk_import(
            args.file,
            args.delimiter,
            args.dry_run
        )
    except KeyboardInterrupt:
        logging.info("\n🛑 Operation cancelled by user")
    except Exception as e:
        logging.error(f"💥 Catastrophic failure: {e}")