@@ -50,6 +50,8 @@ def __init__(self, download_directory=None, headless=True):
5050
5151 # Create target directory if it doesn't exist
5252 os .makedirs (self .target_directory , exist_ok = True )
53+ print (f"Script directory: { script_dir } " )
54+ print (f"Project root: { project_root } " )
5355 print (f"Target directory for processed files: { self .target_directory } " )
5456
5557 # Convert to absolute path and ensure it exists
@@ -59,33 +61,37 @@ def __init__(self, download_directory=None, headless=True):
5961
6062 chrome_options = Options ()
6163 if headless :
62- chrome_options .add_argument ('--headless' )
64+ chrome_options .add_argument ('--headless=new' ) # Use new headless mode
6365
6466 chrome_options .add_argument ('--no-sandbox' )
6567 chrome_options .add_argument ('--disable-dev-shm-usage' )
6668 chrome_options .add_argument ('--disable-gpu' )
67- chrome_options .add_argument ('--remote-debugging-port=9222' )
6869 chrome_options .add_argument ('--window-size=1920,1200' )
69- chrome_options .add_argument ('--start-maximized' )
7070
71- # Additional options to prevent permission issues
71+ # Remove problematic flags that can cause crashes
72+ # REMOVED: --disable-javascript (WITS requires JavaScript!)
73+ # REMOVED: --disable-images (can cause issues)
74+ # REMOVED: --disable-plugins
75+ # REMOVED: --single-process (can cause instability)
76+
77+ # Keep essential flags
7278 chrome_options .add_argument ('--disable-extensions' )
73- chrome_options .add_argument ('--disable-plugins' )
74- chrome_options .add_argument ('--disable-images' )
75- chrome_options .add_argument ('--disable-javascript' )
76- chrome_options .add_argument ('--single-process' )
7779 chrome_options .add_argument ('--disable-background-networking' )
7880 chrome_options .add_argument ('--disable-default-apps' )
7981 chrome_options .add_argument ('--disable-sync' )
8082
8183 # Add user-data-dir to avoid permission issues
8284 chrome_options .add_argument ('--user-data-dir=/tmp/chrome-user-data' )
8385
84- # For Docker environments
86+ # For Docker/server environments
8587 chrome_options .add_argument ('--disable-background-timer-throttling' )
8688 chrome_options .add_argument ('--disable-backgrounding-occluded-windows' )
8789 chrome_options .add_argument ('--disable-renderer-backgrounding' )
8890
91+ # Disable automation detection
92+ chrome_options .add_experimental_option ("excludeSwitches" , ["enable-automation" ])
93+ chrome_options .add_experimental_option ('useAutomationExtension' , False )
94+
8995 # Configure downloads with absolute path
9096 prefs = {
9197 "download.default_directory" : self .download_directory ,
@@ -99,10 +105,32 @@ def __init__(self, download_directory=None, headless=True):
99105
100106 # Initialize service and driver with error handling
101107 try :
102- self .service = QuietService (executable_path = "chromedriver" )
108+ # First, try to find chromedriver in the script's directory
109+ script_dir = os .path .dirname (os .path .abspath (__file__ ))
110+ local_chromedriver = os .path .join (script_dir , 'chromedriver' )
111+
112+ # Check if chromedriver exists locally
113+ if os .path .exists (local_chromedriver ):
114+ print (f"Using local chromedriver: { local_chromedriver } " )
115+ # Make sure it's executable
116+ os .chmod (local_chromedriver , 0o755 )
117+ self .service = QuietService (executable_path = local_chromedriver )
118+ else :
119+ # Fall back to system chromedriver
120+ print (f"Local chromedriver not found at: { local_chromedriver } " )
121+ print ("Trying system chromedriver..." )
122+ self .service = QuietService (executable_path = "chromedriver" )
123+
103124 self .driver = webdriver .Chrome (service = self .service , options = chrome_options )
125+ print ("Chrome driver initialized successfully" )
126+
104127 except Exception as e :
105128 print (f"Error initializing Chrome driver: { e } " )
129+ print ("\n Troubleshooting:" )
130+ print (f"1. Place chromedriver in: { script_dir } " )
131+ print (f"2. Make it executable: chmod +x { os .path .join (script_dir , 'chromedriver' )} " )
132+ print ("3. Or install system-wide: sudo apt install chromium-chromedriver" )
133+ print ("4. Or install webdriver-manager: pip3 install webdriver-manager" )
106134 raise
107135
108136 self .base_url = "https://wits.worldbank.org/WITS/WITS/QuickQuery/FindTariff/FindTariff.aspx?Page=FindATariff"
@@ -779,6 +807,8 @@ def get_csv_file_from_zip(self):
779807 # Move CSV to Spring Boot resources directory instead of download directory
780808 csv_filename = os .path .basename (csv_path )
781809 final_csv_path = os .path .join (self .target_directory , csv_filename )
810+ print (f"Moving CSV from: { csv_path } " )
811+ print (f"Moving CSV to: { final_csv_path } " )
782812 shutil .move (csv_path , final_csv_path )
783813 print (f"Moved CSV file to: { final_csv_path } " )
784814
@@ -797,7 +827,7 @@ def rename_csv_file(self, csv_path, country_code, year):
797827 """
798828 try :
799829 # Create new filename
800- new_filename = f"HS2017{ country_code } { year } .csv"
830+ new_filename = f"HS2017{ country_code } Year { year } .csv"
801831 new_path = os .path .join (os .path .dirname (csv_path ), new_filename )
802832
803833 # Rename the file
@@ -932,10 +962,10 @@ def main():
932962
933963 # Map country code to market name (expand this mapping as needed)
934964 country_mapping = {
935- 'USA' : 'United States' ,
936- 'CHN' : 'China' ,
965+ 'USA' : 'United States' , #Pass
966+ 'CHN' : 'China' ,
937967 'JPN' : 'Japan' ,
938- 'DEU' : 'Germany' ,
968+ 'DEU' : 'Germany' , #Fail
939969 'IND' : 'India' ,
940970 'GBR' : 'United Kingdom' ,
941971 'FRA' : 'France' ,
0 commit comments