@@ -647,7 +647,7 @@ def _fetch_metadata_for_accession_list(
647647 logger .info ("Fetching metadata for %d accessions in %d batch(es) with exponential backoff retries" ,
648648 len (accessions ), len (batches ))
649649
650- for batch_num , batch in enumerate (batches , 1 ):
650+ for batch_num , batch in tqdm ( enumerate (batches , 1 ), total = len ( batches ), desc = "Fetching accession batches" , unit = "batch" , disable = len ( batches ) == 1 ):
651651 logger .info ("Processing accession batch %d/%d (%d accessions)" ,
652652 batch_num , len (batches ), len (batch ))
653653
@@ -683,11 +683,11 @@ def fetch_batch_metadata():
683683
684684 if success and batch_reports :
685685 all_reports .extend (batch_reports )
686- logger . info ( "✅ Batch %d : Retrieved %d records" , batch_num , len (batch_reports ))
686+ tqdm . write ( f "✅ Batch { batch_num } : Retrieved { len (batch_reports )} records" )
687687 else :
688688 # Batch failed or returned empty
689689 error_msg = error_info ['error' ] if error_info else "No data returned"
690- logger . warning ( "❌ Batch %d failed after %d retries: %s" , batch_num , API_MAX_RETRIES , error_msg )
690+ tqdm . write ( f "❌ Batch { batch_num } failed after { API_MAX_RETRIES } retries: { error_msg } " )
691691
692692 # Build URL with applied filters for manual retry
693693 base_url = f"{ NCBI_API_BASE } /virus/accession/{ accession_string } /dataset_report"
@@ -933,6 +933,7 @@ def fetch_virus_metadata(
933933 all_reports = [] # Will store all metadata records across all pages
934934 page_token = None # Token for accessing subsequent pages
935935 page_count = 0 # Track number of pages processed for logging
936+ pages_pbar = None # Progress bar for pagination (created when we know total pages)
936937
937938 # Create a temporary file to stream metadata as it arrives from the API
938939 # This prevents large datasets from consuming all system RAM
@@ -951,7 +952,6 @@ def fetch_virus_metadata(
951952 loop = True
952953 while loop :
953954 page_count += 1
954- logger .info ("Fetching page %d of results..." , page_count )
955955
956956 # Add pagination token if we're not on the first page
957957 if page_token :
@@ -989,7 +989,15 @@ def fetch_single_page():
989989 if success and page_data :
990990 # Extract the virus reports from the response
991991 reports = page_data .get ('reports' , [])
992- logger .info ("Page %d contains %d virus records" , page_count , len (reports ))
992+ # Create progress bar on first page when we know total pages
993+ if pages_pbar is None and page_count == 1 :
994+ total_pages = page_data .get ('total_count' , 0 )
995+ if total_pages > 0 :
996+ total_pages = (total_pages + API_PAGE_SIZE - 1 ) // API_PAGE_SIZE
997+ pages_pbar = tqdm (total = max (total_pages , 1 ), desc = "Fetching pages" , unit = "page" , leave = False )
998+ if pages_pbar :
999+ pages_pbar .update (1 )
1000+ pages_pbar .set_postfix ({"records" : len (all_reports )})
9931001
9941002 # Stream reports to temporary file if available
9951003 if metadata_file and reports :
@@ -1007,7 +1015,8 @@ def fetch_single_page():
10071015 # Check if there are more pages to retrieve
10081016 next_page_token = page_data .get ('next_page_token' )
10091017 if not next_page_token :
1010- logger .debug ("No more pages available, pagination complete" )
1018+ if pages_pbar :
1019+ pages_pbar .close ()
10111020 loop = False
10121021 break
10131022
@@ -1067,7 +1076,9 @@ def fetch_single_page():
10671076 logger .error (error_msg )
10681077 logger .error ("=" * 80 )
10691078
1070- # Close temporary file before raising exception
1079+ # Close temporary file and progress bar before raising exception
1080+ if pages_pbar :
1081+ pages_pbar .close ()
10711082 if metadata_file :
10721083 try :
10731084 metadata_file .close ()
@@ -1249,6 +1260,8 @@ def fetch_single_page():
12491260 'records_retrieved' : len (all_reports ),
12501261 })
12511262
1263+ if pages_pbar :
1264+ pages_pbar .close ()
12521265 loop = False
12531266 break
12541267 else :
@@ -1346,7 +1359,7 @@ def fetch_virus_metadata_chunked(
13461359 logger .info (f"Will process { total_chunks } year(s) from { start_year } to { current_year } " )
13471360 logger .info ("=" * 80 )
13481361
1349- for year in range (start_year , current_year + 1 ):
1362+ for year in tqdm ( range (start_year , current_year + 1 ), total = total_chunks , desc = "Fetching yearly chunks" , unit = "year" ):
13501363 chunk_start = f"{ year } -01-01"
13511364 chunk_end = f"{ year } -12-31"
13521365
@@ -1355,11 +1368,7 @@ def fetch_virus_metadata_chunked(
13551368 chunk_end = current_date .strftime ("%Y-%m-%d" )
13561369
13571370 chunk_num = year - start_year + 1
1358- logger .info ("" )
1359- logger .info ("=" * 80 )
1360- logger .info (f"📥 Chunk { chunk_num } /{ total_chunks } : Fetching data for year { year } " )
1361- logger .info (f" Date range: { chunk_start } to { chunk_end } " )
1362- logger .info ("=" * 80 )
1371+ tqdm .write (f"📥 Chunk { chunk_num } /{ total_chunks } : Fetching data for year { year } ({ chunk_start } to { chunk_end } )" )
13631372
13641373 try :
13651374 # Fetch metadata for this date chunk
@@ -1385,8 +1394,7 @@ def fetch_virus_metadata_chunked(
13851394 chunk_count = len (chunk_reports )
13861395 all_reports .extend (chunk_reports )
13871396
1388- logger .info (f"✅ Chunk { chunk_num } /{ total_chunks } complete: Retrieved { chunk_count :,} records" )
1389- logger .info (f" Running total: { len (all_reports ):,} records" )
1397+ tqdm .write (f"✅ Chunk { chunk_num } /{ total_chunks } : Retrieved { chunk_count :,} records (total: { len (all_reports ):,} )" )
13901398
13911399 # Add a small delay between chunks to be respectful to NCBI servers
13921400 if year < current_year :
0 commit comments