Skip to content

Commit f609b81

Browse files
committed
add software version to command summary file
1 parent a778098 commit f609b81

File tree

1 file changed

+69
-3
lines changed

1 file changed

+69
-3
lines changed

gget/gget_virus.py

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@
111111

112112
# File Size Configuration
113113
BYTES_PER_MB = 1024 * 1024 # Bytes in a megabyte; divisor used when displaying file sizes
114-
MIN_VALID_ZIP_SIZE = 100 * 1024 # 100 KB in bytes (minimum size for a valid ZIP file from cached downloads)
114+
MIN_VALID_ZIP_SIZE = 5 * 1024 # 5 KB in bytes (minimum size for a valid ZIP file from cached downloads)
115115
MIN_VALID_FASTA_SIZE_MB = 0.1 # Minimum size in MB for a valid FASTA file (100 KB)
116116

117117
# URL Length Configuration
@@ -144,7 +144,6 @@
144144
# Cache for the datasets path to avoid repeated checks
145145
_datasets_path_cache = None
146146

147-
148147
# =============================================================================
149148
# HELPER FUNCTIONS FOR RETRIES AND ERROR TRACKING
150149
# =============================================================================
@@ -384,6 +383,49 @@ def _get_datasets_path():
384383
)
385384

386385

386+
def _get_datasets_version():
    """
    Get the version banner of the NCBI datasets CLI if available.

    Runs the datasets binary with ``--version`` and returns its standard
    output with surrounding whitespace stripped. The output is returned
    as-is (no parsing), so the result is the full banner printed by the
    tool (e.g., "datasets version 16.11.0"), not just the version number.

    Returns:
        str or None: The stripped ``--version`` output, or None if the
        datasets binary could not be located or executed, the command timed
        out, it exited with a non-zero status, or it printed nothing.
    """
    # Keep the try body limited to the calls that can actually raise.
    try:
        datasets_path = _get_datasets_path()
        result = subprocess.run(
            [datasets_path, "--version"],
            capture_output=True,
            text=True,
            timeout=SUBPROCESS_VERSION_TIMEOUT,
        )
    except (RuntimeError, subprocess.TimeoutExpired, OSError) as e:
        # Best-effort lookup: a missing version must never abort the caller.
        logger.debug("Could not retrieve datasets version: %s", e)
        return None

    if result.returncode != 0:
        logger.debug(
            "Datasets version check exited with code %s", result.returncode
        )
        return None

    version_output = result.stdout.strip()
    logger.debug("Datasets version output: %s", version_output)

    # Treat empty output as "unavailable" so callers that test `is not None`
    # do not end up recording a blank version line.
    return version_output or None
413+
414+
415+
def _get_gget_version():
416+
"""
417+
Get the version of gget.
418+
419+
Returns:
420+
str: Version string (e.g., "1.2.0") or "unknown" if not available.
421+
"""
422+
try:
423+
from . import __version__
424+
return __version__
425+
except (ImportError, AttributeError):
426+
return "unknown"
427+
428+
387429
def _get_modified_virus_name(virus_name, attempt=1):
388430
"""
389431
Modify the virus name for retry attempts when the NCBI server is unreachable.
@@ -3160,10 +3202,12 @@ def save_command_summary(
31603202
total_final_sequences,
31613203
output_files,
31623204
filtered_metadata,
3205+
datasets_version,
31633206
success=True,
31643207
error_message=None,
31653208
failed_commands=None,
3166-
genbank_error=None
3209+
genbank_error=None,
3210+
gget_version=None
31673211
):
31683212
"""
31693213
Save a summary file documenting the command execution and results.
@@ -3172,6 +3216,10 @@ def save_command_summary(
31723216
output files, and any errors encountered.
31733217
"""
31743218

3219+
# Get versions if not provided
3220+
if gget_version is None:
3221+
gget_version = _get_gget_version()
3222+
31753223
summary_file = os.path.join(outfolder, "command_summary.txt")
31763224

31773225
try:
@@ -3185,6 +3233,15 @@ def save_command_summary(
31853233
f.write(f"Execution Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
31863234
f.write(f"Output Folder: {outfolder}\n\n")
31873235

3236+
# Version information
3237+
f.write("-" * 80 + "\n")
3238+
f.write("SOFTWARE VERSIONS\n")
3239+
f.write("-" * 80 + "\n")
3240+
f.write(f"gget version: {gget_version}\n")
3241+
if datasets_version is not None:
3242+
f.write(f"{datasets_version}\n")
3243+
f.write("\n")
3244+
31883245
# Command line
31893246
f.write("-" * 80 + "\n")
31903247
f.write("COMMAND LINE\n")
@@ -5348,6 +5405,7 @@ def virus(
53485405
cached_metadata_dict = None
53495406
used_cached_download = False
53505407
cached_zip_file = None # Track zip file path for cleanup
5408+
datasets_version = None # Track datasets version for logging and summary
53515409

53525410
# For SARS-CoV-2 queries, use cached data packages with hierarchical fallback
53535411
if _skip_cache:
@@ -5375,6 +5433,7 @@ def virus(
53755433
applied_filters = download_result[1]
53765434
missing_filters = download_result[2]
53775435
cached_zip_file = zip_file # Track for cleanup
5436+
datasets_version = _get_datasets_version()
53785437

53795438
cached_fasta_file, cached_metadata_dict, used_cached_download = process_cached_download(
53805439
zip_file, virus_type="SARS-CoV-2"
@@ -5421,6 +5480,7 @@ def virus(
54215480
applied_filters = download_result[1]
54225481
missing_filters = download_result[2]
54235482
cached_zip_file = zip_file # Track for cleanup
5483+
datasets_version = _get_datasets_version()
54245484

54255485
cached_fasta_file, cached_metadata_dict, used_cached_download = process_cached_download(
54265486
zip_file, virus_type="Alphainfluenza"
@@ -5566,6 +5626,7 @@ def virus(
55665626
total_final_sequences=0,
55675627
output_files={},
55685628
filtered_metadata=[],
5629+
datasets_version=datasets_version,
55695630
success=False,
55705631
error_message=str(e),
55715632
failed_commands=failed_commands,
@@ -5584,6 +5645,7 @@ def virus(
55845645
total_final_sequences=0,
55855646
output_files={},
55865647
filtered_metadata=[],
5648+
datasets_version=datasets_version,
55875649
success=True,
55885650
error_message="No virus records found matching the specified criteria (API returned 0 records)",
55895651
failed_commands=failed_commands
@@ -5663,6 +5725,7 @@ def virus(
56635725
total_final_sequences=0,
56645726
output_files=output_files_dict,
56655727
filtered_metadata=[],
5728+
datasets_version=datasets_version,
56665729
success=True,
56675730
error_message="No sequences passed the metadata filters",
56685731
failed_commands=failed_commands
@@ -5913,6 +5976,7 @@ def virus(
59135976
total_final_sequences=total_final_sequences,
59145977
output_files=output_files_dict,
59155978
filtered_metadata=final_metadata_for_summary,
5979+
datasets_version=datasets_version,
59165980
success=True,
59175981
error_message=None,
59185982
failed_commands=failed_commands,
@@ -5939,6 +6003,7 @@ def virus(
59396003
total_final_sequences=0,
59406004
output_files=output_files_dict,
59416005
filtered_metadata=[],
6006+
datasets_version=datasets_version,
59426007
success=True,
59436008
error_message="No sequences passed all filters",
59446009
failed_commands=failed_commands
@@ -6060,6 +6125,7 @@ def virus(
60606125
total_final_sequences=total_final_sequences,
60616126
output_files=output_files_dict,
60626127
filtered_metadata=final_metadata_for_summary,
6128+
datasets_version=datasets_version,
60636129
success=False,
60646130
error_message=str(e),
60656131
failed_commands=failed_commands

0 commit comments

Comments
 (0)