112 changes: 110 additions & 2 deletions pysradb/cli.py
@@ -187,7 +187,16 @@ def _print_save_df(df, saveto=None):

 ###################### metadata ##############################
 def metadata(
-    srp_id, assay, desc, detailed, expand, saveto, enrich=False, enrich_backend=None
+    srp_id,
+    assay,
+    desc,
+    detailed,
+    expand,
+    saveto,
+    enrich=False,
+    enrich_backend=None,
+    input_file=None,
+    output_dir=None,
 ):
     # Validate that at least one ID was provided
     if not srp_id:
@@ -200,6 +209,89 @@ def metadata(

     client = SRAweb()
 
+    # If input_file is provided, read IDs from file
+    if input_file:
+        if not os.path.exists(input_file):
+            console.print(f"[red]Error: Input file '{input_file}' not found[/red]")
+            return
+
+        with open(input_file, "r") as f:
+            file_ids = [line.strip() for line in f if line.strip()]
+
+        if not file_ids:
+            console.print(f"[red]Error: No IDs found in '{input_file}'[/red]")
+            return
+
+        # If output_dir is specified, process each ID separately
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+            console.print(
+                f"[blue]Processing {len(file_ids)} IDs from '{input_file}'[/blue]"
+            )
+            console.print(f"[blue]Output directory: '{output_dir}'[/blue]")
Comment on lines +225 to +231
suggestion: Output directory creation does not handle permission or invalid path errors.

Wrap os.makedirs in a try/except block to handle and report errors if directory creation fails.

Suggested change
-        # If output_dir is specified, process each ID separately
-        if output_dir:
-            os.makedirs(output_dir, exist_ok=True)
-            console.print(
-                f"[blue]Processing {len(file_ids)} IDs from '{input_file}'[/blue]"
-            )
-            console.print(f"[blue]Output directory: '{output_dir}'[/blue]")
+        # If output_dir is specified, process each ID separately
+        if output_dir:
+            try:
+                os.makedirs(output_dir, exist_ok=True)
+            except Exception as e:
+                console.print(f"[red]Error: Failed to create output directory '{output_dir}': {e}[/red]")
+                return
+            console.print(
+                f"[blue]Processing {len(file_ids)} IDs from '{input_file}'[/blue]"
+            )
+            console.print(f"[blue]Output directory: '{output_dir}'[/blue]")
+
+            for idx, accession_id in enumerate(file_ids, 1):
+                console.print(
+                    f"[green]Processing {idx}/{len(file_ids)}: {accession_id}[/green]"
+                )
+
+                # Determine if GSE or SRP/SRX/etc
+                is_gse = isinstance(
+                    accession_id, str
+                ) and accession_id.upper().startswith("GSE")
+
+                try:
+                    if is_gse:
+                        df = client.geo_metadata(
+                            accession_id,
+                            sample_attribute=desc,
+                            detailed=detailed,
+                            enrich=enrich,
+                            enrich_backend=(
+                                enrich_backend if enrich_backend else "ollama/phi3"
+                            ),
+                        )
+                    else:
+                        df = client.sra_metadata(
+                            accession_id,
+                            assay=assay,
+                            detailed=detailed,
+                            sample_attribute=desc,
+                            expand_sample_attributes=expand,
+                            enrich=enrich,
+                            enrich_backend=(
+                                enrich_backend if enrich_backend else "ollama/phi3"
+                            ),
+                        )
+
+                    if df is not None and not df.empty:
+                        # Save to individual file
+                        output_file = os.path.join(
+                            output_dir, f"{accession_id}_metadata.csv"
+                        )
+                        df.to_csv(output_file, index=False)
+                        console.print(
+                            f"[green] → Saved to {output_file} ({len(df)} rows)[/green]"
+                        )
+                    else:
+                        console.print(
+                            f"[yellow] → No metadata found for {accession_id}[/yellow]"
+                        )
+
+                except Exception as e:
+                    console.print(
+                        f"[red] → Error processing {accession_id}: {str(e)}[/red]"
+                    )
+
+            console.print(
+                f"[blue]Batch processing complete. Files saved to '{output_dir}'[/blue]"
+            )
+            return
+        else:
+            # output_dir not specified, use file IDs as regular input
+            srp_id = file_ids
+
+    # Original single/multiple ID processing (when no input_file or no output_dir)
     srp_ids = []
     gse_ids = []
     for accession in srp_id:
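For reference, here is a minimal, self-contained sketch of the batch contract this branch follows: one accession per line in the input file, one `{ID}_metadata.csv` per accession in the output directory. The file name `ids.txt`, the directory `metadata_out`, and the accessions are placeholders for illustration only; nothing here touches the network.

```python
import os

# Hypothetical input file: one accession per line, blank lines ignored.
with open("ids.txt", "w") as fh:
    fh.write("SRP000941\nGSE41637\n\nSRX000001\n")

# Same parsing as the new branch: strip whitespace, drop empty lines.
with open("ids.txt") as fh:
    file_ids = [line.strip() for line in fh if line.strip()]

os.makedirs("metadata_out", exist_ok=True)
for accession_id in file_ids:
    # Mirrors the naming used in the diff: {ID}_metadata.csv inside --output-dir
    print(os.path.join("metadata_out", f"{accession_id}_metadata.csv"))
# metadata_out/SRP000941_metadata.csv
# metadata_out/GSE41637_metadata.csv
# metadata_out/SRX000001_metadata.csv
```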
@@ -840,7 +932,21 @@ def parse_args(args=None):
help="LLM backend for enrichment (e.g., 'ollama/phi3', 'ollama/llama3.2'). "
"If not specified, uses default backend",
)
subparser.add_argument("srp_id", nargs="+")
subparser.add_argument(
"--input-file",
type=str,
default=None,
help="Path to file containing list of IDs (one per line). "
"When used with --output-dir, processes each ID separately",
)
subparser.add_argument(
"--output-dir",
type=str,
default=None,
help="Directory to save individual metadata files (one per ID). "
"Requires --input-file. Files are named as {ID}_metadata.csv",
)
subparser.add_argument("srp_id", nargs="*", default=[])
Copilot AI Nov 8, 2025

The srp_id argument changed from nargs='+' (one or more required) to nargs='*' (zero or more optional). This breaks the existing CLI contract when --input-file is not provided, as users could now call pysradb metadata without any IDs and get no validation error until later in the code (line 291-293). Consider adding validation after line 288 to ensure srp_id is not empty when input_file is not provided.
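A minimal sketch of the check this comment asks for, written as a standalone helper rather than as a patch to cli.py. The function name `validate_metadata_args` and the use of rich's `Console` are assumptions made for illustration, not part of this PR.

```python
from rich.console import Console

console = Console()


def validate_metadata_args(srp_id, input_file):
    """Reject a bare `pysradb metadata` call now that srp_id uses nargs='*'.

    Returns True when there is something to process: either positional
    accessions or an --input-file to read them from.
    """
    if not srp_id and not input_file:
        console.print(
            "[red]Error: provide at least one SRA/GEO accession or use --input-file[/red]"
        )
        return False
    return True


# e.g. called right after argument parsing, before any network requests
assert validate_metadata_args(["SRP000941"], None)
assert not validate_metadata_args([], None)
```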

subparser.set_defaults(func=metadata)
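To show how the two new flags combine with the now-optional positional `srp_id`, here is a throwaway parser that mirrors only the three arguments touched by this hunk. It is not the real pysradb parser, and the file and directory names are placeholders.

```python
import argparse

# Mirror of just the arguments added/changed in this PR, for illustration.
parser = argparse.ArgumentParser(prog="pysradb metadata (sketch)")
parser.add_argument("--input-file", type=str, default=None)
parser.add_argument("--output-dir", type=str, default=None)
parser.add_argument("srp_id", nargs="*", default=[])

# Batch mode: pysradb metadata --input-file ids.txt --output-dir metadata_out
batch = parser.parse_args(["--input-file", "ids.txt", "--output-dir", "metadata_out"])
print(batch.input_file, batch.output_dir, batch.srp_id)  # ids.txt metadata_out []

# Classic mode still works: pysradb metadata SRP000941 SRP000942
classic = parser.parse_args(["SRP000941", "SRP000942"])
print(classic.srp_id)  # ['SRP000941', 'SRP000942']
```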

# pysradb download
@@ -1507,6 +1613,8 @@ def parse_args(args=None):
             args.saveto,
             args.enrich,
             args.enrich_backend,
+            args.input_file,
+            args.output_dir,
         )
     elif args.command == "download":
         download(