Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# IDE
.vscode/

# dbGaP digest cache populated by `dm-bip fetch-digests`
.dbgap-cache/

# MkDocs build output
site/

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies = [
"schema-automator==0.5.5",
"linkml-map==0.5.2",
"typing-extensions>=4.0,<5",
"defusedxml>=0.7,<1",
]

[project.scripts]
Expand Down
55 changes: 55 additions & 0 deletions src/dm_bip/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,60 @@ def prepare_metadata(
typer.echo(f"Output written to {result}")


@app.command()
def fetch_digests(
    cohort_key: Annotated[
        Optional[str],
        typer.Argument(help="Cohort key (e.g. jhs, aric). Omit with --list to list cohorts."),
    ] = None,
    cache_dir: Annotated[Path, typer.Option("--cache-dir", help="Local cache directory")] = Path(".dbgap-cache"),
    refresh: Annotated[bool, typer.Option("--refresh", help="Force re-fetch of cached files")] = False,
    list_cohorts: Annotated[bool, typer.Option("--list", help="List available cohorts and exit")] = False,
) -> None:
    """Fetch dbGaP variable digest files (data_dict.xml, var_report.xml) for a cohort."""
    # NOTE: the docstring above is also the command's --help text, so longer
    # notes are kept in comments instead of being added to it.
    #
    # Behaviour:
    #   * --list prints one line per known cohort (sorted by key) and returns.
    #   * Otherwise cohort_key must name a known cohort; its digests are
    #     fetched with cache_dir as the cache root and a summary is printed.
    #   * Usage errors are written to stderr and exit with status 2, so they
    #     never mix with the listing/summary on stdout.

    # Imported inside the command rather than at module import time.
    from dm_bip.prepare_study.fetch_digests import fetch_digests as _fetch
    from dm_bip.prepare_study.fetch_digests import load_cohorts

    # Reject an unusable invocation before load_cohorts is called at all.
    if cohort_key is None and not list_cohorts:
        typer.echo("Error: cohort_key is required (use --list to see options)", err=True)
        raise typer.Exit(code=2)

    cohorts = load_cohorts(cache_dir=cache_dir, refresh=refresh)

    if list_cohorts:
        for key, cohort in sorted(cohorts.items()):
            typer.echo(f" {key:<12} {cohort.study_id}.{cohort.data_version} {cohort.display_name}")
        return

    if cohort_key not in cohorts:
        typer.echo(f"Unknown cohort '{cohort_key}'. Available: {', '.join(sorted(cohorts))}", err=True)
        raise typer.Exit(code=2)

    result = _fetch(cohorts[cohort_key], cache_root=cache_dir, refresh=refresh)
    typer.echo(
        f"Cached {len(result.data_dicts)} data_dict.xml + {len(result.var_reports)} var_report.xml "
        f"under {result.cache_root}"
    )


@app.command()
def parse_digests(
    cohort_key: Annotated[str, typer.Argument(help="Cohort key (e.g. jhs, aric)")],
    cache_dir: Annotated[Path, typer.Option("--cache-dir", help="dbGaP digest cache (input)")] = Path(".dbgap-cache"),
    output_dir: Annotated[Path, typer.Option("--output-dir", "-o", help="Output root directory")] = Path("output"),
    refresh_cohorts: Annotated[bool, typer.Option("--refresh-cohorts", help="Re-fetch cohorts.yaml")] = False,
) -> None:
    """Convert cached dbGaP digests for a cohort into schema-automator canonical-DD TSVs."""
    # NOTE: the docstring above is also the command's --help text, so longer
    # notes are kept in comments instead of being added to it.
    #
    # Looks cohort_key up in the loaded cohorts, hands the matching cohort to
    # parse_cached_digests (cache_dir as input root, output_dir as output
    # root), then reports how many entries that call returned.
    # An unknown cohort_key is reported on stderr and exits with status 2.

    # Imported inside the command rather than at module import time.
    from dm_bip.prepare_study.fetch_digests import load_cohorts, parse_cached_digests

    cohorts = load_cohorts(cache_dir=cache_dir, refresh=refresh_cohorts)
    if cohort_key not in cohorts:
        # Diagnostics go to stderr so stdout only ever carries the result line.
        typer.echo(f"Unknown cohort '{cohort_key}'. Available: {', '.join(sorted(cohorts))}", err=True)
        raise typer.Exit(code=2)

    written = parse_cached_digests(cohorts[cohort_key], cache_root=cache_dir, output_root=output_dir)
    typer.echo(f"Wrote {len(written)} data dictionaries under {output_dir / cohort_key / 'dd'}")


if __name__ == "__main__":
    # Run the CLI only when this file is executed as a script, not on import.
    app()
Loading
Loading