microbiomedata · codytodonnell · Dec 8, 2025 · Dec 11, 2025 · Dec 11, 2025 · Dec 12, 2025
diff --git a/nmdc_server/api.py b/nmdc_server/api.py
@@ -1,4 +1,5 @@
 import csv
+import io
 import json
 import logging
 import time
@@ -7,12 +8,15 @@
 from io import BytesIO, StringIO
 from typing import Any, Dict, List, Optional, Union, cast
 from uuid import UUID, uuid4
+import zipfile
 
 import httpx
 import requests
 from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response, status
 from fastapi.responses import JSONResponse
 from linkml_runtime.utils.schemaview import SchemaView
+from nmdc_api_utilities.biosample_search import BiosampleSearch
+from nmdc_api_utilities.study_search import StudySearch
 from nmdc_schema.nmdc import SubmissionStatusEnum
 from sqlalchemy.orm import Session
 from starlette.responses import StreamingResponse
@@ -352,6 +356,71 @@ async def get_biosample(biosample_id: str, db: Session = Depends(get_db)):
         raise HTTPException(status_code=404, detail="Biosample not found")
     return db_biosample
 
+# Fetch the source data record for one biosample from the Runtime API, looked up
+# by the supplied ID. Responds with 404 when the lookup yields None.
+@router.get(
+    "/biosample/{biosample_id}/source",
+    tags=["biosample"],
+)
+async def get_biosample_source(biosample_id: str):
+    # NOTE(review): confirm get_record_by_id returns None, not an error, for an unknown ID.
+    record = BiosampleSearch().get_record_by_id(biosample_id)
+    if record is not None:
+        return record
+    raise HTTPException(status_code=404, detail="Biosample not found in source database")
+
+
+# Get a list of biosample source data via the Runtime API for the biosamples
+# matching the supplied conditions. Responds with 404 when nothing matches or
+# no source data comes back.
+@router.post("/biosample/search/source", tags=["biosample"])
+async def search_biosample_source(
+    q: query.SearchQuery = query.SearchQuery(),
+    db: Session = Depends(get_db),
+):
+    id_rows = crud.search_biosample(db, q.conditions, []).with_entities(models.Biosample.id)
+    biosample_ids = [biosample_id for (biosample_id,) in id_rows.all()]
+    # Only call the Runtime API when the local search matched something; there is
+    # nothing to look up for an empty ID list.
+    results = BiosampleSearch().get_records_by_id(biosample_ids) if biosample_ids else None
+    if not results:
+        raise HTTPException(status_code=404, detail="Could not retrieve source data for biosamples")
+    return results
+
+
+# Download multiple metadata lists as one zip file given a list of endpoint labels.
+# Each label in q.endpoints that is a key of endpoint_map adds "<label>.json" to the
+# archive; labels without a mapping are skipped.
+@router.post("/download_metadata", tags=["bulk_download"])
+async def download_metadata(
+    q: query.MultiSearchQuery,
+    db: Session = Depends(get_db),
+):
+    # Endpoint label -> handler coroutine whose return value is dumped as JSON.
+    endpoint_map = {
+        "biosamples": search_biosample_source,
+        "studies": search_study_source,
+    }
+    # dict.fromkeys drops repeated labels while keeping request order; a repeat would
+    # rerun the same search and write a duplicate member name into the archive.
+    requested = [name for name in dict.fromkeys(q.endpoints) if name in endpoint_map]
+
+    zip_buffer = io.BytesIO()
+    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
+        for endpoint_name in requested:
+            # The handlers raise HTTPException (e.g. 404 when nothing is found); it
+            # propagates from here and fails the whole download.
+            data = await endpoint_map[endpoint_name](q, db)
+            json_str = json.dumps(data, indent=2, ensure_ascii=False)
+            zip_file.writestr(f"{endpoint_name}.json", json_str.encode("utf-8"))
+
+    # getvalue() returns the full buffer regardless of position, so no seek(0) is needed.
+    return Response(
+        content=zip_buffer.getvalue(),
+        media_type="application/zip",
+        headers={"Content-Disposition": "attachment; filename=metadata.zip"},
+    )
+
 
 @router.get(
     "/envo/tree",
@@ -546,6 +615,36 @@ async def get_study_image(study_id: str, db: Session = Depends(get_db)):
         raise HTTPException(status_code=404, detail="No image exists for this study")
     return StreamingResponse(BytesIO(image), media_type="image/jpeg")
 
+# Fetch the source data record for one study from the Runtime API by its ID.
+@router.get(
+    "/study/{study_id}/source",
+    tags=["study"],
+)
+async def get_study_source(study_id: str):
+    # NOTE(review): confirm get_record_by_id returns None, not an error, for an unknown ID.
+    record = StudySearch().get_record_by_id(study_id)
+    if record is not None:
+        return record
+    raise HTTPException(status_code=404, detail="Study not found in the source database")
+
+
+# Get a list of study source data via the Runtime API for the studies matching
+# the supplied conditions. Responds with 404 when nothing matches or no source
+# data comes back.
+@router.post("/study/search/source", tags=["study"])
+async def search_study_source(
+    q: query.SearchQuery = query.SearchQuery(),
+    db: Session = Depends(get_db),
+):
+    id_rows = crud.search_study(db, q.conditions).with_entities(models.Study.id)
+    study_ids = [study_id for (study_id,) in id_rows.all()]
+    # Only call the Runtime API when the local search matched something; there is
+    # nothing to look up for an empty ID list.
+    results = StudySearch().get_records_by_id(study_ids) if study_ids else None
+    if not results:
+        raise HTTPException(status_code=404, detail="Could not retrieve source data for studies")
+    return results
+
 
 # data_generation
 # Note the intermingling of the terms "data generation" and "omics processing."

diff --git a/nmdc_server/query.py b/nmdc_server/query.py
@@ -821,6 +821,7 @@ def omics_processing_for_biosample_ids(self, db: Session, biosample_ids):
 
 class BiosampleQuerySchema(BaseQuerySchema):
     data_object_filter: List[DataObjectFilter] = []
+    endpoints: List[str] = []
 
     @property
     def table(self) -> Table:
@@ -1039,6 +1040,10 @@ class SearchQuery(BaseModel):
     conditions: List[ConditionSchema] = []
 
 
+class MultiSearchQuery(SearchQuery):
+    endpoints: List[str] = []  # endpoint labels to download, e.g. "biosamples", "studies"
+
+
 class ConditionResultSchema(SimpleConditionSchema):
     model_config = ConfigDict(from_attributes=True)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -98,6 +98,7 @@ dependencies = [
     "mkdocs-redirects==1.2.1",
     "mypy==1.11.2",
     "mypy-extensions==1.0.0",
+    "nmdc_api_utilities==0.5.0",
     "nmdc-schema==v11.14.0-rc.1",
     "nmdc-submission-schema==11.13.0",
     "nmdc-geoloc-tools==0.3.0",

diff --git a/web/src/components.d.ts b/web/src/components.d.ts
@@ -1,8 +1,11 @@
 /* eslint-disable */
 // @ts-nocheck
+// biome-ignore lint: disable
+// oxlint-disable
+// ------
 // Generated by unplugin-vue-components
 // Read more: https://github.com/vuejs/core/pull/3399
-// biome-ignore lint: disable
+
 export {}
 
 /* prettier-ignore */