# Generate and Register JSON Schemas #28
# Workflow that generates JSON Schemas and registers them.
name: Generate and Register JSON Schemas

on:
  # Pushing a version tag (v*) starts a run.
  push:
    tags: ['v*']
  # Manual runs are allowed; the version to release is supplied as an input.
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to release (e.g., 1.0.0)'
        required: true
        default: '0.1.0'

# Values shared by every step of the workflow.
env:
  PYTHON_VERSION: '3.11'
  ORGANIZATION_NAME: 'HTAN2Organization'

jobs:
  # The only job in this workflow; it runs on an Ubuntu runner.
  generate-and-register-schemas:
    runs-on: ubuntu-latest
    steps:
# fetch-depth 0 retrieves the complete history, which version detection needs.
- name: Checkout code
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
# Installs the interpreter version pinned in the workflow-level env block.
- name: Set up Python
  # The id makes this step's outputs addressable as
  # steps.setup-python.outputs.* (for example python-version); without an id
  # such an expression evaluates to an empty string.
  id: setup-python
  uses: actions/setup-python@v4
  with:
    python-version: ${{ env.PYTHON_VERSION }}
# Installs the latest Poetry and tells it to create its virtualenv inside the
# project directory.
- name: Install Poetry
  uses: snok/install-poetry@v1
  with:
    version: latest
    virtualenvs-in-project: true
    virtualenvs-create: true
# Restores .venv from the cache. The key is built from the runner OS, the
# python-version output of the step whose id is setup-python, and the hash of
# every poetry.lock in the repository.
- name: Load cached venv
  id: cached-poetry-dependencies
  uses: actions/cache@v4
  with:
    path: .venv
    key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

# Runs only when the cache step above did not report an exact hit.
# --no-root installs the dependencies without installing the project itself.
- name: Install dependencies
  if: ${{ steps.cached-poetry-dependencies.outputs.cache-hit != 'true' }}
  run: poetry install --no-interaction --no-root
# Resolves the release version and publishes it as the step output `version`.
#   workflow_dispatch: "v" + the `version` input
#   tag push:          the tag name (GITHUB_REF without the refs/tags/ prefix)
- name: Determine version
  id: version
  env:
    # The input travels through the environment so its text is never spliced
    # into the script by expression expansion.
    DISPATCH_VERSION: ${{ github.event.inputs.version }}
  run: |
    if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
      # Strip one leading "v" so an input of "v1.0.0" does not become "vv1.0.0".
      version="v${DISPATCH_VERSION#v}"
    else
      # Extract version from git tag
      version="${GITHUB_REF#refs/tags/}"
    fi
    echo "version=${version}" >> "$GITHUB_OUTPUT"
    # A step's outputs are only readable by later steps, so log the shell
    # variable rather than steps.version.outputs.version.
    echo "Version: ${version}"
# Runs scripts/linkml_to_flat_synapse_jsonschema.py once per data-model class
# and writes each result to
# JSON_Schemas/<version>/HTAN.<ClassName>-<version>-schema.json.
- name: Generate JSON Schemas
  env:
    # Passed through the environment so the value is never spliced into the
    # script text by expression expansion.
    SCHEMA_VERSION: ${{ steps.version.outputs.version }}
  run: |
    # Create versioned output directory
    out_dir="JSON_Schemas/${SCHEMA_VERSION}"
    mkdir -p "$out_dir"

    # generate <LinkML file relative to modules/> <class name>
    # Invokes the converter for one class; a non-zero exit aborts the step
    # because the default run shell stops on the first failing command.
    generate() {
      poetry run python scripts/linkml_to_flat_synapse_jsonschema.py \
        "modules/$1" \
        --class-name "$2" \
        --output "${out_dir}/HTAN.$2-${SCHEMA_VERSION}-schema.json"
    }

    # Clinical domains
    echo "Generating Clinical domain schemas..."
    generate "Clinical/domains/demographics.yaml" "Demographics"
    generate "Clinical/domains/diagnosis.yaml" "Diagnosis"
    generate "Clinical/domains/therapy.yaml" "Therapy"
    generate "Clinical/domains/followup.yaml" "FollowUp"
    generate "Clinical/domains/molecular.yaml" "MolecularTest"
    generate "Clinical/domains/exposure.yaml" "Exposure"
    generate "Clinical/domains/family_history.yaml" "FamilyHistory"
    generate "Clinical/domains/vital_status.yaml" "VitalStatus"

    # WES (Level 1 is a file-based schema)
    generate "WES/domains/level_1.yaml" "BulkWESLevel1"
    generate "WES/domains/level_2.yaml" "BulkWESLevel2"
    generate "WES/domains/level_3.yaml" "BulkWESLevel3"

    # Biospecimen
    echo "Generating Biospecimen schema..."
    generate "Biospecimen/domains/biospecimen.yaml" "BiospecimenData"

    # scRNA-seq
    echo "Generating scRNA-seq schemas..."
    generate "scRNA-seq/domains/level_1.yaml" "scRNALevel1"
    generate "scRNA-seq/domains/level_2.yaml" "scRNALevel2"
    generate "scRNA-seq/domains/level_3_4.yaml" "scRNALevel3_4"

    # SpatialOmics
    echo "Generating SpatialOmics schemas..."
    generate "SpatialOmics/domains/level_1.yaml" "SpatialLevel1"
    generate "SpatialOmics/domains/level_3.yaml" "SpatialLevel3"
    generate "SpatialOmics/domains/level_4.yaml" "SpatialLevel4"
    generate "SpatialOmics/domains/spatial_panel.yaml" "SpatialPanel"

    # MultiplexMicroscopy
    echo "Generating MultiplexMicroscopy schemas..."
    generate "MultiplexMicroscopy/domains/level_2.yaml" "MultiplexMicroscopyLevel2"
    generate "MultiplexMicroscopy/domains/level_3.yaml" "MultiplexMicroscopyLevel3"
    generate "MultiplexMicroscopy/domains/level_4.yaml" "MultiplexMicroscopyLevel4"

    # DigitalPathology
    echo "Generating DigitalPathology schema..."
    generate "DigitalPathology/domains/digital_pathology.yaml" "DigitalPathologyData"
# Calls scripts/synapse_json_schema_bind.py with --no_bind for every JSON file
# in this version's output directory, using the organization name from the
# workflow env. The Synapse credentials come from repository secrets.
- name: Register schemas in Synapse
  env:
    SYNAPSE_USERNAME: ${{ secrets.SYNAPSE_USERNAME }}
    SYNAPSE_AUTH_TOKEN: ${{ secrets.SYNAPSE_AUTH_TOKEN }}
  run: |
    # Register each generated schema
    for candidate in JSON_Schemas/${{ steps.version.outputs.version }}/*.json; do
      # An unmatched glob stays literal, so skip anything that is not a file.
      [ -f "$candidate" ] || continue
      label=$(basename "$candidate" .json)
      echo "Processing schema: $label"
      poetry run python scripts/synapse_json_schema_bind.py \
        -p "$candidate" \
        -n "${{ env.ORGANIZATION_NAME }}" \
        --no_bind
    done
# Commits this version's schema directory on a new schemas-<version> branch,
# pushes it, and opens a pull request against main.
- name: Create Pull Request for Schemas
  if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    # Identity used for the automated commit
    git config --local user.email "action@github.com"
    git config --local user.name "GitHub Action"

    # Dedicated branch for this release
    release_branch="schemas-${{ steps.version.outputs.version }}"
    git checkout -b "$release_branch"

    # Stage only the versioned schema directory
    git add JSON_Schemas/${{ steps.version.outputs.version }}/

    # Nothing staged means there is nothing to commit or propose.
    if git diff --staged --quiet; then
      echo "No schema changes to commit"
      exit 0
    fi

    git commit -m "feat: add JSON schemas for release ${{ steps.version.outputs.version }}"
    git push origin "$release_branch"

    # Open the pull request; the body lines sit at the scalar's base indent so
    # the text carries no leading spaces.
    gh pr create \
      --title "Add JSON schemas for release ${{ steps.version.outputs.version }}" \
      --body "## JSON Schema Release ${{ steps.version.outputs.version }}
    This PR adds the generated JSON schemas for release ${{ steps.version.outputs.version }}.
    All schemas are organized in: \`JSON_Schemas/${{ steps.version.outputs.version }}/\`
    **Auto-generated by release workflow**" \
      --base main \
      --head "$release_branch"
# Schema binding has been moved to the htan2-project-setup repository.
# This workflow handles schema generation and registration only.
# Stores the whole JSON_Schemas/ directory as a workflow artifact named after
# the version; the artifact is retained for 30 days.
- name: Upload generated schemas as artifacts
  uses: actions/upload-artifact@v4
  with:
    path: JSON_Schemas/
    name: json-schemas-${{ steps.version.outputs.version }}
    retention-days: 30
# Creates the GitHub release for the pushed tag. Runs on tag pushes only.
- name: Create Release
  if: startsWith(github.ref, 'refs/tags/')
  env:
    GH_TOKEN: ${{ github.token }}
    # Tag and version are passed through the environment so they can be quoted
    # in the shell instead of being spliced into the script text.
    TAG_NAME: ${{ github.ref_name }}
    RELEASE_VERSION: ${{ steps.version.outputs.version }}
  run: |
    # The note lines sit at the scalar's base indent so the Markdown carries
    # no leading spaces.
    gh release create "$TAG_NAME" \
      --title "Release ${RELEASE_VERSION}" \
      --notes "## JSON Schema Release ${RELEASE_VERSION}
    This release includes updated JSON schemas for the HTAN2 data model.
    ### Schema Organization:
    All schemas are organized in versioned directories: \`JSON_Schemas/${RELEASE_VERSION}/\`
    ### Generated Schemas:
    **Clinical Module:**
    - Demographics Schema
    - Diagnosis Schema
    - Therapy Schema
    - FollowUp Schema
    - MolecularTest Schema
    - Exposure Schema
    - FamilyHistory Schema
    - VitalStatus Schema
    **Biospecimen Module:**
    - BiospecimenData Schema
    **WES Module:**
    - BulkWESLevel1 Schema
    - BulkWESLevel2 Schema
    - BulkWESLevel3 Schema
    **scRNA-seq Module:**
    - scRNALevel1 Schema
    - scRNALevel2 Schema
    - scRNALevel3_4 Schema
    **SpatialOmics Module:**
    - SpatialLevel1 Schema
    - SpatialLevel3 Schema
    - SpatialLevel4 Schema
    - SpatialPanel Schema
    **MultiplexMicroscopy Module:**
    - MultiplexMicroscopyLevel2 Schema
    - MultiplexMicroscopyLevel3 Schema
    - MultiplexMicroscopyLevel4 Schema
    **DigitalPathology Module:**
    - DigitalPathologyData Schema
    **Note**: This workflow registers the schemas in Synapse; binding them to project folders is handled by the htan2-project-setup repository."
# Attaches every JSON file from this version's output directory to the release
# of the pushed tag. Runs on tag pushes only.
- name: Upload schemas as release assets
  if: startsWith(github.ref, 'refs/tags/')
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    # One upload call per schema file
    for asset in JSON_Schemas/${{ steps.version.outputs.version }}/*.json; do
      # An unmatched glob stays literal, so skip anything that is not a file.
      [ -f "$asset" ] || continue
      echo "Uploading $asset as release asset..."
      gh release upload ${{ github.ref_name }} "$asset"
    done