fix: add -k flag to curl for Ensembl Plants SSL certificate issue #22
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: runs the validation jobs on pushes and pull requests that
# target main, and once a week on a schedule.
name: Validate Workflows

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 6 * * 1' # Weekly on Monday 06:00 UTC

# The jobs in this workflow only read the checked-out repository, and some of
# them execute content supplied by pull requests, so the GITHUB_TOKEN is
# limited to read-only access to repository contents.
permissions:
  contents: read

jobs:
# Job: run `cwltool --validate` over every *.cwl file under workflows/ and tools/.
  validate-cwl:
    name: Validate CWL
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install cwltool
        run: pip install cwltool

      - name: Validate all CWL files
        run: |
          # Every file is validated even after a failure, so one run reports
          # all broken documents; the step fails at the end if any did.
          rc=0
          # NUL-delimited names (-print0 / read -d '') keep paths containing
          # spaces or newlines intact.
          while IFS= read -r -d '' doc; do
            if ! cwltool --validate "${doc}" 2>&1; then
              echo "FAIL: ${doc}"
              rc=1
              continue
            fi
            echo "OK: ${doc}"
          done < <(find workflows/ tools/ -name "*.cwl" -print0)
          exit "${rc}"
# Job: validate every workflows/*/agent.yaml against schemas/agent-spec.schema.yaml.
  validate-agent-yaml:
    name: Validate agent.yaml
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: pip install jsonschema pyyaml

      - name: Validate agent.yaml files
        run: |
          python3 << 'PYEOF'
          import yaml, jsonschema, glob, sys

          # The schema itself is stored as YAML; a failure to load it is fatal.
          with open('schemas/agent-spec.schema.yaml') as f:
              schema = yaml.safe_load(f)

          agent_files = sorted(glob.glob('workflows/*/agent.yaml'))
          if not agent_files:
              # An empty match means the check would pass vacuously; fail instead.
              print("ERROR: No agent.yaml files found")
              sys.exit(1)

          errors = []
          for agent_file in agent_files:
              try:
                  with open(agent_file) as f:
                      doc = yaml.safe_load(f)
                  jsonschema.validate(doc, schema)
              except yaml.YAMLError as e:
                  # A file that is not parseable YAML is reported like any other
                  # failure so the remaining files are still checked.
                  errors.append(f'FAIL: {agent_file}: invalid YAML: {e}')
                  print(errors[-1])
              except jsonschema.ValidationError as e:
                  errors.append(f'FAIL: {agent_file}: {e.message}')
                  print(errors[-1])
              else:
                  print(f'OK: {agent_file}')

          print(f'\n{len(agent_files)} agent.yaml files validated, {len(errors)} errors')
          if errors:
              sys.exit(1)
          PYEOF
# Job: check that each directory under workflows/ contains main.cwl, agent.yaml,
# examples/ and tests/.
  check-structure:
    name: Check workflow structure
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Verify each workflow has required files
        run: |
          # All workflow directories are inspected before exiting, so a single
          # run lists every incomplete workflow.
          rc=0
          for wf_dir in workflows/*/; do
            wf_name=$(basename "$wf_dir")
            # Space-separated list of the required entries that are absent.
            absent=""
            [ -f "${wf_dir}main.cwl" ] || absent="${absent} main.cwl"
            [ -f "${wf_dir}agent.yaml" ] || absent="${absent} agent.yaml"
            [ -d "${wf_dir}examples" ] || absent="${absent} examples/"
            [ -d "${wf_dir}tests" ] || absent="${absent} tests/"
            if [ -z "$absent" ]; then
              echo "OK: ${wf_name}"
            else
              echo "FAIL: ${wf_name} — missing:${absent}"
              rc=1
            fi
          done
          exit "$rc"
# Job: structural check of the ro-crate-metadata.json files stored under each
# workflow's tests/ro-crate/ directory.
  validate-ro-crate:
    name: Validate RO-Crate
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      # The check below uses only the Python standard library, so no packages
      # are installed for this job.
      - name: Validate Workflow Run RO-Crates
        run: |
          python3 << 'PYEOF'
          import glob, json, sys

          # Entity types every crate must contain, in the order they are reported.
          # Only the first missing type is reported per crate.
          REQUIRED_TYPES = ('Dataset', 'ComputationalWorkflow', 'CreateAction')

          # Crates live either directly in tests/ro-crate/ or one level deeper in
          # a per-test subdirectory.
          crate_files = sorted(
              glob.glob('workflows/*/tests/ro-crate/ro-crate-metadata.json') +
              glob.glob('workflows/*/tests/ro-crate/*/ro-crate-metadata.json')
          )
          if not crate_files:
              print("ERROR: No RO-Crate files found")
              sys.exit(1)

          errors = []
          for path in crate_files:
              parts = path.split('/')
              pipeline = parts[1]
              # Include test name for per-test subdirectories
              if len(parts) > 5:
                  pipeline = f"{parts[1]}/{parts[4]}"
              try:
                  with open(path, encoding='utf-8') as f:
                      data = json.load(f)
                  graph = data.get('@graph', [])
                  if not graph:
                      errors.append(f'FAIL: {pipeline}: empty @graph')
                      continue
                  # '@type' may be a string or a list; str() lets one substring
                  # test cover both forms.
                  type_strings = [str(entity.get('@type', '')) for entity in graph]
                  missing = next(
                      (t for t in REQUIRED_TYPES
                       if not any(t in s for s in type_strings)),
                      None,
                  )
                  if missing:
                      errors.append(f'FAIL: {pipeline}: missing {missing} entity')
                  else:
                      print(f'OK: {pipeline} ({len(graph)} entities)')
              except Exception as e:
                  # Deliberately broad: unreadable, malformed or unexpectedly
                  # shaped files are reported as failures for that crate and
                  # the remaining crates are still checked.
                  errors.append(f'FAIL: {pipeline}: {e}')

          print(f'\n{len(crate_files)} RO-Crate files validated, {len(errors)} errors')
          for e in errors:
              print(e)
          if errors:
              sys.exit(1)
          PYEOF
# Job: check that the Ensembl URLs and iGenomes S3 locations listed in
# references/genomes.yaml are reachable.
  validate-references:
    name: Validate reference URLs
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: pip install pyyaml

      - name: Check Ensembl URLs
        run: |
          python3 << 'PYEOF'
          import yaml, subprocess, sys

          # Response codes accepted as "reachable".
          OK_CODES = ('200', '226', '301', '302')

          with open('references/genomes.yaml') as f:
              genomes = yaml.safe_load(f)

          errors = []
          for g in genomes:
              name = f"{g['organism']} ({g['assembly']})"
              ensembl = g.get('sources', {}).get('ensembl', {})
              for key in ['fasta', 'gtf']:
                  url = ensembl.get(key)
                  if not url:
                      continue
                  try:
                      # -s: silent, -k: skip TLS certificate verification,
                      # -I: headers only. The body is discarded and only the
                      # status code is written to stdout.
                      result = subprocess.run(
                          ['curl', '-skI', '-o', '/dev/null', '-w', '%{http_code}', '--max-time', '30', url],
                          capture_output=True, text=True, timeout=60
                      )
                  except subprocess.TimeoutExpired:
                      # Record the hang as a failure for this URL and keep
                      # checking the rest instead of aborting the step.
                      errors.append(f'FAIL: {name} {key} -> timed out')
                      print(errors[-1])
                      continue
                  code = result.stdout.strip()
                  if code in OK_CODES:
                      print(f'OK: {name} {key} ({code})')
                  else:
                      errors.append(f'FAIL: {name} {key} -> HTTP {code}')
                      print(errors[-1])

          print(f'\n{len(genomes)} genomes checked, {len(errors)} errors')
          if errors:
              for e in errors:
                  print(e)
              sys.exit(1)
          PYEOF

      - name: Check iGenomes S3 paths
        run: |
          python3 << 'PYEOF'
          import yaml, subprocess, sys

          with open('references/genomes.yaml') as f:
              genomes = yaml.safe_load(f)

          errors = []
          for g in genomes:
              name = f"{g['organism']} ({g['assembly']})"
              igenomes = g.get('sources', {}).get('igenomes', {})
              base = igenomes.get('base')
              if not base:
                  continue
              try:
                  # --no-sign-request: list the location anonymously, without
                  # AWS credentials.
                  result = subprocess.run(
                      ['aws', 's3', 'ls', '--no-sign-request', base],
                      capture_output=True, text=True, timeout=30
                  )
              except subprocess.TimeoutExpired:
                  # Treat a hang as a failure for this entry and continue with
                  # the remaining genomes.
                  errors.append(f'FAIL: {name} iGenomes base not accessible: timed out')
                  print(errors[-1])
                  continue
              if result.returncode == 0:
                  print(f'OK: {name} iGenomes base')
              else:
                  # Surface the CLI's own error text so the failure is diagnosable.
                  detail = result.stderr.strip()
                  message = f'FAIL: {name} iGenomes base not accessible'
                  if detail:
                      message = f'{message}: {detail}'
                  errors.append(message)
                  print(message)

          if errors:
              for e in errors:
                  print(e)
              sys.exit(1)
          else:
              print('\nAll iGenomes paths verified')
          PYEOF