# Skip to content

# fix: add -k flag to curl for Ensembl Plants SSL certificate issue #22

# fix: add -k flag to curl for Ensembl Plants SSL certificate issue

# fix: add -k flag to curl for Ensembl Plants SSL certificate issue #22

# Workflow file for this run

# Validation workflow: runs on pushes and pull requests targeting main,
# plus a weekly scheduled run.
name: Validate Workflows

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 6 * * 1'  # Weekly on Monday 06:00 UTC

# Least privilege: every job in this workflow only reads the checked-out
# repository, so the GITHUB_TOKEN is restricted to read-only contents access.
permissions:
  contents: read

jobs:
# Runs `cwltool --validate` on every *.cwl file under workflows/ and tools/.
validate-cwl:
  name: Validate CWL
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    - name: Install cwltool
      run: pip install cwltool
    - name: Validate all CWL files
      run: |
        status=0
        checked=0
        # NUL-delimited read so file names containing spaces or newlines
        # are passed to cwltool intact.
        while IFS= read -r -d '' cwl_file; do
          checked=$((checked + 1))
          if cwltool --validate "${cwl_file}" 2>&1; then
            echo "OK: ${cwl_file}"
          else
            echo "FAIL: ${cwl_file}"
            status=1
          fi
        done < <(find workflows/ tools/ -name "*.cwl" -print0)
        # An empty match must not count as success: without this guard the
        # loop body never runs and the step would pass having checked nothing.
        if [ "${checked}" -eq 0 ]; then
          echo "ERROR: No CWL files found"
          status=1
        fi
        echo "${checked} CWL files checked"
        exit "${status}"
# Validates every workflows/*/agent.yaml against schemas/agent-spec.schema.yaml.
validate-agent-yaml:
  name: Validate agent.yaml
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    - name: Install dependencies
      run: pip install jsonschema pyyaml
    - name: Validate agent.yaml files
      run: |
        python3 << 'PYEOF'
        import yaml, jsonschema, glob, sys

        # A broken schema is fatal for the whole check, so it is loaded
        # outside the per-file error handling below.
        with open('schemas/agent-spec.schema.yaml') as f:
            schema = yaml.safe_load(f)

        agent_files = sorted(glob.glob('workflows/*/agent.yaml'))
        if not agent_files:
            print("ERROR: No agent.yaml files found")
            sys.exit(1)

        errors = []
        for agent_file in agent_files:
            try:
                # Parsing happens inside the try so that one malformed file
                # is reported as a failure instead of aborting the run
                # before the remaining files are checked.
                with open(agent_file) as f:
                    doc = yaml.safe_load(f)
                jsonschema.validate(doc, schema)
                print(f'OK: {agent_file}')
            except yaml.YAMLError as e:
                message = f'FAIL: {agent_file}: invalid YAML: {e}'
                errors.append(message)
                print(message)
            except jsonschema.ValidationError as e:
                message = f'FAIL: {agent_file}: {e.message}'
                errors.append(message)
                print(message)

        print(f'\n{len(agent_files)} agent.yaml files validated, {len(errors)} errors')
        if errors:
            sys.exit(1)
        PYEOF
# Checks that each directory under workflows/ contains main.cwl, agent.yaml,
# an examples/ directory and a tests/ directory.
check-structure:
  name: Check workflow structure
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Verify each workflow has required files
      run: |
        rc=0
        for wf_dir in workflows/*/; do
          wf_name=$(basename "$wf_dir")
          absent=""
          # Required regular files, checked in reporting order.
          for required_file in main.cwl agent.yaml; do
            if [ ! -f "${wf_dir}${required_file}" ]; then
              absent="${absent} ${required_file}"
            fi
          done
          # Required sub-directories, reported with a trailing slash.
          for required_dir in examples tests; do
            if [ ! -d "${wf_dir}${required_dir}" ]; then
              absent="${absent} ${required_dir}/"
            fi
          done
          if [ -z "$absent" ]; then
            echo "OK: ${wf_name}"
          else
            echo "FAIL: ${wf_name} — missing:${absent}"
            rc=1
          fi
        done
        exit $rc
# Structural check of the ro-crate-metadata.json files stored under each
# workflow's tests/ro-crate/ directory (directly or one level below it).
validate-ro-crate:
  name: Validate RO-Crate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    # No pip install step: the script below uses only the Python standard
    # library (glob, json, sys).
    - name: Validate Workflow Run RO-Crates
      run: |
        python3 << 'PYEOF'
        import glob, json, sys

        crate_files = sorted(
            glob.glob('workflows/*/tests/ro-crate/ro-crate-metadata.json') +
            glob.glob('workflows/*/tests/ro-crate/*/ro-crate-metadata.json')
        )
        if not crate_files:
            print("ERROR: No RO-Crate files found")
            sys.exit(1)

        def has_type(graph, wanted):
            # Substring match on the stringified @type, so both a plain
            # string and a list of types are accepted.
            return any(wanted in str(entity.get('@type', '')) for entity in graph)

        errors = []
        for path in crate_files:
            parts = path.split('/')
            pipeline = parts[1]
            # Include test name for per-test subdirectories
            if len(parts) > 5:
                pipeline = f"{parts[1]}/{parts[4]}"
            try:
                with open(path) as f:
                    data = json.load(f)
                graph = data.get('@graph', [])
                if not graph:
                    errors.append(f'FAIL: {pipeline}: empty @graph')
                    continue
                # Only the first missing entity type is reported per crate.
                if not has_type(graph, 'Dataset'):
                    errors.append(f'FAIL: {pipeline}: missing Dataset entity')
                elif not has_type(graph, 'ComputationalWorkflow'):
                    errors.append(f'FAIL: {pipeline}: missing ComputationalWorkflow entity')
                elif not has_type(graph, 'CreateAction'):
                    errors.append(f'FAIL: {pipeline}: missing CreateAction entity')
                else:
                    print(f'OK: {pipeline} ({len(graph)} entities)')
            except Exception as exc:
                # Unreadable, non-JSON or unexpectedly shaped files are
                # recorded as failures so the remaining crates still run.
                errors.append(f'FAIL: {pipeline}: {exc}')

        print(f'\n{len(crate_files)} RO-Crate files validated, {len(errors)} errors')
        for message in errors:
            print(message)
        if errors:
            sys.exit(1)
        PYEOF
# Checks that the reference locations listed in references/genomes.yaml
# (Ensembl FASTA/GTF URLs and iGenomes S3 base paths) are reachable.
validate-references:
  name: Validate reference URLs
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    - name: Install dependencies
      run: pip install pyyaml
    - name: Check Ensembl URLs
      run: |
        python3 << 'PYEOF'
        import yaml, subprocess, sys

        with open('references/genomes.yaml') as f:
            genomes = yaml.safe_load(f)

        errors = []
        for g in genomes:
            name = f"{g['organism']} ({g['assembly']})"
            # `or {}` tolerates keys that are present but set to null.
            ensembl = (g.get('sources') or {}).get('ensembl') or {}
            for key in ['fasta', 'gtf']:
                url = ensembl.get(key)
                if not url:
                    continue
                try:
                    # NOTE(review): -k disables TLS certificate verification,
                    # so this confirms reachability only, not server identity.
                    result = subprocess.run(
                        ['curl', '-skI', '-o', '/dev/null', '-w', '%{http_code}',
                         '--max-time', '30', url],
                        capture_output=True, text=True, timeout=60
                    )
                    code = result.stdout.strip()
                except subprocess.TimeoutExpired:
                    # Report a hung request as a failure for this URL
                    # instead of aborting the whole check.
                    code = 'timeout'
                if code in ('200', '226', '301', '302'):
                    print(f'OK: {name} {key} ({code})')
                else:
                    errors.append(f'FAIL: {name} {key} -> HTTP {code}')
                    print(f'FAIL: {name} {key} -> HTTP {code}')

        print(f'\n{len(genomes)} genomes checked, {len(errors)} errors')
        if errors:
            for message in errors:
                print(message)
            sys.exit(1)
        PYEOF
    - name: Check iGenomes S3 paths
      run: |
        python3 << 'PYEOF'
        import yaml, subprocess, sys

        with open('references/genomes.yaml') as f:
            genomes = yaml.safe_load(f)

        errors = []
        for g in genomes:
            name = f"{g['organism']} ({g['assembly']})"
            # `or {}` tolerates keys that are present but set to null.
            igenomes = (g.get('sources') or {}).get('igenomes') or {}
            base = igenomes.get('base')
            if not base:
                continue
            try:
                result = subprocess.run(
                    ['aws', 's3', 'ls', '--no-sign-request', base],
                    capture_output=True, text=True, timeout=30
                )
                reachable = result.returncode == 0
            except subprocess.TimeoutExpired:
                # A listing that exceeds the timeout counts as not
                # accessible rather than crashing the step.
                reachable = False
            if reachable:
                print(f'OK: {name} iGenomes base')
            else:
                errors.append(f'FAIL: {name} iGenomes base not accessible')
                print(f'FAIL: {name} iGenomes base not accessible')

        if errors:
            for message in errors:
                print(message)
            sys.exit(1)
        else:
            print(f'\nAll iGenomes paths verified')
        PYEOF