Skip to content

Commit 4ef9532

Browse files
committed
Switch GWAS download to FTP zip and re-enable GWAS in workflow
1 parent 05af088 commit 4ef9532

2 files changed

Lines changed: 9 additions & 8 deletions

File tree

.github/workflows/run_generate_combined_gene_table.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ name: Generate Combined Gene Table
22

33
on:
44
workflow_dispatch:
5-
schedule:
6-
- cron: '0 2 * * 0,3' # Twice weekly: Sundays and Wednesdays at 2 AM UTC
75

86
jobs:
97
generate-gene-table:
@@ -94,7 +92,7 @@ jobs:
9492
OMIM_KEY: ${{ secrets.OMIM_KEY }}
9593
HADOOP_CONF_DIR: /tmp/hadoop-conf
9694
run: |
97-
python3 annotation_utils/generate_combined_gene_table.py --skip-gwas
95+
python3 annotation_utils/generate_combined_gene_table.py
9896
9997
- name: Add phenotype summary using AI
10098
env:

annotation_utils/get_gwas_catalog.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,28 @@
11
# Download the GWAS catalog and parse gene-disease relationships for the subset of records whose MONDO term is a rare disease term.
22

3+
import io
34
import pandas as pd
45
import requests
56
import os
7+
import zipfile
68

79
from annotation_utils.cache_utils import cache_data_table
810
from annotation_utils.get_mondo_ontology import get_mondo_ontology
911

10-
GWAS_CATALOG_URL = "https://www.ebi.ac.uk/gwas/api/search/downloads/alternative"
12+
GWAS_CATALOG_URL = "https://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-associations_ontology-annotated-full.zip"
1113

1214
@cache_data_table
1315
def _download_gwas_catalog():
14-
"""
15-
Download the GWAS catalog and return it as a pandas DataFrame
16-
"""
16+
"""Download the GWAS catalog and return it as a pandas DataFrame."""
1717
import time as _time
1818

1919
for attempt in range(1, 4):
2020
try:
2121
print(f"Downloading GWAS catalog (attempt {attempt})")
22-
return pd.read_table(GWAS_CATALOG_URL)
22+
r = requests.get(GWAS_CATALOG_URL, timeout=120)
23+
r.raise_for_status()
24+
with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
25+
return pd.read_table(zf.open(zf.namelist()[0]))
2326
except Exception as e:
2427
if attempt < 3:
2528
print(f" Failed: {e}. Retrying in 30 seconds...")

0 commit comments

Comments
 (0)