forked from hhx465453939/OpenClaw-Medical-Skills
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_gwas_tools.py
More file actions
275 lines (232 loc) · 9.88 KB
/
test_gwas_tools.py
File metadata and controls
275 lines (232 loc) · 9.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
"""
Phase 2: Tool Testing - Verify GWAS tools before documentation
CRITICAL: Test ALL tools BEFORE writing skill documentation to verify:
1. Parameter names (don't assume!)
2. Response formats (data structure)
3. Real example queries work
NOTE: Some tools have oneOf validation bugs - use validate=False when needed
"""
from tooluniverse.tools import (
gwas_search_associations,
gwas_get_associations_for_trait,
gwas_search_snps,
gwas_get_snp_by_id,
gwas_get_study_by_id,
gwas_search_studies,
OpenTargets_get_variant_info,
OpenTargets_get_variant_credible_sets,
OpenTargets_search_gwas_studies_by_disease,
OpenTargets_get_gwas_study,
OpenTargets_get_study_credible_sets,
)
import json
def test_gwas_catalog_tools():
    """Smoke-test four GWAS Catalog tools and print what each one returns.

    Each check runs inside its own ``try`` block, so a failing tool is
    reported with a ``FAILED`` line and the remaining checks still run.
    Nothing is returned; the printed output is the result.
    """
    def report_failure(error):
        # Shared failure line used by every check below.
        print(f" ✗ FAILED: {error}")

    print("\n=== Testing GWAS Catalog Tools ===\n")

    # Check 1: associations for a trait (sorted by p-value)
    print("1. Testing gwas_get_associations_for_trait with 'type 2 diabetes'...")
    try:
        response = gwas_get_associations_for_trait(
            disease_trait="type 2 diabetes",
            size=5,
            validate=False,  # validation skipped because of the oneOf bug
        )
        records = response['data']
        print(f" ✓ Returned {len(records)} associations")
        if records:
            top = records[0]
            top_p = top.get('p_value')
            top_genes = top.get('mapped_genes', [])[:3]
            print(f" - Top association: p={top_p}, genes={top_genes}")
            field_names = list(top.keys())[:10]
            print(f" - Available fields: {field_names}")
    except Exception as error:
        report_failure(error)

    # Check 2: single SNP lookup by rsID
    print("\n2. Testing gwas_get_snp_by_id for rs7903146 (TCF7L2, T2D)...")
    try:
        snp_record = gwas_get_snp_by_id(rs_id="rs7903146")
        retrieved_id = snp_record.get('rs_id')
        print(f" ✓ SNP data retrieved: {retrieved_id}")
        maf = snp_record.get('maf')
        consequence = snp_record.get('most_severe_consequence')
        print(f" - MAF: {maf}, consequence: {consequence}")
        mapped = snp_record.get('mapped_genes')
        print(f" - Mapped genes: {mapped}")
    except Exception as error:
        report_failure(error)

    # Check 3: SNP search by mapped gene
    print("\n3. Testing gwas_search_snps for TCF7L2...")
    try:
        response = gwas_search_snps(
            mapped_gene="TCF7L2",
            size=5,
            validate=False,
        )
        records = response['data']
        print(f" ✓ Returned {len(records)} SNPs")
        if records:
            first_snp = records[0]
            sample_id = first_snp.get('rs_id')
            sample_consequence = first_snp.get('most_severe_consequence')
            print(f" - Sample SNP: {sample_id}, consequence={sample_consequence}")
    except Exception as error:
        report_failure(error)

    # Check 4: study lookup by accession
    print("\n4. Testing gwas_get_study_by_id for GCST000392 (T1D)...")
    try:
        study_record = gwas_get_study_by_id(study_id="GCST000392")
        trait_name = study_record.get('disease_trait')
        print(f" ✓ Study retrieved: {trait_name}")
        sample_size = study_record.get('initial_sample_size')
        print(f" - Sample size: {sample_size}")
        has_sumstats = study_record.get('full_summary_stats_available')
        print(f" - Has summary stats: {has_sumstats}")
    except Exception as error:
        report_failure(error)
def test_opentargets_tools():
    """Smoke-test five Open Targets Genetics tools and print their output.

    Each check runs inside its own ``try`` block, so a failing tool is
    reported with a ``FAILED`` line and the remaining checks still run.
    Nothing is returned; the printed output is the result.
    """
    def report_failure(error):
        # Shared failure line used by every check below.
        print(f" ✗ FAILED: {error}")

    print("\n\n=== Testing Open Targets Genetics Tools ===\n")

    # Check 1: variant annotation
    print("1. Testing OpenTargets_get_variant_info...")
    try:
        response = OpenTargets_get_variant_info(
            variantId="10_112998590_C_T",  # rs7903146 (TCF7L2, T2D)
        )
        variant = response['data']['variant']
        variant_id = variant['id']
        rs_ids = variant.get('rsIds')
        print(f" ✓ Variant: {variant_id}, rsIDs={rs_ids}")
        consequence_label = variant.get('mostSevereConsequence', {}).get('label')
        print(f" - Consequence: {consequence_label}")
        population_count = len(variant.get('alleleFrequencies', []))
        print(f" - Allele frequencies: {population_count} populations")
    except Exception as error:
        report_failure(error)

    # Check 2: credible sets containing a variant
    print("\n2. Testing OpenTargets_get_variant_credible_sets...")
    try:
        response = OpenTargets_get_variant_credible_sets(
            variantId="10_112998590_C_T",
            size=3,
        )
        variant_sets = response['data']['variant']['credibleSets']
        total = variant_sets.get('count', 0)
        print(f" ✓ Found {total} credible sets")
        if variant_sets.get('rows'):
            first_set = variant_sets['rows'][0]
            source_trait = first_set['study'].get('traitFromSource')
            print(f" - Study: {source_trait}")
            prediction_count = len(first_set.get('l2GPredictions', {}).get('rows', []))
            print(f" - L2G predictions: {prediction_count} genes")
    except Exception as error:
        report_failure(error)

    # Check 3: GWAS studies for a disease
    print("\n3. Testing OpenTargets_search_gwas_studies_by_disease...")
    try:
        response = OpenTargets_search_gwas_studies_by_disease(
            diseaseIds=["MONDO_0005148"],  # Type 2 diabetes
            size=3,
        )
        disease_studies = response['data']['studies']
        total = disease_studies.get('count', 0)
        print(f" ✓ Found {total} studies for T2D")
        if disease_studies.get('rows'):
            first_study = disease_studies['rows'][0]
            first_study_id = first_study['id']
            n_samples = first_study.get('nSamples')
            print(f" - Sample: {first_study_id}, n={n_samples}")
    except Exception as error:
        report_failure(error)

    # Check 4: single GWAS study lookup
    print("\n4. Testing OpenTargets_get_gwas_study...")
    try:
        response = OpenTargets_get_gwas_study(studyId="GCST000392")
        study_info = response['data']['study']
        study_id = study_info['id']
        source_trait = study_info.get('traitFromSource')
        print(f" ✓ Study: {study_id}, trait={source_trait}")
        n_samples = study_info.get('nSamples')
        has_sumstats = study_info.get('hasSumstats')
        print(f" - N={n_samples}, has sumstats={has_sumstats}")
    except Exception as error:
        report_failure(error)

    # Check 5: credible sets belonging to a study
    print("\n5. Testing OpenTargets_get_study_credible_sets...")
    try:
        response = OpenTargets_get_study_credible_sets(
            studyIds=["GCST000392"],
            size=5,
        )
        study_sets = response['data']['credibleSets']
        total = study_sets.get('count', 0)
        print(f" ✓ Found {total} credible sets for GCST000392")
        if study_sets.get('rows'):
            first_set = study_sets['rows'][0]
            lead_variant = first_set.get('variant', {}).get('id')
            print(f" - Lead variant: {lead_variant}")
            top_predictions = first_set.get('l2GPredictions', {}).get('rows', [])[:3]
            gene_symbols = [row['target']['approvedSymbol'] for row in top_predictions]
            print(f" - L2G genes: {gene_symbols}")
    except Exception as error:
        report_failure(error)
def test_trait_to_gene_workflow():
    """Run the complete trait-to-gene discovery workflow and print a ranking.

    Requests up to 50 associations for "type 2 diabetes" from
    ``gwas_get_associations_for_trait``, keeps the associations whose
    p-value is below the genome-wide significance threshold (5e-8), groups
    them by mapped gene, and prints the ten genes with the smallest p-values
    along with the number of distinct rsIDs seen for each gene.

    Returns:
        None. Progress and results are printed. Any exception raised along
        the way is caught and reported as a ``FAILED`` line.
    """
    from collections import defaultdict

    genome_wide_significance = 5e-8

    print("\n\n=== Testing Complete Trait-to-Gene Workflow ===\n")
    trait = "type 2 diabetes"
    print(f"Discovering genes for trait: {trait}\n")

    # Step 1: Search associations
    print("Step 1: Searching GWAS associations...")
    try:
        assoc_result = gwas_get_associations_for_trait(
            disease_trait=trait,
            size=50,
            validate=False,  # Skip validation due to oneOf bug
        )
        associations = assoc_result['data']
        print(f" ✓ Found {len(associations)} associations")

        # Step 2: Extract mapped genes
        print("\nStep 2: Extracting mapped genes from associations...")
        gene_to_snps = defaultdict(set)  # gene -> distinct rsIDs
        gene_to_min_p = {}  # gene -> smallest p-value seen
        for assoc in associations:
            p_value = assoc.get('p_value')
            # Test for None explicitly: a truthiness check would also drop
            # p == 0, which is the strongest possible signal (very small
            # p-values underflow to 0.0 when parsed as floats).
            if p_value is None or not p_value < genome_wide_significance:
                continue

            # `or []` also covers keys that are present with a None value.
            genes = assoc.get('mapped_genes') or []
            if not genes:
                continue
            snps = assoc.get('snp_allele') or []
            # The rsIDs are the same for every gene of this association,
            # so collect them once instead of once per gene.
            rs_ids = {rs_id for rs_id in (s.get('rs_id') for s in snps) if rs_id}

            for gene in genes:
                gene_to_snps[gene].update(rs_ids)
                gene_to_min_p[gene] = min(gene_to_min_p.get(gene, p_value), p_value)
        print(f" ✓ Found {len(gene_to_snps)} genes with genome-wide significant associations")

        # Step 3: Rank by significance
        print("\nStep 3: Ranking genes by significance...")
        ranked_genes = sorted(gene_to_min_p.items(), key=lambda item: item[1])[:10]
        # Use the queried trait rather than a hard-coded copy of its name.
        print(f"\n Top 10 genes associated with {trait}:")
        for i, (gene, p_val) in enumerate(ranked_genes, 1):
            snp_count = len(gene_to_snps[gene])
            print(f" {i:2d}. {gene:10s} p={p_val:.2e} ({snp_count} SNPs)")
        print("\n✓ Workflow complete!")
    except Exception as e:
        print(f" ✗ FAILED: {e}")
def verify_tool_availability():
    """Check that every tool this script needs is registered in ToolUniverse.

    Creates a ``ToolUniverse``, calls ``load_tools()``, and prints one
    status line per required tool depending on whether its name is found
    in ``all_tool_dict``, followed by a summary line.

    Returns:
        bool: ``True`` when every required tool name was found, otherwise
        ``False``.
    """
    from tooluniverse import ToolUniverse

    universe = ToolUniverse()
    universe.load_tools()
    print("\n=== Verifying Tool Availability ===\n")

    gwas_catalog_tools = [
        "gwas_search_associations",
        "gwas_get_associations_for_trait",
        "gwas_search_snps",
        "gwas_get_snp_by_id",
        "gwas_get_study_by_id",
        "gwas_search_studies",
    ]
    open_targets_tools = [
        "OpenTargets_get_variant_info",
        "OpenTargets_get_variant_credible_sets",
        "OpenTargets_search_gwas_studies_by_disease",
        "OpenTargets_get_gwas_study",
        "OpenTargets_get_study_credible_sets",
    ]

    missing_count = 0
    for tool_name in gwas_catalog_tools + open_targets_tools:
        if tool_name in universe.all_tool_dict:
            print(f"✓ {tool_name}")
        else:
            print(f"✗ {tool_name}")
            missing_count += 1

    everything_present = missing_count == 0
    if everything_present:
        summary = '✓ All tools available!'
    else:
        summary = '✗ Some tools missing!'
    print(f"\n{summary}")
    return everything_present
if __name__ == "__main__":
    # Script entry point: check tool availability, run the individual tool
    # checks, then run the end-to-end workflow.
    print("=" * 70)
    print("GWAS Trait-to-Gene Discovery: Tool Testing")
    print("=" * 70)

    # Phase 1: Verify tools are loaded
    if not verify_tool_availability():
        print("\n✗ CRITICAL: Not all tools available. Cannot proceed.")
        # Raise SystemExit directly: the bare `exit()` helper is injected by
        # the `site` module for interactive use and is not guaranteed to
        # exist (e.g. under `python -S` or in frozen/embedded interpreters).
        raise SystemExit(1)

    # Phase 2: Test individual tools
    test_gwas_catalog_tools()
    test_opentargets_tools()

    # Phase 3: Test complete workflow
    test_trait_to_gene_workflow()

    print("\n" + "=" * 70)
    print("Testing complete! Proceed to documentation phase.")
    print("=" * 70)