Skip to content

Commit cb7d5a4

Browse files
committed
checkpoint
1 parent c736b8c commit cb7d5a4

File tree

1 file changed

+109
-18
lines changed

1 file changed

+109
-18
lines changed

gget/gget_opentargets.py

Lines changed: 109 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,27 @@
99

1010
logger = set_up_logger() # export GGET_LOGLEVEL=DEBUG
1111

12-
QUERY_STRING_DISEASES = ""
12+
QUERY_STRING_DISEASES = """
13+
query target($ensemblId: String!) {
14+
target(ensemblId: $ensemblId) {
15+
associatedDiseases {
16+
rows {
17+
score
18+
disease {
19+
id
20+
name
21+
description
22+
}
23+
}
24+
}
25+
}
26+
}
27+
"""
1328

1429
QUERY_STRING_DRUGS = """
1530
query target($ensemblId: String!) {
1631
target(ensemblId: $ensemblId) {
17-
id
18-
approvedSymbol
1932
drugAndClinicalCandidates {
20-
count
2133
rows {
2234
drug {
2335
id
@@ -47,15 +59,42 @@
4759
}
4860
"""
4961

50-
QUERY_STRING_TRACTABILITY = ""
62+
QUERY_STRING_TRACTABILITY = """
63+
query target($ensemblId: String!) {
64+
target(ensemblId: $ensemblId) {
65+
tractability {
66+
modality
67+
label
68+
value
69+
}
70+
}
71+
}
72+
"""
5173
QUERY_STRING_PHARMACOGENETICS = ""
5274
QUERY_STRING_EXPRESSION = ""
53-
QUERY_STRING_DEPMAP = ""
75+
QUERY_STRING_DEPMAP = """
76+
query target($ensemblId: String!) {
77+
target(ensemblId: $ensemblId) {
78+
depMapEssentiality {
79+
tissueId
80+
tissueName
81+
screens {
82+
cellLineName
83+
mutation
84+
expression
85+
diseaseFromSource
86+
depmapId
87+
geneEffect
88+
}
89+
}
90+
}
91+
}
92+
"""
5493
QUERY_STRING_INTERACTIONS = ""
5594

5695
RESOURCES = {"diseases", "drugs", "tractability", "pharmacogenetics", "expression", "depmap", "interactions"}
5796

58-
def collapse_singletons(obj):
97+
def _collapse_singletons(obj):
5998
"""
6099
Recursively collapse:
61100
- nested single-element lists
@@ -74,21 +113,25 @@ def flatten(x):
74113
yield el
75114

76115
flat = list(flatten(obj))
116+
flat = [el for el in flat if el is not None]
77117

78118
# if exactly one element → recurse
79119
if len(flat) == 1:
80-
return collapse_singletons(flat[0])
120+
return _collapse_singletons(flat[0])
81121

82122
# otherwise recurse inside but keep structure
83-
return [collapse_singletons(el) for el in flat]
123+
return [_collapse_singletons(el) for el in flat]
84124

85125
# -------------------------
86126
# Case 2: dict
87127
# -------------------------
88128
if isinstance(obj, dict):
89129
# recurse into values
90-
obj = {k: collapse_singletons(v) for k, v in obj.items()}
130+
obj = {k: _collapse_singletons(v) for k, v in obj.items()}
91131

132+
if len(obj) == 0:
133+
return None
134+
92135
# if single key → collapse
93136
if len(obj) == 1:
94137
return next(iter(obj.values()))
@@ -100,6 +143,43 @@ def flatten(x):
100143
# -------------------------
101144
return obj
102145

146+
def _propagate_context(rows, rows_path_and_keep_layer):
147+
"""
148+
Traverse nested structure while copying parent-level scalar fields
149+
into each child element.
150+
"""
151+
for key, keep_layer in rows_path_and_keep_layer:
152+
rows_sub = rows[key]
153+
154+
if keep_layer:
155+
# -------------------------
156+
# extract parent scalar fields
157+
# -------------------------
158+
parent_fields = {
159+
k: v for k, v in rows.items()
160+
if k != key and not isinstance(v, (list, dict))
161+
}
162+
163+
# -------------------------
164+
# propagate into children
165+
# -------------------------
166+
if isinstance(rows_sub, list):
167+
new_rows = []
168+
for el in rows_sub:
169+
if isinstance(el, dict):
170+
new_rows.append({**parent_fields, **el})
171+
else:
172+
new_rows.append(el)
173+
rows_sub = new_rows
174+
175+
elif isinstance(rows_sub, dict):
176+
rows_sub = {**parent_fields, **rows_sub}
177+
178+
# move down
179+
rows = rows_sub
180+
181+
return rows
182+
103183
def opentargets(
104184
ensembl_id,
105185
resource="diseases",
@@ -133,19 +213,25 @@ def opentargets(
133213
"""
134214

135215
if resource == "diseases":
136-
raise NotImplementedError("The 'diseases' resource is currently not supported. Please check back in a future update.")
216+
query_string = QUERY_STRING_DISEASES
217+
rows_path_and_keep_layer = [("associatedDiseases", False), ("rows", False), ("disease", True)]
137218
elif resource == "drugs":
138219
query_string = QUERY_STRING_DRUGS
139-
rows_path = ["drugAndClinicalCandidates", "rows"]
220+
rows_path_and_keep_layer = [("drugAndClinicalCandidates", False), ("rows", False)]
140221
elif resource == "tractability":
141-
raise NotImplementedError("The 'tractability' resource is currently not supported. Please check back in a future update.")
222+
query_string = QUERY_STRING_TRACTABILITY
223+
rows_path_and_keep_layer = [("tractability", False)]
142224
elif resource == "pharmacogenetics":
225+
query_string = QUERY_STRING_PHARMACOGENETICS
143226
raise NotImplementedError("The 'pharmacogenetics' resource is currently not supported. Please check back in a future update.")
144227
elif resource == "expression":
228+
query_string = QUERY_STRING_EXPRESSION
145229
raise NotImplementedError("The 'expression' resource is currently not supported. Please check back in a future update.")
146230
elif resource == "depmap":
147-
raise NotImplementedError("The 'depmap' resource is currently not supported. Please check back in a future update.")
231+
query_string = QUERY_STRING_DEPMAP
232+
rows_path_and_keep_layer = [("depMapEssentiality", False), ("screens", True)]
148233
elif resource == "interactions":
234+
query_string = QUERY_STRING_INTERACTIONS
149235
raise NotImplementedError("The 'interactions' resource is currently not supported. Please check back in a future update.")
150236
else:
151237
raise ValueError(f"'resource' argument specified as {resource}. Expected one of: {', '.join(RESOURCES)}")
@@ -172,20 +258,25 @@ def opentargets(
172258
# if json:
173259
# return api_response
174260

175-
rows = api_response["data"]["target"]
261+
api_target = api_response["data"]["target"]
176262

177-
for i in range(len(rows_path)):
178-
rows = rows[rows_path[i]]
263+
rows = _propagate_context(
264+
api_target,
265+
rows_path_and_keep_layer
266+
)
179267

180268
# ---------------------------
181269
# If JSON → return normalized JSON
182270
# ---------------------------
183271
df = pd.json_normalize(rows)
272+
df = df.drop_duplicates()
273+
df = df.dropna(axis=1, how="all") # drop any all-NaN columns
274+
df = df.dropna(axis=0, how="all") # drop any all-NaN rows
184275

185276
if limit is not None:
186277
df = df.head(limit)
187278

188-
df = df.map(collapse_singletons) # drug.mechanismsOfAction.rows --> drug.mechanismsOfAction.mechanismOfAction
279+
df = df.map(_collapse_singletons)
189280

190281
if wrap_text:
191282
for col in df.columns:

0 commit comments

Comments
 (0)