99
1010logger = set_up_logger () # export GGET_LOGLEVEL=DEBUG
1111
# GraphQL query: for the target identified by $ensemblId, fetch the associated
# diseases as rows of (score, disease {id, name, description}).
QUERY_STRING_DISEASES = """
query target($ensemblId: String!) {
  target(ensemblId: $ensemblId) {
    associatedDiseases {
      rows {
        score
        disease {
          id
          name
          description
        }
      }
    }
  }
}
"""
1328
1429QUERY_STRING_DRUGS = """
1530query target($ensemblId: String!) {
1631 target(ensemblId: $ensemblId) {
17- id
18- approvedSymbol
1932 drugAndClinicalCandidates {
20- count
2133 rows {
2234 drug {
2335 id
4759}
4860"""
4961
# GraphQL query: for the target identified by $ensemblId, fetch its
# tractability entries (modality, label, value).
QUERY_STRING_TRACTABILITY = """
query target($ensemblId: String!) {
  target(ensemblId: $ensemblId) {
    tractability {
      modality
      label
      value
    }
  }
}
"""
# Placeholders: no query has been written for these resources yet.
QUERY_STRING_PHARMACOGENETICS = ""
QUERY_STRING_EXPRESSION = ""

# GraphQL query: for the target identified by $ensemblId, fetch DepMap
# essentiality data. Each entry carries tissue-level fields (tissueId,
# tissueName) plus a nested list of per-cell-line screens.
QUERY_STRING_DEPMAP = """
query target($ensemblId: String!) {
  target(ensemblId: $ensemblId) {
    depMapEssentiality {
      tissueId
      tissueName
      screens {
        cellLineName
        mutation
        expression
        diseaseFromSource
        depmapId
        geneEffect
      }
    }
  }
}
"""

# Placeholder: no query has been written for this resource yet.
QUERY_STRING_INTERACTIONS = ""
5594
# Names accepted for the 'resource' argument.
# NOTE: this is a set, so iteration order (e.g. when joined into an error
# message) is not guaranteed to be stable between interpreter runs.
RESOURCES = {
    "diseases",
    "drugs",
    "tractability",
    "pharmacogenetics",
    "expression",
    "depmap",
    "interactions",
}
5796
58- def collapse_singletons (obj ):
97+ def _collapse_singletons (obj ):
5998 """
6099 Recursively collapse:
61100 - nested single-element lists
@@ -74,21 +113,25 @@ def flatten(x):
74113 yield el
75114
76115 flat = list (flatten (obj ))
116+ flat = [el for el in flat if el is not None ]
77117
78118 # if exactly one element → recurse
79119 if len (flat ) == 1 :
80- return collapse_singletons (flat [0 ])
120+ return _collapse_singletons (flat [0 ])
81121
82122 # otherwise recurse inside but keep structure
83- return [collapse_singletons (el ) for el in flat ]
123+ return [_collapse_singletons (el ) for el in flat ]
84124
85125 # -------------------------
86126 # Case 2: dict
87127 # -------------------------
88128 if isinstance (obj , dict ):
89129 # recurse into values
90- obj = {k : collapse_singletons (v ) for k , v in obj .items ()}
130+ obj = {k : _collapse_singletons (v ) for k , v in obj .items ()}
91131
132+ if len (obj ) == 0 :
133+ return None
134+
92135 # if single key → collapse
93136 if len (obj ) == 1 :
94137 return next (iter (obj .values ()))
@@ -100,6 +143,43 @@ def flatten(x):
100143 # -------------------------
101144 return obj
102145
146+ def _propagate_context (rows , rows_path_and_keep_layer ):
147+ """
148+ Traverse nested structure while copying parent-level scalar fields
149+ into each child element.
150+ """
151+ for key , keep_layer in rows_path_and_keep_layer :
152+ rows_sub = rows [key ]
153+
154+ if keep_layer :
155+ # -------------------------
156+ # extract parent scalar fields
157+ # -------------------------
158+ parent_fields = {
159+ k : v for k , v in rows .items ()
160+ if k != key and not isinstance (v , (list , dict ))
161+ }
162+
163+ # -------------------------
164+ # propagate into children
165+ # -------------------------
166+ if isinstance (rows_sub , list ):
167+ new_rows = []
168+ for el in rows_sub :
169+ if isinstance (el , dict ):
170+ new_rows .append ({** parent_fields , ** el })
171+ else :
172+ new_rows .append (el )
173+ rows_sub = new_rows
174+
175+ elif isinstance (rows_sub , dict ):
176+ rows_sub = {** parent_fields , ** rows_sub }
177+
178+ # move down
179+ rows = rows_sub
180+
181+ return rows
182+
103183def opentargets (
104184 ensembl_id ,
105185 resource = "diseases" ,
@@ -133,19 +213,25 @@ def opentargets(
133213 """
134214
135215 if resource == "diseases" :
136- raise NotImplementedError ("The 'diseases' resource is currently not supported. Please check back in a future update." )
216+ query_string = QUERY_STRING_DISEASES
217+ rows_path_and_keep_layer = [("associatedDiseases" , False ), ("rows" , False ), ("disease" , True )]
137218 elif resource == "drugs" :
138219 query_string = QUERY_STRING_DRUGS
139- rows_path = ["drugAndClinicalCandidates" , "rows" ]
220+ rows_path_and_keep_layer = [( "drugAndClinicalCandidates" , False ), ( "rows" , False ) ]
140221 elif resource == "tractability" :
141- raise NotImplementedError ("The 'tractability' resource is currently not supported. Please check back in a future update." )
222+ query_string = QUERY_STRING_TRACTABILITY
223+ rows_path_and_keep_layer = [("tractability" , False )]
142224 elif resource == "pharmacogenetics" :
225+ query_string = QUERY_STRING_PHARMACOGENETICS
143226 raise NotImplementedError ("The 'pharmacogenetics' resource is currently not supported. Please check back in a future update." )
144227 elif resource == "expression" :
228+ query_string = QUERY_STRING_EXPRESSION
145229 raise NotImplementedError ("The 'expression' resource is currently not supported. Please check back in a future update." )
146230 elif resource == "depmap" :
147- raise NotImplementedError ("The 'depmap' resource is currently not supported. Please check back in a future update." )
231+ query_string = QUERY_STRING_DEPMAP
232+ rows_path_and_keep_layer = [("depMapEssentiality" , False ), ("screens" , True )]
148233 elif resource == "interactions" :
234+ query_string = QUERY_STRING_INTERACTIONS
149235 raise NotImplementedError ("The 'interactions' resource is currently not supported. Please check back in a future update." )
150236 else :
151237 raise ValueError (f"'resource' argument specified as { resource } . Expected one of: { ', ' .join (RESOURCES )} " )
@@ -172,20 +258,25 @@ def opentargets(
172258 # if json:
173259 # return api_response
174260
175- rows = api_response ["data" ]["target" ]
261+ api_target = api_response ["data" ]["target" ]
176262
177- for i in range (len (rows_path )):
178- rows = rows [rows_path [i ]]
263+ rows = _propagate_context (
264+ api_target ,
265+ rows_path_and_keep_layer
266+ )
179267
180268 # ---------------------------
181269 # If JSON → return normalized JSON
182270 # ---------------------------
183271 df = pd .json_normalize (rows )
272+ df = df .drop_duplicates ()
273+ df = df .dropna (axis = 1 , how = "all" ) # drop any all-NaN columns
274+ df = df .dropna (axis = 0 , how = "all" ) # drop any all-NaN rows
184275
185276 if limit is not None :
186277 df = df .head (limit )
187278
188- df = df .map (collapse_singletons ) # drug.mechanismsOfAction.rows --> drug.mechanismsOfAction.mechanismOfAction
279+ df = df .map (_collapse_singletons )
189280
190281 if wrap_text :
191282 for col in df .columns :
0 commit comments