11import json as json_
2+ import textwrap
23import pandas as pd
34import requests
45import json
56
67from .constants import OPENTARGETS_GRAPHQL_API
78from .utils import set_up_logger , wrap_cols_func , graphql_query , json_list_to_df
89
9- logger = set_up_logger ()
10+ logger = set_up_logger () # export GGET_LOGLEVEL=DEBUG
1011
1112QUERY_STRING_DISEASES = ""
1213
3132 synonyms
3233 tradeNames
3334 maximumClinicalStage
34- }
35- diseases {
36- disease {
37- id
38- name
35+ indications {
36+ rows {
37+ disease {
38+ id
39+ name
40+ }
41+ }
3942 }
4043 }
4144 }
5053QUERY_STRING_DEPMAP = ""
5154QUERY_STRING_INTERACTIONS = ""
5255
53- RESOURCES_TO_QUERY = {"diseases" : QUERY_STRING_DISEASES , "drugs" : QUERY_STRING_DRUGS , "tractability" : QUERY_STRING_TRACTABILITY , "pharmacogenetics" : QUERY_STRING_PHARMACOGENETICS , "expression" : QUERY_STRING_EXPRESSION , "depmap" : QUERY_STRING_DEPMAP , "interactions" : QUERY_STRING_INTERACTIONS }
54- RESOURCES = set (RESOURCES_TO_QUERY .keys ())
56+ RESOURCES = {"diseases" , "drugs" , "tractability" , "pharmacogenetics" , "expression" , "depmap" , "interactions" }
57+
def _flatten_nested_lists(items):
    """Yield the non-list leaves of arbitrarily nested lists, in order."""
    for item in items:
        if isinstance(item, list):
            yield from _flatten_nested_lists(item)
        else:
            yield item


def collapse_singletons(obj):
    """
    Recursively strip redundant wrapper containers from a JSON-like object.

    Rules, applied recursively:
    - list: nested lists are flattened into one flat list (regardless of how
      many elements each nested list holds). If exactly one element remains,
      the collapsed element itself is returned instead of a list; otherwise a
      list of the collapsed elements is returned.
    - dict: every value is collapsed first. If the dict has exactly one key,
      that key is dropped and its (collapsed) value is returned; otherwise
      the dict with collapsed values is returned.
    - anything else is returned unchanged.

    Note: lists are flattened *before* their elements are collapsed, so a list
    produced by collapsing a dict element is kept nested, e.g.
    [{"a": [1, 2]}, 3] -> [[1, 2], 3].

    Args:
    - obj   Any object; only lists and dicts are transformed.

    Returns the collapsed object. The input is not modified.
    """
    if isinstance(obj, list):
        flat = list(_flatten_nested_lists(obj))

        # A single remaining element stands in for the whole list.
        if len(flat) == 1:
            return collapse_singletons(flat[0])

        # Zero or several elements: keep the (flattened) list structure.
        return [collapse_singletons(element) for element in flat]

    if isinstance(obj, dict):
        collapsed = {key: collapse_singletons(value) for key, value in obj.items()}

        # A single-key dict carries no structure beyond its value.
        if len(collapsed) == 1:
            return next(iter(collapsed.values()))

        return collapsed

    return obj
55102
def opentargets(
    ensembl_id,
    resource="diseases",
    limit=None,
    verbose=True,
    wrap_text=False,
    json=False,
):
    """
    Query the OpenTargets GraphQL API for data associated with an Ensembl gene ID.

    Args:
    - ensembl_id    Ensembl gene ID; sent to the API as the 'ensemblId' query variable.
    - resource      Type of information to return. One of "diseases", "drugs",
                    "tractability", "pharmacogenetics", "expression", "depmap",
                    "interactions". Only "drugs" is currently implemented; the
                    other names raise NotImplementedError.
    - limit         Maximum number of rows to keep (default: None, keep all rows).
                    Applied to the returned rows after the query.
                    Note: Not compatible with the 'tractability' and 'depmap' resources.
    - verbose       Print progress messages (default: True).
    - wrap_text     If True, displays data frame with wrapped text for easy reading. Default: False.
    - json          If True, returns results in JSON format instead of as a Data Frame. Default: False.

    Returns requested information in DataFrame format, or as a list of
    record dictionaries if json=True.

    Raises:
    - NotImplementedError   if 'resource' is a known but not yet supported resource.
    - ValueError            if 'resource' is unknown, the API response is not valid
                            JSON, the API reports errors, or no target is returned
                            for 'ensembl_id'.
    """
    # Known resources whose queries are not implemented yet.
    not_yet_supported = (
        "diseases",
        "tractability",
        "pharmacogenetics",
        "expression",
        "depmap",
        "interactions",
    )

    if resource == "drugs":
        query_string = QUERY_STRING_DRUGS
        # Keys to follow below data.target to reach the list of result rows.
        rows_path = ("drugAndClinicalCandidates", "rows")
    elif resource in not_yet_supported:
        raise NotImplementedError(
            f"The '{resource}' resource is currently not supported. Please check back in a future update."
        )
    else:
        # RESOURCES is a set; sort it so the message is stable between runs.
        raise ValueError(
            f"'resource' argument specified as {resource}. Expected one of: {', '.join(sorted(RESOURCES))}"
        )

    variables = {"ensemblId": ensembl_id}

    if verbose:
        logger.info(f"Querying OpenTargets for {resource} associated with {ensembl_id}...")
        logger.debug(f"GraphQL query string:\n{query_string}\n\nWith variables:\n{variables}")

    r = requests.post(
        OPENTARGETS_GRAPHQL_API,
        json={"query": query_string, "variables": variables},
    )

    # Parse the body before looking at the status code: GraphQL servers usually
    # describe failures in a JSON "errors" field, which is more useful than a
    # bare HTTP error. json.JSONDecodeError is a ValueError subclass, so the
    # exception type callers see is unchanged.
    try:
        api_response = json_.loads(r.text)
    except ValueError as err:
        raise ValueError(
            f"OpenTargets API returned a non-JSON response (HTTP status {r.status_code})."
        ) from err

    if "errors" in api_response:
        raise ValueError(api_response["errors"])

    if verbose:
        logger.debug(f"Raw API response:\n{json_.dumps(api_response, indent=2)}")

    target = (api_response.get("data") or {}).get("target")
    if target is None:
        # A null target would otherwise fail below with an opaque TypeError.
        raise ValueError(
            f"No OpenTargets target data returned for Ensembl ID '{ensembl_id}'."
        )

    # Walk down to the result rows; a null section is treated as "no rows".
    rows = target
    for key in rows_path:
        if rows is None:
            break
        rows = rows[key]
    if rows is None:
        rows = []

    df = pd.json_normalize(rows)

    if limit is not None:
        df = df.head(limit)

    # Strip single-element list / single-key dict wrappers from every cell,
    # e.g. drug.mechanismsOfAction.rows --> drug.mechanismsOfAction.mechanismOfAction
    # DataFrame.map was added in pandas 2.1; older versions only have applymap.
    if hasattr(pd.DataFrame, "map"):
        df = df.map(collapse_singletons)
    else:
        df = df.applymap(collapse_singletons)

    if wrap_text:
        for col in df.columns:
            if df[col].dtype == object:
                # Only string cells are wrapped; other values pass through untouched.
                df[col] = df[col].apply(
                    lambda x: textwrap.fill(str(x), width=40) if isinstance(x, str) else x
                )

    if json:
        return json_.loads(df.to_json(orient="records", force_ascii=False))

    return df
141201
142202
143203
0 commit comments