Skip to content

Commit c736b8c

Browse files
committed
drugs working
1 parent 2819132 commit c736b8c

File tree

1 file changed

+114
-54
lines changed

1 file changed

+114
-54
lines changed

gget/gget_opentargets.py

Lines changed: 114 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import json as json_
2+
import textwrap
23
import pandas as pd
34
import requests
45
import json
56

67
from .constants import OPENTARGETS_GRAPHQL_API
78
from .utils import set_up_logger, wrap_cols_func, graphql_query, json_list_to_df
89

9-
logger = set_up_logger()
10+
logger = set_up_logger() # export GGET_LOGLEVEL=DEBUG
1011

1112
QUERY_STRING_DISEASES = ""
1213

@@ -31,11 +32,13 @@
3132
synonyms
3233
tradeNames
3334
maximumClinicalStage
34-
}
35-
diseases {
36-
disease {
37-
id
38-
name
35+
indications {
36+
rows {
37+
disease {
38+
id
39+
name
40+
}
41+
}
3942
}
4043
}
4144
}
@@ -50,17 +53,59 @@
5053
QUERY_STRING_DEPMAP = ""
5154
QUERY_STRING_INTERACTIONS = ""
5255

53-
RESOURCES_TO_QUERY = {"diseases": QUERY_STRING_DISEASES, "drugs": QUERY_STRING_DRUGS, "tractability": QUERY_STRING_TRACTABILITY, "pharmacogenetics": QUERY_STRING_PHARMACOGENETICS, "expression": QUERY_STRING_EXPRESSION, "depmap": QUERY_STRING_DEPMAP, "interactions": QUERY_STRING_INTERACTIONS}
54-
RESOURCES = set(RESOURCES_TO_QUERY.keys())
56+
RESOURCES = {"diseases", "drugs", "tractability", "pharmacogenetics", "expression", "depmap", "interactions"}
57+
58+
def collapse_singletons(obj):
    """
    Recursively simplify a nested list/dict structure.

    Rules:
    - list: nested lists are first flattened into a single level. If the
      flattened list holds exactly one element, that element (itself
      collapsed) is returned in place of the list; otherwise a new list of
      the collapsed elements is returned.
    - dict: every value is collapsed first. If the dict has exactly one key,
      its (collapsed) value is returned and the key is dropped; otherwise the
      dict of collapsed values is returned.
    - anything else is returned unchanged.
    """
    # Dicts: collapse the values, then unwrap if only one key remains.
    if isinstance(obj, dict):
        collapsed = {key: collapse_singletons(value) for key, value in obj.items()}
        if len(collapsed) != 1:
            return collapsed
        (only_value,) = collapsed.values()
        return only_value

    # Scalars and any other non-list objects pass straight through.
    if not isinstance(obj, list):
        return obj

    def _gather_leaves(items, out):
        # Depth-first walk that appends every non-list element to `out`,
        # preserving the left-to-right order of the nested lists.
        for item in items:
            if isinstance(item, list):
                _gather_leaves(item, out)
            else:
                out.append(item)
        return out

    leaves = _gather_leaves(obj, [])

    # A lone element replaces the list entirely.
    if len(leaves) == 1:
        return collapse_singletons(leaves[0])

    # Zero or several elements: keep a (flat) list, collapsing each element.
    # Elements that collapse to lists are NOT flattened again.
    return [collapse_singletons(leaf) for leaf in leaves]
55102

56103
def opentargets(
57104
ensembl_id,
58105
resource="diseases",
59106
limit=None,
60107
verbose=True,
61108
wrap_text=False,
62-
filters=None,
63-
filter_mode="and",
64109
json=False,
65110
):
66111
"""
@@ -81,63 +126,78 @@ def opentargets(
81126
Note: Not compatible with the 'tractability' and 'depmap' resources.
82127
- verbose Print progress messages (default: True).
83128
- wrap_text If True, displays data frame with wrapped text for easy reading. Default: False.
84-
- filters Filters to apply to the data. Supported filters by resource:
85-
"diseases": None
86-
"drugs": disease_id (e.g. "EFO_0000274")
87-
"tractability": None
88-
"pharmacogenetics": drug_id (e.g. "CHEMBL535")
89-
"expression": tissue_id (e.g. "UBERON_0002245"), anatomical_system (e.g. "nervous system"), organ (e.g. "brain")
90-
"depmap": tissue_id (e.g. "UBERON_0002245")
91-
"interactions": protein_a_id (e.g. "ENSP00000304915"), protein_b_id (e.g. "ENSP00000379111"), gene_b_id (e.g. "ENSG00000077238")
92-
- filter_mode For resources that support multiple types of filters, this argument specifies how to combine them.
93129
- json If True, returns results in JSON format instead of as a Data Frame. Default: False.
94130
95131
96132
Returns requested information in DataFrame format.
97133
"""
98134

99-
# Wrap everything into a list
100-
if filters is not None:
101-
filters = {k: v if isinstance(v, list) else [v] for k, v in filters.items()}
102-
103-
query_string = RESOURCES_TO_QUERY.get(resource)
104-
if query_string is None:
135+
if resource == "diseases":
136+
raise NotImplementedError("The 'diseases' resource is currently not supported. Please check back in a future update.")
137+
elif resource == "drugs":
138+
query_string = QUERY_STRING_DRUGS
139+
rows_path = ["drugAndClinicalCandidates", "rows"]
140+
elif resource == "tractability":
141+
raise NotImplementedError("The 'tractability' resource is currently not supported. Please check back in a future update.")
142+
elif resource == "pharmacogenetics":
143+
raise NotImplementedError("The 'pharmacogenetics' resource is currently not supported. Please check back in a future update.")
144+
elif resource == "expression":
145+
raise NotImplementedError("The 'expression' resource is currently not supported. Please check back in a future update.")
146+
elif resource == "depmap":
147+
raise NotImplementedError("The 'depmap' resource is currently not supported. Please check back in a future update.")
148+
elif resource == "interactions":
149+
raise NotImplementedError("The 'interactions' resource is currently not supported. Please check back in a future update.")
150+
else:
105151
raise ValueError(f"'resource' argument specified as {resource}. Expected one of: {', '.join(RESOURCES)}")
106-
107-
# if resource == "diseases":
108-
# raise NotImplementedError("The 'diseases' resource is currently not supported. Please check back in a future update.")
109-
# elif resource == "drugs":
110-
# query_string = QUERY_STRING_DRUGS
111-
# elif resource == "tractability":
112-
# raise NotImplementedError("The 'tractability' resource is currently not supported. Please check back in a future update.")
113-
# elif resource == "pharmacogenetics":
114-
# raise NotImplementedError("The 'pharmacogenetics' resource is currently not supported. Please check back in a future update.")
115-
# elif resource == "expression":
116-
# raise NotImplementedError("The 'expression' resource is currently not supported. Please check back in a future update.")
117-
# elif resource == "depmap":
118-
# raise NotImplementedError("The 'depmap' resource is currently not supported. Please check back in a future update.")
119-
# elif resource == "interactions":
120-
# raise NotImplementedError("The 'interactions' resource is currently not supported. Please check back in a future update.")
121-
# else:
122-
# raise ValueError(f"'resource' argument specified as {resource}. Expected one of: {', '.join(RESOURCES)}")
123-
124-
# Set variables object of arguments to be passed to endpoint
152+
125153
variables = {"ensemblId": ensembl_id}
126154

127-
# Perform POST request and check status code of response
128-
r = requests.post(OPENTARGETS_GRAPHQL_API, json={"query": query_string, "variables": variables})
129-
print(r.status_code)
155+
if verbose:
156+
logger.info(f"Querying OpenTargets for {resource} associated with {ensembl_id}...")
157+
logger.debug(f"GraphQL query string:\n{query_string}\n\nWith variables:\n{variables}")
158+
159+
r = requests.post(
160+
OPENTARGETS_GRAPHQL_API,
161+
json={"query": query_string, "variables": variables},
162+
)
130163

131-
# Transform API response from JSON into Python dictionary and print in console
132164
api_response = json_.loads(r.text)
133-
print(api_response)
134-
135-
# df['score'].apply(lambda x: round(x, 10) if isinstance(x, float) else x)[0]
136165

166+
if "errors" in api_response:
167+
raise ValueError(api_response["errors"])
168+
169+
if verbose:
170+
logger.debug(f"Raw API response:\n{json_.dumps(api_response, indent=2)}")
171+
137172
# if json:
138-
# return json_.loads(df.to_json(orient="records", force_ascii=False))
139-
# else:
140-
# return df
173+
# return api_response
174+
175+
rows = api_response["data"]["target"]
176+
177+
for i in range(len(rows_path)):
178+
rows = rows[rows_path[i]]
179+
180+
# ---------------------------
181+
# If JSON → return normalized JSON
182+
# ---------------------------
183+
df = pd.json_normalize(rows)
184+
185+
if limit is not None:
186+
df = df.head(limit)
187+
188+
df = df.map(collapse_singletons) # drug.mechanismsOfAction.rows --> drug.mechanismsOfAction.mechanismOfAction
189+
190+
if wrap_text:
191+
for col in df.columns:
192+
if df[col].dtype == object:
193+
df[col] = df[col].apply(
194+
lambda x: textwrap.fill(str(x), width=40) if isinstance(x, str) else x
195+
)
196+
197+
if json:
198+
return json_.loads(df.to_json(orient="records", force_ascii=False))
199+
200+
return df
141201

142202

143203

0 commit comments

Comments
 (0)