opentargets working

josephrich98 · josephrich98 · commit bea711e7ae9f · 2026-04-02T20:53:52.000-07:00
diff --git a/docs/src/en/updates.md b/docs/src/en/updates.md
@@ -2,6 +2,9 @@
 
 ## ✨ What's new
 **Version ≥ 0.30.4** (XXX XX, 2026):
+- [`gget opentargets`](opentargets.md): Rewrote this module to reflect the new Open Targets API structure
+  - Some output column/key names may differ to reflect the new API structure
+  - Removed the `--filter_mode` argument
 - [`gget blast`](blast.md): Fixed compatibility with newer pandas versions (≥ 2.0) where `pd.read_html()` no longer accepts raw HTML strings directly, causing a `FileNotFoundError` / `OSError: Filename too long` error when parsing BLAST results
 
 **Version ≥ 0.30.3** (Feb 22, 2026):  
diff --git a/gget/gget_opentargets.py b/gget/gget_opentargets.py
@@ -2,10 +2,9 @@
 import textwrap
 import pandas as pd
 import requests
-import json
 
 from .constants import OPENTARGETS_GRAPHQL_API
-from .utils import set_up_logger, wrap_cols_func, graphql_query, json_list_to_df
+from .utils import set_up_logger
 
 logger = set_up_logger()  # export GGET_LOGLEVEL=DEBUG
 
@@ -241,19 +240,14 @@ def _unhash(x):
       return [_unhash(v) for v in x]
   return x
 
-def _is_hashable_series(s):
-    try:
-        s.dropna().map(hash)
-        return True
-    except TypeError:
-        return False
-
 def opentargets(
     ensembl_id,
     resource="diseases",
     limit=None,
     verbose=True,
     wrap_text=False,
+    filters=None,
+
     json=False,
 ):
     """
@@ -274,6 +268,7 @@ def opentargets(
                     Note: Not compatible with the 'tractability' and 'depmap' resources.
     - verbose       Print progress messages (default: True).
     - wrap_text     If True, displays data frame with wrapped text for easy reading. Default: False.
+    - filters       Dictionary of {column: value} equality filters applied to the returned data frame; only rows whose column equals the given value are kept. Raises a ValueError if a column is not present in the results. Default: None (no filters applied).
     - json          If True, returns results in JSON format instead of as a Data Frame. Default: False.
 
 
@@ -342,27 +337,32 @@ def opentargets(
                 for row in rows
                 for subdict in row[row_key]
             ]
-        
+    
+    if len(rows) == 0:
+        if verbose:
+            logger.info(f"No {resource} data found for {ensembl_id}.")
+        return pd.DataFrame() if not json else []
 
     # ---------------------------
     # If JSON → return normalized JSON
     # ---------------------------
     df = pd.json_normalize(rows, sep=".")
-    df = df.map(_make_hashable).drop_duplicates().map(_unhash)
-    
-    #? alternative approach to dropping duplicates without needing to make everything hashable first
-    # hashable_cols = [col for col in df.columns if _is_hashable_series(df[col])]
-    # df = df.drop_duplicates(subset=hashable_cols)
-    #? alternative approach to dropping duplicates without needing to make everything hashable first
-    
     df = df.dropna(axis=1, how="all")  # drop any all-NaN columns
     df = df.dropna(axis=0, how="all")  # drop any all-NaN rows
+    df = df.map(_make_hashable).drop_duplicates()
 
     if limit is not None:
         df = df.head(limit)
     
+    df = df.map(_unhash)
     df = df.map(_collapse_singletons)
 
+    if filters is not None:
+        for filter_key, filter_value in filters.items():
+            if filter_key not in df.columns:
+                raise ValueError(f"Filter key '{filter_key}' not found in data columns. Available columns: {', '.join(df.columns)}")
+            df = df[df[filter_key] == filter_value]
+
     if wrap_text:
         for col in df.columns:
             if df[col].dtype == object:
diff --git a/tests/from_json.py b/tests/from_json.py
@@ -203,14 +203,16 @@ def normalize(x):
         
         expected_result = td[test]["expected_result"]
         result_to_test = do_call(func, td[test]["args"])
+        if isinstance(result_to_test, pd.DataFrame):
+            result_to_test = json.loads(
+                result_to_test.dropna(axis=1, how="all").to_json(
+                    orient="records", force_ascii=False
+                )
+            )
 
         result_to_test = normalize(result_to_test)
         expected_result = normalize(expected_result)
 
-        # If DataFrame → list of lists
-        if isinstance(result_to_test, pd.DataFrame):
-            result_to_test = result_to_test.dropna(axis=1).values.tolist()
-
         # ✅ Infer keys from expected_result
         if not expected_result:
             raise ValueError(f"Test {test} has empty expected_result")