Skip to content

Add New Endpoint: /metakg/parse issue#271 #280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f156c32
working GET endpoint add, POST is setup
NikkiBytes Nov 8, 2024
05dd22b
adding working mkg parser handler
NikkiBytes Nov 20, 2024
f52acb6
added url parameter for parser methods
NikkiBytes Nov 20, 2024
51b02b5
filter for get and post output in parse
NikkiBytes Dec 3, 2024
8ee011d
updated error handling for parser
NikkiBytes Dec 4, 2024
d43b730
added get_metakg method
NikkiBytes Dec 4, 2024
3ce64f8
added missing )
NikkiBytes Dec 4, 2024
f4de5e9
added tests and clean metakg parse endpoint
NikkiBytes Dec 5, 2024
49cff33
added timeout
NikkiBytes Dec 5, 2024
eb798c5
added timeout
NikkiBytes Dec 5, 2024
663d572
error handle updates
NikkiBytes Jan 31, 2025
1028d5b
error handling update
NikkiBytes Jan 31, 2025
4bd1ad4
error handling update for parse POST
NikkiBytes Feb 5, 2025
3c3c001
flake8 clean up
NikkiBytes Feb 12, 2025
260d7d0
errors raised for unique instances with clear error message
NikkiBytes Feb 20, 2025
2d6987c
adding unique MetadataRetrivalError class for identifying metadata er…
NikkiBytes Feb 20, 2025
bad1284
mkg parser and handler clean up error code
NikkiBytes Feb 24, 2025
95f8f84
added Mixin function for improved code
NikkiBytes Mar 13, 2025
3819a83
error handling cleanup:
NikkiBytes Mar 17, 2025
4b6d261
code cleanup
NikkiBytes Mar 17, 2025
4c8d92e
code cleanup with flake8
NikkiBytes Mar 18, 2025
220e4a3
set ui key to none when empty value
NikkiBytes Mar 18, 2025
dec2502
exchanged basehandler for queryhandler in metakgparserhandler, remove…
NikkiBytes Mar 27, 2025
56af963
code cleanup, whitespaces, etc.
NikkiBytes Mar 27, 2025
8ea30ae
cleaned up excess code
NikkiBytes Mar 27, 2025
6316bf5
removed old code
NikkiBytes Apr 8, 2025
745e16a
removed not needed code
NikkiBytes Apr 15, 2025
bfbd6f0
removed print statement
NikkiBytes Apr 16, 2025
8f06ce7
style: :art: minor coding style fixes
newgene May 2, 2025
4bcd99b
refactor: :recycle: simplify and refactor metakg parsing logics
newgene May 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
(r"/api/metakg/consolidated/?", "handlers.api.MetaKGQueryHandler", {"biothing_type": "metakg_consolidated"}),
(r"/api/metakg/consolidated/fields/?", "biothings.web.handlers.MetadataFieldHandler", {"biothing_type": "metakg_consolidated"}),
(r"/api/metakg/paths/?", "handlers.api.MetaKGPathFinderHandler", {"biothing_type": "metakgpathfinder"}),
(r"/api/metakg/parse/?", "handlers.api.MetaKGParserHandler"),
]

# biothings web tester will read this
Expand Down
258 changes: 226 additions & 32 deletions src/handlers/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import asyncio
import json
import logging
from typing import List, Union
import os
import bmt
from biothings.utils import serializer
Expand All @@ -22,6 +21,9 @@
from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter
from utils.metakg.biolink_helpers import get_expanded_values
from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage
from utils.metakg.parser import MetaKGParser
from utils.metakg.metakg_errors import MetadataRetrievalError
from utils.decoder import to_dict

logger = logging.getLogger("smartAPI")

Expand Down Expand Up @@ -382,7 +384,68 @@ def post(self):
raise HTTPError(400, reason="Missing required form field: id")


class MetaKGQueryHandler(QueryHandler):
class MetaKGHandlerMixin:
    """
    Mixin providing reusable logic for filtering the API portion of a MetaKG edge.

    The host class must expose ``self.args`` with the boolean-like attributes
    ``bte`` and ``api_details``; a missing attribute is treated as ``False``.
    """

    # Edge-level keys that are carried over to the filtered result untouched.
    _EDGE_KEYS = ("subject", "object", "predicate", "subject_prefix", "object_prefix")

    @staticmethod
    def _minimal_api(api_info):
        """Return the reduced API description: optional name plus the smartapi id."""
        smartapi = api_info.get("smartapi")
        # "smartapi" may be absent or explicitly None; both yield an id of None.
        smartapi_id = smartapi.get("id") if isinstance(smartapi, dict) else None
        minimal = {"name": api_info.get("name")} if "name" in api_info else {}
        minimal["smartapi"] = {"id": smartapi_id}
        return minimal

    def get_filtered_api(self, api_dict):
        """
        Extract and return filtered API information.

        Args:
            api_dict: Either a full edge dict carrying its API description
                under the "api" key, or the API description itself.

        Returns:
            A new dict holding the edge-level keys present in ``api_dict``,
            the filtered description under "api" and, when BTE data was kept,
            that data under a top-level "bte" key. ``api_dict`` and its
            nested dicts are not modified.
        """
        api_info = api_dict.get("api", api_dict)  # Handle both formats

        # Default to False if not present
        bte = getattr(self.args, "bte", False)
        api_details = getattr(self.args, "api_details", False)

        # Preserve the edge-level keys that are actually present.
        filtered_dict = {key: api_dict[key] for key in self._EDGE_KEYS if key in api_dict}

        if api_details:
            # Detailed output: everything the source provides, minus BTE data
            # unless it was requested as well.
            filtered_api = api_info.copy()
            if not bte:
                filtered_api.pop("bte", None)

            # A UI link built from a missing id ends with "/None"; report it
            # as null. "ui" may already be None, so check the type before
            # calling endswith, and replace the nested dict rather than
            # writing through the shallow copy into the caller's data.
            smartapi = filtered_api.get("smartapi")
            if isinstance(smartapi, dict):
                ui = smartapi.get("ui")
                if isinstance(ui, str) and ui.endswith("/None"):
                    filtered_api["smartapi"] = {**smartapi, "ui": None}
        else:
            # Minimal output, with BTE data attached only when requested.
            filtered_api = self._minimal_api(api_info)
            if bte:
                filtered_api["bte"] = api_info.get("bte", {})

        filtered_dict["api"] = filtered_api

        # 'bte' is reported next to 'api' at the top level, not inside it.
        if "bte" in filtered_api:
            filtered_dict["bte"] = filtered_api.pop("bte")

        return filtered_dict


class MetaKGQueryHandler(QueryHandler, MetaKGHandlerMixin):
"""
Support metakg queries with biolink model's semantic descendants

Expand Down Expand Up @@ -458,30 +521,8 @@ async def get(self, *args, **kwargs):
value_list = get_expanded_values(value_list, self.biolink_model_toolkit) if expanded_fields[field] else value_list
setattr(self.args, field, value_list)


await super().get(*args, **kwargs)

def get_filtered_api(self, api_dict):
    """
    Reduce an API description according to the `bte` / `api_details` args.

    With `api_details` set, the same dict object is returned; its 'bte'
    entry is removed in place unless `bte` is also set. Otherwise a new
    dict is built with the name and smartapi id (each only when present),
    plus the 'bte' entry when `bte` is set.
    """
    want_bte = self.args.bte
    want_details = self.args.api_details

    if want_details:
        # Detailed view hands back the caller's dict itself.
        if not want_bte:
            api_dict.pop('bte', None)
        return api_dict

    slim = {}
    if "name" in api_dict:
        slim["name"] = api_dict["name"]
    if "smartapi" in api_dict and "id" in api_dict["smartapi"]:
        slim["smartapi"] = {"id": api_dict["smartapi"]["id"]}
    if want_bte:
        slim['bte'] = api_dict.get('bte', {})
    return slim

def process_apis(self, apis):
"""Process each API dict based on provided args."""
if isinstance(apis, list):
Expand All @@ -491,11 +532,11 @@ def process_apis(self, apis):
elif isinstance(apis, dict):
if 'bte' in apis:
# update dict for new format
apis['api']['bte']=apis.pop('bte')
apis['api']['bte'] = apis.pop('bte')
api_dict = apis["api"]
filtered_api= self.get_filtered_api(api_dict)
filtered_api = self.get_filtered_api(api_dict)
apis["api"] = filtered_api

def write(self, chunk):
"""
Overwrite the biothings query handler to ...
Expand All @@ -522,10 +563,10 @@ def write(self, chunk):
self.set_header("Content-Disposition", 'attachment; filename="smartapi_metakg.graphml"')

return super(BaseAPIHandler, self).write(chunk)

if self.format == "html":
# setup template
template_path = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'templates'))
template_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'templates'))
loader = Loader(template_path)
template = loader.load("cytoscape.html")
# initial counts
Expand All @@ -542,7 +583,7 @@ def write(self, chunk):
graph_data = serializer.to_json(cdf.get_data())
# generate global template variable with graph data
result = template.generate(
data= graph_data,
data=graph_data,
response=serializer.to_json(chunk),
shown=shown,
available=available,
Expand Down Expand Up @@ -680,9 +721,162 @@ async def get(self, *args, **kwargs):
raw_query_output = self.setup_pathfinder_rawquery(expanded_fields)
self.write(raw_query_output)
return
res = {
"total": len(paths_with_edges),
res = {
"total": len(paths_with_edges),
"paths": paths_with_edges,
}
await asyncio.sleep(0.01)
self.finish(res)


class MetaKGParserHandler(BaseHandler, MetaKGHandlerMixin):
    """
    Handles parsing of SmartAPI metadata from a given URL or request body.

    This handler processes SmartAPI metadata and returns structured,
    cleaned results based on the specified query parameters.

    Supported HTTP methods:
    - **GET**: Parses metadata from a provided URL.
    - **POST**: Parses metadata from the request body (JSON or YAML).

    Query Parameters:
    - `url` (str, required for GET): The URL of the SmartAPI metadata to parse.
        Maximum length: 1000 characters.
    - `api_details` (bool, optional, default: `False`):
        Whether to return detailed API information.
    - `bte` (bool, optional, default: `False`):
        Whether to include BTE (BioThings Explorer) specific metadata.

    Both methods respond with ``{"total": <int>, "hits": [...]}``.
    Failures are reported by raising ``HTTPError``.
    """

    kwargs = {
        "GET": {
            "url": {
                "type": str,
                "required": True,
                "max": 1000,
                "description": "URL of the SmartAPI metadata to parse"
            },
            "api_details": {"type": bool, "default": False},
            "bte": {"type": bool, "default": False},
        },
        "POST": {
            "api_details": {"type": bool, "default": False},
            "bte": {"type": bool, "default": False},
        },
    }

    def initialize(self, *args, **kwargs):
        super().initialize(*args, **kwargs)
        # change the default query pipeline from self.biothings.pipeline
        self.pipeline = MetaKGQueryPipeline(ns=self.biothings)

    def process_apis(self, apis):
        """
        Process each API dict based on provided args.

        A list is filtered element by element in place. For a dict, a
        top-level "bte" entry is first moved under "api", then "api" is
        replaced by its filtered form. The (mutated) input is returned.
        """
        if isinstance(apis, list):
            for i, api_dict in enumerate(apis):
                apis[i] = self.get_filtered_api(api_dict)
        elif isinstance(apis, dict):
            if "bte" in apis:
                # Update dict for new format
                apis["api"]["bte"] = apis.pop("bte")
            apis["api"] = self.get_filtered_api(apis["api"])
        return apis

    def _build_response(self, combined_data):
        """Filter every parsed edge and wrap the result in the response envelope."""
        hits = [self.get_filtered_api(api_dict) for api_dict in combined_data]
        return {"total": len(hits), "hits": hits}

    async def get(self, *args, **kwargs):
        """
        Parse the SmartAPI metadata found at the `url` query parameter.

        Raises:
            HTTPError: 400 when `url` is empty or the download fails; the
                status carried by ``MetadataRetrievalError`` when the parser
                reports a retrieval failure.
        """
        url = self.args.url
        if not url:
            raise HTTPError(400, reason="A url value is expected for the request, please provide a url.")

        parser = MetaKGParser()
        try:
            # TRAPI and non-TRAPI edges are extracted separately and merged.
            trapi_data = parser.get_TRAPI_metadatas(data=None, url=url)
            nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url)
        except MetadataRetrievalError as retrieve_err:
            raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) from retrieve_err
        except DownloadError as download_err:
            raise HTTPError(
                400, reason="There was an error downloading the data from the given input."
            ) from download_err

        response = self._build_response(trapi_data + nontrapi_data)

        # With api_details on, fill an empty smartapi.metadata with the
        # requested url.
        if self.args.api_details:
            for data_dict in response["hits"]:
                # "smartapi" is not guaranteed on a detailed API description.
                smartapi = data_dict["api"].get("smartapi")
                if isinstance(smartapi, dict) and "metadata" in smartapi and smartapi["metadata"] is None:
                    smartapi["metadata"] = url

        self.finish(response)

    async def post(self, *args, **kwargs):
        """
        Parse the SmartAPI metadata document sent as the request body.

        The body is decoded as JSON or YAML according to the Content-Type
        header; any other (or missing) content type falls back to the
        decoder's default handling.

        Raises:
            HTTPError: 400 when the body is empty, cannot be decoded, does not
                decode to a mapping, or a download fails; the status carried
                by ``MetadataRetrievalError`` when the parser reports a
                retrieval failure.
        """
        raw_body = self.request.body
        if not raw_body:
            raise HTTPError(400, reason="Request body cannot be empty.")

        # Compare only the media type: the header may carry parameters such
        # as "; charset=utf-8".
        content_type = self.request.headers.get("Content-Type", "")
        media_type = content_type.split(";", 1)[0].strip().lower()

        # Try to parse the request body based on content type
        try:
            if media_type in ("application/json", "application/x-yaml"):
                data = to_dict(raw_body, ctype=media_type)
            else:
                # Default to YAML parsing if the content type is unknown or not specified
                data = to_dict(raw_body)
        except ValueError as val_err:
            if 'mapping values are not allowed here' in str(val_err):
                raise HTTPError(
                    400, reason="Formatting issue, please consider using --data-binary to maintain YAML format."
                ) from val_err
            raise HTTPError(400, reason="Invalid value, please provide a valid YAML object.") from val_err
        except TypeError as type_err:
            raise HTTPError(400, reason="Invalid type, provide valid type metadata.") from type_err

        # A non-mapping body is a client error; a bare ValueError here would
        # surface as a 500.
        if not isinstance(data, dict):
            raise HTTPError(400, reason="Invalid input data type. Please provide a valid JSON/YAML object.")

        # Process the parsed metadata
        parser = MetaKGParser()
        try:
            trapi_data = parser.get_TRAPI_metadatas(data=data)
            nontrapi_data = parser.get_non_TRAPI_metadatas(data=data)
        except MetadataRetrievalError as retrieve_err:
            raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) from retrieve_err
        except DownloadError as download_err:
            raise HTTPError(400, reason="Error downloading the data from the provided input.") from download_err

        # Send the response back to the client
        self.finish(self._build_response(trapi_data + nontrapi_data))
16 changes: 16 additions & 0 deletions src/utils/metakg/metakg_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
class MetadataRetrievalError(Exception):
    """
    Error raised when metadata retrieval fails.

    Carries a status code and a message as attributes; the exception text
    combines both.
    """

    def __init__(self, status_code, message):
        summary = "MetadataRetrievalError {}: {}".format(status_code, message)
        super().__init__(summary)
        self.status_code = status_code
        self.message = message

    def to_dict(self):
        """Describe this error as a plain dictionary."""
        payload = dict(code=self.status_code, success=False)
        payload["error"] = "Metadata Retrieval Error"
        payload["details"] = str(self)
        return payload
Loading