
Add New Endpoint: /metakg/parse issue#271 #280

Open
wants to merge 30 commits into main
Commits (30)
f156c32
working GET endpoint add, POST is setup
NikkiBytes Nov 8, 2024
05dd22b
adding working mkg parser handler
NikkiBytes Nov 20, 2024
f52acb6
added url parameter for parser methods
NikkiBytes Nov 20, 2024
51b02b5
filter for get and post output in parse
NikkiBytes Dec 3, 2024
8ee011d
updated error handling for parser
NikkiBytes Dec 4, 2024
d43b730
added get_metakg method
NikkiBytes Dec 4, 2024
3ce64f8
added missing )
NikkiBytes Dec 4, 2024
f4de5e9
added tests and clean metakg parse endpoint
NikkiBytes Dec 5, 2024
49cff33
added timeout
NikkiBytes Dec 5, 2024
eb798c5
added timeout
NikkiBytes Dec 5, 2024
663d572
error handle updates
NikkiBytes Jan 31, 2025
1028d5b
error handling update
NikkiBytes Jan 31, 2025
4bd1ad4
error handling update for parse POST
NikkiBytes Feb 5, 2025
3c3c001
flake8 clean up
NikkiBytes Feb 12, 2025
260d7d0
errors raised for unique instances with clear error message
NikkiBytes Feb 20, 2025
2d6987c
adding unique MetadataRetrivalError class for identifying metadata er…
NikkiBytes Feb 20, 2025
bad1284
mkg parser and handler clean up error code
NikkiBytes Feb 24, 2025
95f8f84
added Mixin function for improved code
NikkiBytes Mar 13, 2025
3819a83
error handling cleanup:
NikkiBytes Mar 17, 2025
4b6d261
code cleanup
NikkiBytes Mar 17, 2025
4c8d92e
code cleanup with flake8
NikkiBytes Mar 18, 2025
220e4a3
set ui key to none when empty value
NikkiBytes Mar 18, 2025
dec2502
exchanged basehandler for queryhandler in metakgparserhandler, remove…
NikkiBytes Mar 27, 2025
56af963
code cleanup, whitespaces, etc.
NikkiBytes Mar 27, 2025
8ea30ae
cleaned up excess code
NikkiBytes Mar 27, 2025
6316bf5
removed old code
NikkiBytes Apr 8, 2025
745e16a
removed not needed code
NikkiBytes Apr 15, 2025
bfbd6f0
removed print statement
NikkiBytes Apr 16, 2025
8f06ce7
style: :art: minor coding style fixes
newgene May 2, 2025
4bcd99b
refactor: :recycle: simplify and refactor metakg parsing logics
newgene May 5, 2025
1 change: 1 addition & 0 deletions src/config.py
@@ -96,6 +96,7 @@
(r"/api/metakg/consolidated/?", "handlers.api.MetaKGQueryHandler", {"biothing_type": "metakg_consolidated"}),
(r"/api/metakg/consolidated/fields/?", "biothings.web.handlers.MetadataFieldHandler", {"biothing_type": "metakg_consolidated"}),
(r"/api/metakg/paths/?", "handlers.api.MetaKGPathFinderHandler", {"biothing_type": "metakgpathfinder"}),
(r"/api/metakg/parse/?", "handlers.api.MetaKGParserHandler"),
]

# biothings web tester will read this
241 changes: 203 additions & 38 deletions src/handlers/api.py
@@ -1,26 +1,27 @@
import asyncio
import json
import logging
from typing import List, Union
import os

import bmt
from biothings.utils import serializer
from biothings.web.auth.authn import BioThingsAuthnMixin
from biothings.web.handlers import BaseAPIHandler, QueryHandler
from biothings.web.handlers.query import BiothingHandler, capture_exceptions
from biothings.web.settings.default import QUERY_KWARGS
from tornado.httpclient import AsyncHTTPClient
from tornado.web import Finish, HTTPError
from tornado.template import Loader
from tornado.web import Finish, HTTPError

from controller import SmartAPI
from controller.exceptions import ControllerError, NotFoundError
from pipeline import MetaKGQueryPipeline
from utils.downloader import DownloadError, download_async
from utils.metakg.biolink_helpers import get_expanded_values
from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter
from utils.metakg.export import edges2graphml
from utils.metakg.parser import MetaKGParser
from utils.metakg.path_finder import MetaKGPathFinder
from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter
from utils.metakg.biolink_helpers import get_expanded_values
from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage

logger = logging.getLogger("smartAPI")
@@ -382,7 +383,68 @@ def post(self):
raise HTTPError(400, reason="Missing required form field: id")


class MetaKGQueryHandler(QueryHandler):
class MetaKGHandlerMixin:
"""
Mixin to provide reusable logic for filtering API information.
"""
def get_filtered_api(self, api_dict):
"""Extract and return filtered API information."""
api_info = api_dict.get("api", api_dict) # Handle both formats

# Default to False if not present
bte = self.args.bte
api_details = self.args.api_details

# Default structure to preserve top-level keys
filtered_dict = {
key: api_dict.get(key)
for key in ["subject", "object", "predicate", "subject_prefix", "object_prefix"]
if key in api_dict
}

# Determine filtered API structure based on `bte` and `api_details`
if bte and not api_details:
# When bte is True and api_details is False, include only minimal API info
filtered_api = {
**({"name": api_info.get("name")} if "name" in api_info else {}),
**(
{"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}}
if "smartapi" in api_info
else {"smartapi": {"id": None}}
),
"bte": api_info.get("bte", {}),
}
elif api_details:
# When api_details is True, include more detailed information
filtered_api = api_info.copy()
if not bte:
filtered_api.pop("bte", None)

# Handle case where "ui" key exists and ends with "None"
if filtered_api.get('smartapi', {}).get("ui", "").endswith("/None"):
filtered_api["smartapi"]["ui"] = None
else:
# Default: No bte and no api_details - just minimal API info
filtered_api = {
**({"name": api_info.get("name")} if "name" in api_info else {}),
**(
{"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}}
if "smartapi" in api_info
else {"smartapi": {"id": None}}
),
}

# Add the filtered 'api' key to the preserved top-level structure
filtered_dict["api"] = filtered_api

# Remove 'bte' from 'api' and move it to the top level
if "bte" in filtered_dict["api"]:
filtered_dict["bte"] = filtered_dict["api"].pop("bte")

return filtered_dict


class MetaKGQueryHandler(QueryHandler, MetaKGHandlerMixin):
"""
Support metakg queries with biolink model's semantic descendants

@@ -458,30 +520,8 @@ async def get(self, *args, **kwargs):
value_list = get_expanded_values(value_list, self.biolink_model_toolkit) if expanded_fields[field] else value_list
setattr(self.args, field, value_list)


await super().get(*args, **kwargs)

def get_filtered_api(self, api_dict):
"""Extract and return filtered API information."""
api_info = api_dict
if not self.args.bte and not self.args.api_details: # no bte and no api details
filtered_api= {
**({"name": api_info["name"]} if "name" in api_info else {}),
**({"smartapi": {"id": api_info["smartapi"]["id"]}} if "smartapi" in api_info and "id" in api_info["smartapi"] else {})
}
elif self.args.bte and not self.args.api_details : # bte and no api details
filtered_api= {
**({"name": api_info["name"]} if "name" in api_info else {}),
**({"smartapi": {"id": api_info["smartapi"]["id"]}} if "smartapi" in api_info and "id" in api_info["smartapi"] else {}),
'bte': api_info.get('bte', {})
}
elif not self.args.bte and self.args.api_details: # no bte and api details
api_info.pop('bte', None)
filtered_api = api_info
else:
filtered_api = api_info
return filtered_api

def process_apis(self, apis):
"""Process each API dict based on provided args."""
if isinstance(apis, list):
@@ -491,11 +531,11 @@ def process_apis(self, apis):
elif isinstance(apis, dict):
if 'bte' in apis:
# update dict for new format
apis['api']['bte']=apis.pop('bte')
apis['api']['bte'] = apis.pop('bte')
api_dict = apis["api"]
filtered_api= self.get_filtered_api(api_dict)
filtered_api = self.get_filtered_api(api_dict)
apis["api"] = filtered_api

def write(self, chunk):
"""
Overwrite the biothings query handler to ...
@@ -522,10 +562,10 @@ def write(self, chunk):
self.set_header("Content-Disposition", 'attachment; filename="smartapi_metakg.graphml"')

return super(BaseAPIHandler, self).write(chunk)

if self.format == "html":
# setup template
template_path = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'templates'))
template_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'templates'))
loader = Loader(template_path)
template = loader.load("cytoscape.html")
# initial counts
@@ -542,7 +582,7 @@ def write(self, chunk):
graph_data = serializer.to_json(cdf.get_data())
# generate global template variable with graph data
result = template.generate(
data= graph_data,
data=graph_data,
response=serializer.to_json(chunk),
shown=shown,
available=available,
@@ -586,7 +626,7 @@ class MetaKGPathFinderHandler(QueryHandler):
"max": 6,
"default": [],
"enum": ["subject", "object", "predicate", "node", "edge", "all"]
}
}
},
}

@@ -680,9 +720,134 @@ async def get(self, *args, **kwargs):
raw_query_output = self.setup_pathfinder_rawquery(expanded_fields)
self.write(raw_query_output)
return
res = {
"total": len(paths_with_edges),
"paths": paths_with_edges,
}
res = {
"total": len(paths_with_edges),
"paths": paths_with_edges,
}
await asyncio.sleep(0.01)
self.finish(res)


class MetaKGParserHandler(BaseHandler, MetaKGHandlerMixin):
"""
Handles parsing of SmartAPI metadata from a given URL or request body.

This handler processes SmartAPI metadata and returns structured,
cleaned results based on the specified query parameters.

Supported HTTP methods:
- **GET**: Parses metadata from a provided URL.
- **POST**: Parses metadata from the request body.

Query Parameters:
- `url` (str, required): The URL of the SmartAPI metadata to parse.
Maximum length: 1000 characters.
- `api_details` (bool, optional, default: `False`):
Whether to return detailed API information.
- `bte` (bool, optional, default: `False`):
Whether to include BTE (BioThings Explorer) specific metadata.
"""

kwargs = {
"*": {
"api_details": {"type": bool, "default": False},
"bte": {"type": bool, "default": False},
},
"GET": {
"url": {
"type": str,
"required": True,
"max": 1000,
"description": "URL of the SmartAPI metadata to parse"
},
},
}

def initialize(self, *args, **kwargs):
super().initialize(*args, **kwargs)
# change the default query pipeline from self.biothings.pipeline
self.pipeline = MetaKGQueryPipeline(ns=self.biothings)

def process_apis(self, apis):
"""Process each API dict based on provided args."""
if isinstance(apis, list):
for i, api_dict in enumerate(apis):
filtered_api = self.get_filtered_api(api_dict)
apis[i] = filtered_api
elif isinstance(apis, dict):
if "bte" in apis:
# Update dict for new format
apis["api"]["bte"] = apis.pop("bte")
api_dict = apis["api"]
filtered_api = self.get_filtered_api(api_dict)
apis["api"] = filtered_api
return apis

async def get(self, *args, **kwargs):
url = self.args.url
parser = MetaKGParser()

try:
parsed_metakg = parser.get_metakg(url=url)
except DownloadError:
self.write_error(400, reason="There was an error downloading the data from the given url.")
except (ValueError, TypeError) as err:
self.write_error(
status_code=400,
reason="The data retrived from the given url is not a valid JSON or YAML object.",
message=str(err)
)

# Apply filtering -- if data found
if parsed_metakg:
for i, api_dict in enumerate(parsed_metakg):
parsed_metakg[i] = self.get_filtered_api(api_dict)

# Add url to metadata if api_details is set to 1
if self.args.api_details:
for data_dict in parsed_metakg:
if "metadata" in data_dict["api"]["smartapi"] and data_dict["api"]["smartapi"]["metadata"] is None:
data_dict["api"]["smartapi"]["metadata"] = url

response = {
"total": len(parsed_metakg),
"hits": parsed_metakg,
}

self.finish(response)

async def post(self, *args, **kwargs):
content_type = self.request.headers.get("Content-Type", "").lower()
if content_type in ["application/json", "application/x-yaml"]:
# if content type is set properly, it should have already been parsed
metadata_from_body = self.args_json or self.args_yaml
elif self.request.body:
# if request body is provided but no proper content type is set
# we will parse it as YAML anyway
metadata_from_body = self._parse_yaml()
else:
metadata_from_body = None

if metadata_from_body:
# Process the parsed metadata
parser = MetaKGParser()
parsed_metakg = parser.get_metakg(metadata_from_body)

# Apply filtering to the combined data
if parsed_metakg:
for i, api_dict in enumerate(parsed_metakg):
parsed_metakg[i] = self.get_filtered_api(api_dict)

# Send the response back to the client
response = {
"total": len(parsed_metakg),
"hits": parsed_metakg,
}

self.finish(response)
else:
self.write_error(
status_code=400,
reason="Request body cannot be empty.",
message="Please provide a valid JSON/YAML object in the request body."
)
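
As a usage reference (not part of the diff): a minimal sketch of how the new /api/metakg/parse endpoint added above could be exercised. The base URL, metadata URL, and local file name are placeholder assumptions; the parameter names follow the handler's kwargs (url, api_details, bte, with flags passed here as 0/1), and the response shape follows the {"total": ..., "hits": [...]} structure returned by the handler.

# Sketch only -- not part of this PR; base URL and metadata URL are placeholders.
import requests

BASE = "http://localhost:8000"  # assumed local/dev SmartAPI web server
METADATA_URL = "https://example.org/openapi.yaml"  # hypothetical SmartAPI document

# GET: parse metadata fetched from a URL; api_details/bte toggle output filtering
resp = requests.get(
    f"{BASE}/api/metakg/parse",
    params={"url": METADATA_URL, "api_details": 1, "bte": 0},
    timeout=30,
)
resp.raise_for_status()
payload = resp.json()
print(payload["total"], "metakg records parsed")

# POST: parse metadata supplied directly in the request body (JSON or YAML)
with open("openapi.yaml", "rb") as fh:
    resp = requests.post(
        f"{BASE}/api/metakg/parse",
        data=fh.read(),
        headers={"Content-Type": "application/x-yaml"},
        timeout=30,
    )
print(resp.json()["hits"][:1])
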
15 changes: 6 additions & 9 deletions src/pipeline.py
@@ -2,14 +2,10 @@
from enum import Enum
from typing import Dict, OrderedDict

from biothings.web.query import (
AsyncESQueryBackend,
AsyncESQueryPipeline,
ESQueryBuilder,
ESResultFormatter,
)
from controller.base import OpenAPI, Swagger
from biothings.web.query import AsyncESQueryBackend, AsyncESQueryPipeline, ESQueryBuilder, ESResultFormatter
from elasticsearch_dsl import Q, Search

from controller.base import OpenAPI, Swagger
from utils import decoder


@@ -219,8 +215,8 @@ def apply_extras(self, search, options):
apply extra filters
"""
# if not options._source:
# by default exclude api.bte or bte field, but can be included by specifying in the fields parameter
# options._source = ["-api.bte", "-bte"]
# by default exclude api.bte or bte field, but can be included by specifying in the fields parameter
# options._source = ["-api.bte", "-bte"]

search = super().apply_extras(search, options)
# apply extra filters from query parameters
@@ -262,6 +258,7 @@ def adjust_index(self, original_index: str, query: str, **options: Dict) -> str:
query_index = self.indices.get("metakg", None)
return query_index


class MetaKGQueryPipeline(AsyncESQueryPipeline):
def __init__(self, *args, **kwargs):
# ns is an instance of BiothingsNamespace