Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
843541f
Add tests to check if federated queries between the curies mapping se…
vemonet Apr 11, 2023
899d586
Update MANIFEST.in
cthoyt Apr 13, 2023
6863cc3
Merge branch 'main' into pr/53
cthoyt Apr 13, 2023
e77a846
Update test_sparql.py
cthoyt Apr 13, 2023
67d009e
Remove redundant code
cthoyt Apr 13, 2023
79dd180
Merge branch 'main' into pr/53
cthoyt Apr 13, 2023
7a61e43
Update test_sparql.py
cthoyt Apr 13, 2023
423e9f6
Code cleanup
cthoyt Apr 13, 2023
e4443c0
Update test_sparql.py
cthoyt Apr 13, 2023
418fd74
Update test_sparql.py
cthoyt Apr 13, 2023
41dfdb7
Merge branch 'main' into pr/53
cthoyt Apr 13, 2023
eaf2906
Update test_sparql.py
cthoyt Apr 13, 2023
5f555bf
Update test_sparql.py
cthoyt Apr 13, 2023
62a1f49
try to fix a bit the URLs that have been changed without checking
vemonet Apr 13, 2023
e32c6da
fix the blazegraph local URL in test
vemonet Apr 13, 2023
984f10c
fix federated queries test, only test_from_virtuoso_to_mapping_servic…
vemonet Apr 14, 2023
0e709cf
fix CSV parsing, which fixes all tests
vemonet Apr 14, 2023
85f8652
Use the same query for test from the mapping service to external trip…
vemonet Apr 14, 2023
432fc06
improve how triples are defined in init script
vemonet Apr 14, 2023
477fe35
Add externally configurable tests
cthoyt Apr 14, 2023
5e5e1e9
Add second generic test
cthoyt Apr 14, 2023
78fb063
Better configure queries
cthoyt Apr 14, 2023
3561a57
Update src/curies/mapping_service/utils.py
cthoyt Apr 14, 2023
58e6021
Cleanup code
cthoyt Apr 14, 2023
18cb2d7
pass flake8
cthoyt Apr 14, 2023
f2b9f74
add federated queries tests for fuseki
vemonet Apr 14, 2023
dac165c
merge
vemonet Apr 14, 2023
efbfa00
Remove non-generic tests
cthoyt Apr 14, 2023
3657376
Update test_sparql.py
cthoyt Apr 14, 2023
7e6d339
Make tests generic and not rely on docker bioregistry
cthoyt Apr 14, 2023
7b66804
Switch to cases
cthoyt Apr 14, 2023
4dd192c
Merge branch 'main' into add-federated-queries-test-with-docker
cthoyt Dec 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Start local triplestores for testing with docker
if: matrix.os == 'ubuntu-latest'
run: |
docker-compose up -d
sleep 20
./tests/resources/init_triplestores.sh
- name: Install dependencies
run: pip install tox
- name: Test with pytest and generate coverage file
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ recursive-include docs/source *.svg
global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle

include README.md LICENSE
exclude tox.ini .flake8 .bumpversion.cfg .readthedocs.yml codecov.yml
exclude tox.ini .flake8 .bumpversion.cfg .readthedocs.yml codecov.yml docker-compose.yml
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,18 @@ $ cd curies
$ pip install -e .
```

To test that federated queries between the curies mapping service SPARQL endpoint and popular triplestores work properly, you will need to start the triplestores locally with `docker` (otherwise the tests defined in `tests/test_sparql.py` will be skipped):

```bash
$ docker compose up -d
```

The first time you start the triplestores you will need to initialize them by running a script:

```bash
$ ./tests/resources/init_triplestores.sh
```

### 🥼 Testing

After cloning the repository and installing `tox` with `pip install tox`, the unit tests in the `tests/` folder can be
Expand Down
37 changes: 37 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
version: "3"
services:

mapping-service:
build:
context: .
dockerfile: tests/resources/Dockerfile
ports:
- 8888:8888
volumes:
- ./src:/app/src
- ./tests:/app/tests

blazegraph:
image: metaphacts/blazegraph-basic:2.2.0-20160908.003514-6-jetty9.4.44-jre8-45dbfff
ports:
- 8889:8080
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason we can't just use the default ports for each service as we expose outside of docker?


virtuoso:
image: openlink/virtuoso-opensource-7:latest
ports:
- 8890:8890
environment:
- DBA_PASSWORD=dba
- SPARQL_UPDATE=true
- VIRT_Database_ErrorLogLevel=7 # 7 is maximum logs
- VIRT_HTTPServer_HTTPLogFile=/http.log
# https://docs.openlinksw.com/virtuoso/loggingandrecording/

fuseki:
image: stain/jena-fuseki:4.0.0
ports:
- 8891:3030
environment:
- ADMIN_PASSWORD=dba # Admin user: admin
# - FUSEKI_DATASET_1=mapping # Not working with 4.0.0
# - JVM_ARGS=-Xmx2g
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ tests =
coverage
pandas =
pandas
bioregistry =
bioregistry[web]>=0.5.136
flasgger
flask =
flask
defusedxml
Expand Down
4 changes: 2 additions & 2 deletions src/curies/mapping_service/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

"""Utilities for the mapping service."""

import csv
import json
import json.decoder
import unittest
Expand Down Expand Up @@ -75,8 +76,7 @@ def handle_xml(text: str) -> Records:

def handle_csv(text: str) -> Records:
    """Parse bindings encoded in a CSV string.

    The first line of ``text`` is the header; every following line becomes one
    record that maps the header names to the values of that line.

    The standard library CSV reader is used instead of splitting on ``","`` so
    that quoted values containing commas stay in a single field.

    :param text: CSV document with a header row
    :returns: One dictionary per data row; an empty list if ``text`` is empty
    """
    return list(csv.DictReader(text.splitlines()))


#: A mapping from canonical content types to functions for parsing them
Expand Down
139 changes: 139 additions & 0 deletions tests/cases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Test cases."""

import itertools as itt
import unittest
from textwrap import dedent
from typing import Collection, NamedTuple, Set, Tuple

from curies.mapping_service.utils import (
get_sparql_record_so_tuples,
get_sparql_records,
sparql_service_available,
)

# Every service is reachable under two URLs: the LOCAL_* one goes through the
# port published on localhost, the DOCKER_* one uses the service name and the
# container port. NOTE: federated queries need to use the docker internal URL.

# CURIEs mapping service
LOCAL_MAPPING_SERVICE = "http://localhost:8888/sparql"
DOCKER_MAPPING_SERVICE = "http://mapping-service:8888/sparql"

# Blazegraph
LOCAL_BLAZEGRAPH = "http://localhost:8889/blazegraph/namespace/kb/sparql"
DOCKER_BLAZEGRAPH = "http://blazegraph:8080/blazegraph/namespace/kb/sparql"

# Virtuoso
LOCAL_VIRTUOSO = "http://localhost:8890/sparql"
DOCKER_VIRTUOSO = "http://virtuoso:8890/sparql"

# Fuseki
LOCAL_FUSEKI = "http://localhost:8891/mapping"
DOCKER_FUSEKI = "http://fuseki:3030/mapping"

#: Content types requested in the federated tests. This is deliberately a
#: subset: some triplestores are picky about mime types, e.g. a blazegraph
#: SELECT query fails when asking for application/xml.
TEST_CONTENT_TYPES = {
    "text/csv",
    "application/sparql-results+xml",
    "application/json",
}


class TripleStoreConfiguration(NamedTuple):
    """Endpoints, content types, and query templates for one triplestore under test."""

    #: SPARQL endpoint URL used by the tests to reach the triplestore directly
    local_endpoint: str
    #: SPARQL endpoint URL substituted into queries that the mapping service federates out
    docker_endpoint: str
    #: Content types to request; each one is combined with each query template
    mimetypes: Collection[str]
    #: Query templates sent to the triplestore, formatted with the mapping service URL
    direct_query_fmts: Collection[str]
    #: Query templates sent to the mapping service, formatted with ``docker_endpoint``
    service_query_fmts: Collection[str]


#: Backwards-compatible alias that keeps the original (misspelled) name importable
TripleStoreConfiguation = TripleStoreConfiguration


def get_pairs(endpoint: str, sparql: str, accept: str) -> Set[Tuple[str, str]]:
    """Run a SPARQL query against an endpoint and return the result as a set of string pairs.

    :param endpoint: URL of the SPARQL endpoint to query
    :param sparql: SPARQL query text
    :param accept: content type to request for the response
    :returns: The records of the response, converted by ``get_sparql_record_so_tuples``
    """
    return get_sparql_record_so_tuples(
        get_sparql_records(endpoint=endpoint, sparql=sparql, accept=accept)
    )


# The three templates below are filled in with ``str.format``: ``{0}`` is the URL
# of the federated SERVICE endpoint, and literal SPARQL braces are doubled
# (``{{`` / ``}}``) so that they survive formatting.

#: Template for a query that federates to the mapping service and binds the
#: subjects with a VALUES clause (two CHEBI IRIs) inside the SERVICE block.
SPARQL_TO_MAPPING_SERVICE_VALUES = """\
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?s ?o WHERE {{
SERVICE <{0}> {{
VALUES ?s {{ <http://purl.obolibrary.org/obo/CHEBI_24867> <http://purl.obolibrary.org/obo/CHEBI_24868> }} .
?s owl:sameAs ?o .
}}
}}
""".rstrip()

#: Template for a query that federates to the mapping service without VALUES:
#: a fixed CHEBI IRI is written directly as the subject of the first pattern.
SPARQL_TO_MAPPING_SERVICE_SIMPLE = """\
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?s ?o WHERE {{
SERVICE <{0}> {{
<http://purl.obolibrary.org/obo/CHEBI_24867> owl:sameAs ?o .
?s owl:sameAs ?o .
}}
}}
""".rstrip()

#: Template for a query in the opposite direction: the ``owl:sameAs`` pattern is
#: outside the SERVICE block, and the SERVICE block asks the federated endpoint
#: for the ``rdf:type`` (``a``) of each ``?s``.
SPARQL_FROM_MAPPING_SERVICE_SIMPLE = """\
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT ?s ?o WHERE {{
<http://purl.obolibrary.org/obo/CHEBI_24867> owl:sameAs ?s .
SERVICE <{0}> {{
?s a ?o .
}}
}}
""".rstrip()

#: The triplestores exercised by the federation tests, keyed by a short name
#: that is reported in the sub-test parameters.
configurations = {
"blazegraph": TripleStoreConfiguation(
local_endpoint=LOCAL_BLAZEGRAPH,
docker_endpoint=DOCKER_BLAZEGRAPH,
mimetypes=TEST_CONTENT_TYPES,
direct_query_fmts=[SPARQL_TO_MAPPING_SERVICE_SIMPLE, SPARQL_TO_MAPPING_SERVICE_VALUES],
service_query_fmts=[SPARQL_FROM_MAPPING_SERVICE_SIMPLE],
),
"virtuoso": TripleStoreConfiguation(
local_endpoint=LOCAL_VIRTUOSO,
docker_endpoint=DOCKER_VIRTUOSO,
mimetypes=TEST_CONTENT_TYPES,  # TODO: generalize?
# TODO: Virtuoso fails to resolve VALUES in a federated query, so only the
# simple template is used here
direct_query_fmts=[SPARQL_TO_MAPPING_SERVICE_SIMPLE],
service_query_fmts=[SPARQL_FROM_MAPPING_SERVICE_SIMPLE],
),
"fuseki": TripleStoreConfiguation(
local_endpoint=LOCAL_FUSEKI,
docker_endpoint=DOCKER_FUSEKI,
mimetypes=TEST_CONTENT_TYPES,
direct_query_fmts=[SPARQL_TO_MAPPING_SERVICE_SIMPLE, SPARQL_TO_MAPPING_SERVICE_VALUES],
service_query_fmts=[SPARQL_FROM_MAPPING_SERVICE_SIMPLE],
),
}


class FederationMixin(unittest.TestCase):
    """Shared tests for federated SPARQL queries to and from the CURIEs mapping service.

    Subclasses are expected to assign :attr:`mapping_service` (e.g., in ``setUp``)
    before the tests run.

    Each triplestore is checked inside its own sub-test. A triplestore whose local
    endpoint is not reachable is reported as skipped instead of failing the whole
    test, so the remaining triplestores are still exercised and the suite stays
    usable when the docker services have not been started.
    """

    #: The URL for the mapping service
    mapping_service: str

    def assert_endpoint(self, endpoint: str, query: str, *, accept: str) -> None:
        """Assert that the endpoint returns the expected CHEBI mapping for the query.

        :param endpoint: URL of the SPARQL endpoint that receives the query
        :param query: SPARQL query text
        :param accept: content type to request for the response
        """
        pairs = get_pairs(endpoint, query, accept=accept)
        self.assertIn(
            ("http://purl.obolibrary.org/obo/CHEBI_24867", "https://bioregistry.io/chebi:24867"),
            pairs,
        )

    def _require_triplestore(self, name: str, endpoint: str) -> None:
        """Skip the current (sub-)test if the triplestore endpoint cannot be reached."""
        if not sparql_service_available(endpoint):
            self.skipTest(f"Triplestore {name} is not available: {endpoint}")

    def test_from_triplestore(self) -> None:
        """Test federated queries from various triples stores to the CURIEs service."""
        for name, config in configurations.items():
            # The outer sub-test scopes the availability skip to this triplestore;
            # nested sub-tests inherit its ``name`` parameter.
            with self.subTest(name=name):
                self._require_triplestore(name, config.local_endpoint)
                for mimetype, sparql_fmt in itt.product(
                    config.mimetypes, config.direct_query_fmts
                ):
                    sparql = dedent(sparql_fmt.format(self.mapping_service).rstrip())
                    with self.subTest(mimetype=mimetype, sparql=sparql):
                        self.assert_endpoint(config.local_endpoint, sparql, accept=mimetype)

    def test_to_triplestore(self) -> None:
        """Test a federated query from the CURIEs service to various triple stores."""
        for name, config in configurations.items():
            with self.subTest(name=name):
                self._require_triplestore(name, config.local_endpoint)
                for mimetype, sparql_fmt in itt.product(
                    config.mimetypes, config.service_query_fmts
                ):
                    # The query is sent to the mapping service, which then calls the
                    # triplestore, hence the docker endpoint inside the query text
                    sparql = dedent(sparql_fmt.format(config.docker_endpoint).rstrip())
                    with self.subTest(mimetype=mimetype, sparql=sparql):
                        records = get_pairs(self.mapping_service, sparql, accept=mimetype)
                        self.assertGreater(len(records), 0)
                        # TODO add assert_endpoint here?
11 changes: 11 additions & 0 deletions tests/resources/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM python:3.10

# Dockerfile used to spawn a mapping service SPARQL endpoint for testing

WORKDIR /app

# COPY is preferred over ADD for plain local files: no archive extraction or
# remote fetching is wanted here
COPY . .

# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --no-cache-dir -e ".[fastapi,rdflib,bioregistry]"

CMD [ "bioregistry", "web", "--port", "8888", "--host", "0.0.0.0" ]
18 changes: 18 additions & 0 deletions tests/resources/init_triplestores.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
## Script to initialize the triplestores started with docker
# Run it from the root of the repo: ./tests/resources/init_triplestores.sh

# Stop at the first failing command or unset variable, so a broken
# initialization is noticed here rather than later as failing tests
set -eu

# The triples loaded into every triplestore: an rdf:type statement for each of
# two CHEBI identifiers.org IRIs
TRIPLES="
<https://identifiers.org/CHEBI:24867> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://w3id.org/biolink/vocab/ChemicalEntity> .
<https://identifiers.org/CHEBI:24868> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://w3id.org/biolink/vocab/ChemicalEntity> .
"

echo " 🪄 Load triples to Virtuoso and enable federated queries"
docker compose exec virtuoso isql -U dba -P dba exec='GRANT "SPARQL_SELECT_FED" TO "SPARQL";'
docker compose exec virtuoso isql -U dba -P dba exec="SPARQL INSERT IN <https://identifiers.org/CHEBI> { $TRIPLES };"

echo " ⚡️ Load triples to Blazegraph"
docker compose exec blazegraph curl -X POST http://localhost:8080/blazegraph/namespace/kb/sparql -d "update=insert data { $TRIPLES }"

echo " ☕️ Load triples to Fuseki"
# The "mapping" dataset has to be created before triples can be inserted into it;
# the URL is single-quoted so the shell keeps the literal "$" in the admin path
docker compose exec fuseki curl -X POST -u admin:dba -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' 'http://localhost:3030/$/datasets' -d "dbName=mapping&dbType=tdb2"
docker compose exec fuseki curl -X POST http://localhost:3030/mapping -d "update=insert data { $TRIPLES }"
61 changes: 61 additions & 0 deletions tests/test_sparql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Tests federated SPARQL queries between the curies mapping service and popular triplestores."""

import time
from multiprocessing import Process
from typing import ClassVar

import uvicorn

from curies import Converter
from curies.mapping_service import get_fastapi_mapping_app
from curies.mapping_service.utils import sparql_service_available
from tests import cases
from tests.test_mapping_service import PREFIX_MAP


class TestDockerFederation(cases.FederationMixin):
    """Run the federation tests against the mapping service exposed by docker compose.

    The whole test case is skipped when that mapping service cannot be reached.
    To start and initialize the required services locally:

    1. docker compose up
    2. ./tests/resources/init_triplestores.sh
    """

    def setUp(self) -> None:
        """Point the tests at the dockerized mapping service, skipping if it is down."""
        endpoint = cases.LOCAL_MAPPING_SERVICE
        self.mapping_service = endpoint

        if sparql_service_available(endpoint):
            return
        self.skipTest(f"Mapping service is not available: {endpoint}")


def _get_app():
    """Build the FastAPI mapping service app from the shared test prefix map."""
    return get_fastapi_mapping_app(Converter.from_priority_prefix_map(PREFIX_MAP))


class TestLocalFederation(cases.FederationMixin):
    """Run the federation tests against a mapping service started by the test itself.

    ``setUp`` launches the mapping service with uvicorn in a child process for
    every test and registers a cleanup that stops it again, so the port is free
    when the next test starts its own instance.
    """

    host: ClassVar[str] = "localhost"
    port: ClassVar[int] = 8000
    #: Maximum number of seconds to wait for the mapping service to respond
    startup_timeout: ClassVar[float] = 5.0
    mapping_service_process: Process

    def setUp(self):
        """Start the mapping service and wait for it, skipping the test if it never responds."""
        self.mapping_service = f"http://{self.host}:{self.port}/sparql"

        # Start the curies mapping service SPARQL endpoint
        self.mapping_service_process = Process(
            target=uvicorn.run,
            # uvicorn.run accepts a zero-argument callable that returns an app
            args=(_get_app,),
            kwargs={"host": self.host, "port": self.port, "log_level": "info"},
            daemon=True,
        )
        self.mapping_service_process.start()
        # Registered as a cleanup rather than in tearDown, because cleanups also
        # run when setUp itself skips or fails
        self.addCleanup(self._stop_mapping_service)

        # Poll instead of sleeping for a fixed time: continue as soon as the
        # service answers, and give up once the deadline passes or the process died
        deadline = time.monotonic() + self.startup_timeout
        while not sparql_service_available(self.mapping_service):
            if time.monotonic() >= deadline or not self.mapping_service_process.is_alive():
                self.skipTest(f"Mapping service is not available: {self.mapping_service}")
            time.sleep(0.2)

    def _stop_mapping_service(self) -> None:
        """Terminate the mapping service process and wait briefly for it to exit."""
        process = self.mapping_service_process
        if process.is_alive():
            process.terminate()
        process.join(timeout=5)