Skip to content

Commit 1538183

Browse files
authored
Merge pull request #10 from sdsc-ordes/develop
v0.1.0
2 parents 32c94ff + 2fd5c69 commit 1538183

9 files changed

Lines changed: 331 additions & 113 deletions

File tree

.env.dist

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
GRAPHDB_GRAPH=
22
GRAPHDB_URL={host}:{port}/repositories/{reponame}
33
GRAPHDB_USER=
4-
GRAPHDB_PASSWORD=
4+
GRAPHDB_PASSWORD=
5+
SEARCH_THRESHOLD=75

CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,16 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [0.1.0] - 2025-07-15
6+
7+
### Added
8+
- New search engine based on RapidFuzz
9+
- Improved performance
10+
- Welcome message
11+
512
## [0.0.1] - 2025-06-25
613

714
### Added
815
- Basic search based on pyfuzon
916
- Dockerfile for containerization.
10-
- GitHub Actions workflow for automated publishing and releases.
17+
- GitHub Actions workflow for automated publishing and releases.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ authors = [
66
description = "A microservice for fetching relevant softwares to a certain search term + filters"
77
readme = "README.md"
88
requires-python = ">=3.10"
9-
version = "0.0.1"
9+
version = "0.1.0"
1010

1111
dependencies = [
1212
"fastapi==0.115.11",
@@ -18,6 +18,7 @@ dependencies = [
1818
"requests==2.32.3",
1919
"pydantic==2.10.3",
2020
"pyfuzon==0.4.0",
21+
"RapidFuzz==3.13.0"
2122
]
2223

2324
[dependency-groups]

src/imaging_plaza_search/config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from rdflib import URIRef
2+
3+
LABEL_PREDICATES_WEIGHTED = [
4+
(URIRef("http://schema.org/name"), 1.0),
5+
# (URIRef("http://www.w3.org/2000/01/rdf-schema#label"), 0.9),
6+
(URIRef("http://schema.org/description"), 0.5),
7+
(URIRef("http://schema.org/featureList"), 0.6),
8+
(URIRef("http://schema.org/programmingLanguage"), 0.5),
9+
(URIRef("http://schema.org/keywords"), 0.5),
10+
(URIRef("https://imaging-plaza.epfl.ch/ontology#relatedToOrganization"), 0.5),
11+
]
Lines changed: 58 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,116 +1,98 @@
11
from typing import List, Optional
22
from imaging_plaza_search.models import Filter
3-
from SPARQLWrapper import SPARQLWrapper, N3
4-
from urllib.error import HTTPError # Import HTTPError
3+
from SPARQLWrapper import SPARQLWrapper, N3, JSON
4+
from urllib.error import HTTPError
55
from rdflib import Graph
66

77

8-
def get_data_from_graphdb(
9-
db_host: str,
10-
db_user: str,
11-
db_password: str,
12-
filters: Optional[List[Filter]],
13-
graph: str,
14-
) -> str:
15-
"""
16-
Constructs and executes a SPARQL CONSTRUCT query against a GraphDB instance,
17-
using the provided filters, and returns the result as a UTF-8 decoded string.
18-
19-
Parameters:
20-
db_host (str): The GraphDB endpoint URL.
21-
db_user (str): The username for authentication.
22-
db_password (str): The password for authentication.
23-
filters (Optional[List[Filter]]): A list of filter objects to build the query.
24-
graph (str): The graph IRI to query.
25-
26-
Returns:
27-
str: The SPARQL CONSTRUCT query result in N-Triples format as a UTF-8 string.
8+
def build_filter_conditions(filters: Optional[List[Filter]]) -> str:
9+
if not filters:
10+
return ""
2811

29-
Raises:
30-
RuntimeError: If an HTTPError occurs during the SPARQL query execution.
31-
"""
12+
conditions = []
13+
for filter in filters:
14+
values = ", ".join(f'"{val}"' for val in filter.value)
15+
condition = (
16+
f"?s {filter.schema_key} ?{filter.key}. FILTER(?{filter.key} IN ({values}))"
17+
)
18+
conditions.append(condition)
19+
return " ".join(conditions)
3220

33-
is_connected = test_connection(db_host, db_user, db_password)
3421

35-
print(is_connected)
22+
def execute_query(
23+
db_host: str, db_user: str, db_password: str, query: str, return_format: str = "nt"
24+
) -> str:
25+
sparql = SPARQLWrapper(db_host)
26+
sparql.setQuery(query)
27+
sparql.setCredentials(user=db_user, passwd=db_password)
3628

37-
if not filters:
38-
return ""
29+
if return_format == "nt":
30+
sparql.setReturnFormat(N3)
31+
sparql.addCustomHttpHeader("Accept", "application/n-triples")
32+
else:
33+
sparql.setReturnFormat(JSON)
3934

40-
conditions: List[str] = []
41-
for filter in filters:
42-
if filter.value:
43-
values = ", ".join(f'"{val}"' for val in filter.value)
44-
condition = (
45-
f"?s {filter.schema_key} ?{filter.key}. FILTER(?{filter.key} IN ({values}))"
46-
)
47-
conditions.append(condition)
35+
try:
36+
result_bytes = sparql.query().convert()
37+
return (
38+
result_bytes.decode("utf-8")
39+
if isinstance(result_bytes, bytes)
40+
else result_bytes
41+
)
42+
except HTTPError as e:
43+
raise RuntimeError(f"HTTPError during SPARQL query: {e}")
4844

49-
filter_conditions: str = " ".join(conditions)
5045

51-
query: str = f"""
46+
def get_literals_query(graph: str, filters: Optional[List[Filter]]) -> str:
47+
filter_conditions = build_filter_conditions(filters)
48+
return f"""
5249
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
5350
PREFIX schema: <http://schema.org/>
5451
PREFIX imag: <https://imaging-plaza.epfl.ch/ontology#>
5552
PREFIX fuzon: <http://example.org/fuzon#>
53+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
5654
57-
CONSTRUCT
58-
{{
59-
?s fuzon:searchIndexPredicate ?literal .
60-
?s ?p ?o .
61-
?o ?p2 ?o2 .
62-
?o2 ?p3 ?o3 .
55+
CONSTRUCT {{
56+
?s ?p ?o
6357
}}
6458
WHERE {{
6559
GRAPH <{graph}> {{
60+
?s ?p ?o .
6661
?s rdf:type schema:SoftwareSourceCode ;
67-
?p ?o .
68-
69-
OPTIONAL {{
70-
?o ?p2 ?o2 .
71-
OPTIONAL {{ ?o2 ?p3 ?o3 }}
72-
}}
73-
62+
# OPTIONAL {{?s rdfs:label ?label; }}
63+
# OPTIONAL {{?s schema:name ?name ; }}
7464
FILTER(isLiteral(?o))
75-
76-
BIND(str(?o) as ?literal)
65+
.
7766
{filter_conditions}
7867
}}
7968
}}
8069
"""
8170

82-
sparql = SPARQLWrapper(db_host)
83-
sparql.setQuery(query)
84-
sparql.setReturnFormat(N3)
85-
sparql.setCredentials(user=db_user, passwd=db_password)
86-
sparql.addCustomHttpHeader("Accept", "application/n-triples")
8771

88-
try:
89-
result_bytes: bytes = sparql.query().convert()
90-
return result_bytes.decode("utf-8")
91-
92-
except HTTPError as e:
93-
raise RuntimeError(f"HTTPError during SPARQL query: {e}")
72+
def get_subjects_query(graph: str) -> str:
73+
return f"""
74+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
75+
PREFIX schema: <http://schema.org/>
9476
95-
def test_connection(db_host: str, db_user: str, db_password: str) -> bool:
77+
SELECT DISTINCT ?s
78+
WHERE {{
79+
GRAPH <{graph}> {{
80+
?s rdf:type schema:SoftwareSourceCode .
81+
}}
82+
}}
9683
"""
97-
Tests the connection to the GraphDB instance by executing a simple SPARQL query.
9884

99-
Parameters:
100-
db_host (str): The GraphDB endpoint URL.
101-
db_user (str): The username for authentication.
102-
db_password (str): The password for authentication.
10385

104-
Returns:
105-
bool: True if the connection is successful, False otherwise.
106-
"""
86+
def test_connection(db_host: str, db_user: str, db_password: str) -> bool:
10787
sparql = SPARQLWrapper(db_host)
10888
sparql.setCredentials(user=db_user, passwd=db_password)
10989
sparql.setQuery("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1")
110-
sparql.setReturnFormat(N3)
90+
sparql.setReturnFormat(JSON)
11191

11292
try:
11393
sparql.query()
11494
return True
11595
except HTTPError as e:
116-
raise RuntimeError(f"HTTPError during connection test: {e}. Please check that your GRAPHDB_URL is ended in /repositories/imagingplaza")
96+
raise RuntimeError(
97+
f"HTTPError during connection test: {e}. Ensure your GRAPHDB_URL ends in /repositories/..."
98+
)

0 commit comments

Comments
 (0)