Skip to content

Commit 1d94a74

Browse files
committed
chore: refactor repo; rename main.py into strings2things; modularize
1 parent 62a576e commit 1d94a74

15 files changed

Lines changed: 167 additions & 185 deletions

File tree

justfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ test *args:
5757

5858
# Run an executable.
5959
run *args:
60-
uv run cli "$@"
60+
uv run --env-file=.env cli "$@"
6161

6262
# Run the Jupyter notebook.
6363
notebook *args:

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[project]
2-
name = "main.py"
2+
name = "strings2things"
33
authors = [
44
{ name = "Robin Franken", email = "robin.franken@epfl.ch" },
55
{ name = "Martin Fontanet", email = "martin.fontanet@epfl.ch" },
@@ -22,7 +22,7 @@ dev = [
2222
]
2323

2424
[project.scripts]
25-
cli = 'main.py.cli:main'
25+
cli = 'strings2things.cli:main'
2626

2727
[build-system]
2828
requires = ["hatchling"]
-570 Bytes
Binary file not shown.

src/main.py/cli.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

src/main.py/main.py

Lines changed: 0 additions & 145 deletions
This file was deleted.

src/strings2things/cli.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from strings2things.format import append_input_term
2+
from strings2things.sparql import enumeration_query, find_matches_query, find_predicate_query
3+
from pyfuzon.matcher import TermMatcher
4+
import os
5+
import json
6+
import rdflib
7+
8+
# Minimum value of (best matcher score / length of the search term) required
# before a suggested ontology term is accepted for a search term.
MATCH_THRESHOLD = 0.8


def _require_env(name):
    """Return the value of environment variable *name*, failing clearly if unset.

    Raises:
        RuntimeError: if the variable is missing or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Environment variable {name} must be set")
    return value


def _best_match(matcher, searchterm):
    """Return the matcher's top suggestion for *searchterm*, or None.

    A suggestion is accepted only when the highest score reported by
    ``matcher.score`` divided by the length of the search term exceeds
    ``MATCH_THRESHOLD``.  Empty search terms and empty score lists yield
    None instead of raising.

    NOTE(review): assumes ``matcher.score`` returns an iterable of numeric
    scores -- confirm against the pyfuzon API.
    """
    if not searchterm:
        # Nothing to match, and the length normalisation below would divide by 0.
        return None
    best_score = max(matcher.score(searchterm), default=None)
    if best_score is not None and best_score / len(searchterm) > MATCH_THRESHOLD:
        return matcher.top(searchterm, 1)[0]
    return None


def main():
    """Match search terms from the knowledge graph against ontology terms.

    Reads the file locations from the ``KNOWLEDGE_GRAPH_PATH`` and
    ``ONTOLOGIES_PATH`` environment variables, loads both into rdflib, runs
    the project's SPARQL queries and prints a JSON document built with
    ``append_input_term`` that records, for every (search term, predicate)
    row, the suggested term URI or ``None``.

    Raises:
        RuntimeError: if either environment variable is missing or empty.
    """
    knowledge_graph_path = _require_env("KNOWLEDGE_GRAPH_PATH")
    ontologies_path = _require_env("ONTOLOGIES_PATH")

    onto = rdflib.Graph()
    onto.parse(ontologies_path)

    dataset = rdflib.Dataset()

    # The handle returned by ``dataset.graph`` is only needed to load data:
    # parsing through it populates the named graph inside ``dataset``, which
    # is what gets queried below.
    knowledge_graph = dataset.graph("https://imaging-plaza.epfl.ch/finalGraph")
    knowledge_graph.parse(knowledge_graph_path)

    # Keep only the enumeration-related part of the ontology and expose it as
    # a second named graph of the dataset.
    # TODO: filter down ontology to only get triples related to enumerations
    enumeration_results = onto.query(enumeration_query)
    enums = dataset.graph("https://imaging-plaza.epfl.ch/ontology#enums")
    enums.parse(
        data=enumeration_results.graph.serialize(format="turtle"),
        format="turtle",
    )

    # Query the CONSTRUCT result graph directly; copying it triple by triple
    # into another graph first adds nothing.
    matches = dataset.query(find_matches_query).graph

    matcher = TermMatcher.from_files([ontologies_path])

    inputdict = {}
    for term in matches.query(find_predicate_query):
        searchterm = term[0]
        predicate = term[1]
        suggestedterm = _best_match(matcher, searchterm)
        if suggestedterm is not None:
            print(suggestedterm.uri)
            append_input_term(inputdict, str(searchterm), str(predicate), suggestedterm.uri)
        else:
            append_input_term(inputdict, str(searchterm), str(predicate), None)

    json_input = json.dumps(inputdict)

    print(json_input)

    # TODO: create enums list
    # TODO: call LLM


if __name__ == "__main__":
    main()

src/strings2things/graph.py

Whitespace-only changes.

0 commit comments

Comments
 (0)