Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name: openMINDS_OWL_build_pipeline

on:
  push:
    branches:
      - pipeline
  workflow_dispatch: # manual trigger via the UI or the GitHub REST API

jobs:
  build:
    runs-on: ubuntu-latest
    steps:

      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML cannot reinterpret the version as a float.
          python-version: "3.13"

      - name: Run build
        run: |
          uv pip install --system -r requirements.txt
          python build.py

      - name: Checkout main branch
        # Same major version as the checkout above (was v3 — kept consistent).
        uses: actions/checkout@v4
        with:
          ref: main
          path: main
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Push to main
        run: |
          cp -R target/* main
          cd main
          git config --global user.email "support@openmetadatainitiative.org"
          git config --global user.name "openMINDS pipeline"
          # "git status --porcelain" reports all pending changes (additions,
          # modifications and deletions), unlike "git add --dry-run" output.
          if [[ $(git status --porcelain | wc -l) -gt 0 ]]; then
            git add .
            git commit -m "build triggered by ${{ github.event_name }}"
            git push -f
          else
            echo "Nothing to commit"
          fi
24 changes: 24 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os.path
import shutil

from pipeline.translator import OWLSchemaBuilder
from pipeline.utils import clone_sources, SchemaLoader

print("***************************************")
# Plain string: the original used an f-string with no placeholders.
print("Triggering the generation of OWL for openMINDS")
print("***************************************")

# Step 1 - clone central repository in main branch to get the latest sources
clone_sources()
schema_loader = SchemaLoader()

# Start from a clean build directory so stale artifacts never survive a rebuild.
if os.path.exists("target"):
    shutil.rmtree("target")

for schema_version in schema_loader.get_schema_versions():

    # Step 2 - find all involved schemas for the current version
    schemas_file_paths = schema_loader.find_schemas(schema_version)

    for schema_file_path in schemas_file_paths:
        # Step 3 - translate each openMINDS schema to OWL and serialize it
        # (Turtle, RDF/XML and JSON-LD are written by OWLSchemaBuilder.build)
        OWLSchemaBuilder(schema_file_path, schema_loader.schemas_sources).build()
Empty file added pipeline/__init__.py
Empty file.
141 changes: 141 additions & 0 deletions pipeline/translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import os
import os.path

from pipeline.utils import load_json

from rdflib import Graph, Namespace, Literal, URIRef, BNode
from rdflib.collection import Collection
from rdflib.namespace import FOAF, RDF, RDFS, OWL, XSD
from typing import List, Dict

class OWLSchemaBuilder(object):
    """Translates a single openMINDS schema into an OWL representation.

    The schema (a ``*.schema.omi.json`` file) is converted into an RDF graph
    using OWL vocabulary and serialized in three formats (Turtle, RDF/XML,
    JSON-LD) under the ``target`` directory.
    """

    def __init__(self, schema_file_path: str, root_path: str):
        """
        :param schema_file_path: absolute path of the openMINDS schema file
        :param root_path: root directory of the schema sources; the part of
            ``schema_file_path`` below this root encodes the openMINDS version
            (first path segment) and the relative output location (the rest)
        """
        # Strip the root prefix and extension: "<version>/<subdirs...>/<name>"
        _relative_path_without_extension = schema_file_path[len(root_path) + 1:].replace(".schema.omi.json", "").split("/")
        self.version = _relative_path_without_extension[0]
        self.relative_path_without_extension = _relative_path_without_extension[1:]
        self.graph = Graph()
        # Central vocabulary: per-property usage (-> rdfs:domain) and
        # link targets (-> rdfs:range), keyed by the property's short name.
        self.properties_file = load_json(os.path.join(os.path.realpath("."), "sources", "vocab", "properties.json"))
        self._schema_payload = load_json(schema_file_path)
        self.class_uri = URIRef(self._schema_payload["_type"])

    def _target_file_without_extension(self) -> str:
        """Relative output path (no extension) shared by all serializations."""
        return os.path.join(self.version, "/".join(self.relative_path_without_extension))

    def _add_union_class(self, subject, predicate, members: List):
        """Attach an anonymous owl:unionOf class of *members* via (subject, predicate)."""
        union_class = BNode()
        self.graph.add((subject, predicate, union_class))
        union_list_node = BNode()
        Collection(self.graph, union_list_node, [URIRef(member) for member in members])
        self.graph.add((union_class, RDF.type, OWL.Class))
        self.graph.add((union_class, OWL.unionOf, union_list_node))

    def _add_min_cardinality_restriction(self, prop_uri):
        """State that this class requires at least one value for *prop_uri*."""
        restriction = BNode()
        self.graph.add((self.class_uri, RDFS.subClassOf, restriction))
        self.graph.add((restriction, RDF.type, OWL.Restriction))
        self.graph.add((restriction, OWL.onProperty, URIRef(prop_uri)))
        self.graph.add((restriction, OWL.minCardinality, Literal(1, datatype=XSD.nonNegativeInteger)))

    def _restriction_multiple_range(self, prop_uri, prop_range: List, prop_spec: Dict, required: bool):
        """Restrict the values of *prop_uri* on this class to *prop_range*.

        Adds an owl:allValuesFrom restriction (a union class when several
        types are allowed), a max-cardinality of 1 for non-array properties
        and a min-cardinality of 1 when the property is required.
        """
        restriction = BNode()
        self.graph.add((self.class_uri, RDFS.subClassOf, restriction))
        self.graph.add((restriction, RDF.type, OWL.Restriction))
        self.graph.add((restriction, OWL.onProperty, prop_uri))
        if len(prop_range) == 1:
            self.graph.add((restriction, OWL.allValuesFrom, URIRef(prop_range[0])))
        else:
            self._add_union_class(restriction, OWL.allValuesFrom, prop_range)
        if prop_spec.get('type') != 'array':
            # A missing 'type' also means "single value" for linked/embedded types.
            self.graph.add((restriction, OWL.maxCardinality, Literal(1, datatype=XSD.nonNegativeInteger)))

        if required:
            # NOTE: the min-cardinality constraint needs its own restriction
            # node; reusing the node above (as the original code did) would
            # conflate it with the allValuesFrom/maxCardinality statements.
            self._add_min_cardinality_restriction(prop_uri)

    def _translate_property_specifications(self, prop_uri, prop_spec: Dict, required: bool):
        """Translate one property specification into OWL statements."""
        prop_uri = URIRef(prop_uri)
        if '_linkedTypes' in prop_spec:
            self.graph.add((prop_uri, RDF.type, OWL.ObjectProperty))
            self._restriction_multiple_range(prop_uri, prop_spec['_linkedTypes'], prop_spec, required)

        elif '_embeddedTypes' in prop_spec:
            self.graph.add((prop_uri, RDF.type, OWL.ObjectProperty))
            self._restriction_multiple_range(prop_uri, prop_spec['_embeddedTypes'], prop_spec, required)

        elif 'type' in prop_spec and prop_spec['type'] in ['string', 'number', 'array']:
            self.graph.add((prop_uri, RDF.type, OWL.DatatypeProperty))
            if prop_spec['type'] == 'string':
                # TODO include list of _formats and other datatypes
                if '_formats' in prop_spec and 'date' in prop_spec['_formats'] and len(prop_spec['_formats']) == 1:
                    self.graph.add((prop_uri, RDFS.range, XSD.date))
                else:  # IRI not represented in OWL as datatype (xsd:anyURI not suitable)
                    self.graph.add((prop_uri, RDFS.range, XSD.string))
            elif prop_spec['type'] == 'number':
                self.graph.add((prop_uri, RDFS.range, XSD.decimal))
            elif prop_spec['type'] == 'array':
                # An array value is represented as a single rdf:List.
                self.graph.add((prop_uri, RDFS.range, RDF.List))
            restriction = BNode()
            self.graph.add((self.class_uri, RDFS.subClassOf, restriction))
            self.graph.add((restriction, RDF.type, OWL.Restriction))
            self.graph.add((restriction, OWL.onProperty, prop_uri))
            self.graph.add((restriction, OWL.maxCardinality, Literal(1, datatype=XSD.nonNegativeInteger)))

            if required:
                self._add_min_cardinality_restriction(prop_uri)

        self.graph.add((prop_uri, RDFS.label, Literal(prop_spec['label'])))
        if 'description' in prop_spec:
            self.graph.add((prop_uri, RDFS.comment, Literal(prop_spec['description'])))

    def translate(self):
        """Populate the graph with the OWL class and all its property statements."""
        self.graph.add((self.class_uri, RDF.type, OWL.Class))
        self.graph.add((self.class_uri, RDFS.label, Literal(self._schema_payload['label'])))
        if 'description' in self._schema_payload:
            self.graph.add((self.class_uri, RDFS.comment, Literal(self._schema_payload['description'])))

        if self._schema_payload.get("properties"):
            required_properties = self._schema_payload.get('required') or []
            for prop_uri, prop_spec in self._schema_payload['properties'].items():
                self._translate_property_specifications(prop_uri, prop_spec, prop_uri in required_properties)

                vocab_entry = self.properties_file.get(prop_uri.split('/')[-1])
                if vocab_entry is not None:
                    # property rdfs:domain - union of all types using this property
                    domain_types = vocab_entry['usedIn'][self.version]
                    if len(domain_types) > 1:
                        self._add_union_class(URIRef(prop_uri), RDFS.domain, domain_types)
                    else:
                        self.graph.add((URIRef(prop_uri), RDFS.domain, URIRef(domain_types[0])))
                    # property rdfs:range - union of all types this property can point to
                    range_types = vocab_entry.get('asEdge', {}).get('canPointTo', {}).get(self.version) or []
                    if len(range_types) > 1:
                        self._add_union_class(URIRef(prop_uri), RDFS.range, range_types)
                    elif range_types:
                        self.graph.add((URIRef(prop_uri), RDFS.range, URIRef(range_types[0])))

    def build(self):
        """Translate the schema and serialize the graph in all target formats."""
        self.translate()
        # rdflib format name -> (output directory, file extension)
        serializations = {
            "ttl": ("Turtle", "owl"),
            "xml": ("RDF-XML", "xml"),
            "json-ld": ("JSON-LD", "jsonld"),
        }
        for rdflib_format, (directory, extension) in serializations.items():
            target_file = os.path.join("target", "schemas", directory, f"{self._target_file_without_extension()}.{extension}")
            os.makedirs(os.path.dirname(target_file), exist_ok=True)
            self.graph.serialize(destination=target_file, format=rdflib_format)
29 changes: 29 additions & 0 deletions pipeline/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import glob
import os
import json
import shutil
from typing import List

from git import Repo, GitCommandError

def clone_sources():
    """Shallow-clone the central openMINDS repository into ``sources``."""
    # Drop any previous checkout so the clone always starts from scratch.
    shutil.rmtree("sources", ignore_errors=True)
    Repo.clone_from(
        "https://github.com/openMetadataInitiative/openMINDS.git",
        to_path="sources",
        depth=1,
    )

def load_json(path):
    """Load and return the parsed content of the JSON file at *path*.

    :param path: path of the JSON file to read
    :return: the deserialized JSON content (typically a dict)
    """
    # JSON is UTF-8 on disk; be explicit so the platform default encoding
    # cannot corrupt non-ASCII schema text.
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)

class SchemaLoader(object):
    """Locates the cloned openMINDS schema sources on disk."""

    def __init__(self):
        # The pipeline runs from the repository root, where clone_sources()
        # places the "sources" checkout.
        self._root_directory = os.path.realpath(".")
        self.schemas_sources = os.path.join(self._root_directory, "sources", "schemas")

    def get_schema_versions(self) -> List[str]:
        """Return the available openMINDS version directories (e.g. "v3.0")."""
        # Only directories denote versions; ignore stray files such as a README.
        return [entry for entry in os.listdir(self.schemas_sources)
                if os.path.isdir(os.path.join(self.schemas_sources, entry))]

    def find_schemas(self, version: str) -> List[str]:
        """Return the paths of all schema files belonging to *version*."""
        return glob.glob(os.path.join(self.schemas_sources, version, '**/*.schema.omi.json'), recursive=True)
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
gitpython
rdflib