Skip to content

Commit 4aa0a55

Browse files
committed
progress
1 parent e01b11c commit 4aa0a55

File tree

5 files changed

+130
-38
lines changed

5 files changed

+130
-38
lines changed

cli/src/main.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,12 @@ fn main() -> Result<()> {
207207
.get_ontology_by_name(iri.as_ref())
208208
.ok_or(anyhow::anyhow!(format!("Ontology {} not found", iri)))?;
209209
let closure = env.get_dependency_closure(ont.id())?;
210-
let graph = env.get_union_graph(&closure, rewrite_sh_prefixes, remove_owl_imports)?;
210+
let (graph, _successful, failed_imports) = env.get_union_graph(&closure, rewrite_sh_prefixes, remove_owl_imports)?;
211+
if let Some(failed_imports) = failed_imports {
212+
for imp in failed_imports {
213+
eprintln!("{}", imp);
214+
}
215+
}
211216
// write the graph to a file
212217
if let Some(destination) = destination {
213218
write_dataset_to_file(&graph, &destination)?;

lib/src/lib.rs

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use std::fs;
3030
use std::io::{BufReader, Write};
3131
use std::path::Path;
3232
use walkdir::WalkDir;
33-
use std::fmt;
33+
use std::fmt::{self, Display};
3434

3535
// custom derive for ontologies field as vec of Ontology
3636
fn ontologies_ser<S>(
@@ -56,6 +56,23 @@ where
5656
Ok(map)
5757
}
5858

59+
pub struct FailedImport {
60+
ontology: GraphIdentifier,
61+
error: String,
62+
}
63+
64+
impl FailedImport {
65+
pub fn new(ontology: GraphIdentifier, error: String) -> Self {
66+
Self { ontology, error }
67+
}
68+
}
69+
70+
impl Display for FailedImport {
71+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
72+
write!(f, "Failed to import ontology {}: {}", self.ontology, self.error)
73+
}
74+
}
75+
5976
pub struct EnvironmentStatus {
6077
// true if there is an environment that ontoenv can find
6178
exists: bool,
@@ -831,25 +848,31 @@ impl OntoEnv {
831848
Ok(closure)
832849
}
833850

834-
/// Returns a graph containing the union of all graphs_ids
851+
/// Returns a graph containing the union of all graphs_ids, along with a list of
852+
/// graphs that could and could not be imported.
835853
pub fn get_union_graph(
836854
&self,
837855
graph_ids: &[GraphIdentifier],
838856
rewrite_sh_prefixes: Option<bool>,
839857
remove_owl_imports: Option<bool>,
840-
// TODO: remove_ontology_declarations
841-
) -> Result<Dataset> {
858+
) -> Result<(Dataset, Vec<GraphIdentifier>, Option<Vec<FailedImport>>)> {
842859
// compute union of all graphs
843860
let mut union: Dataset = Dataset::new();
844861
let store = self.store();
862+
let mut failed_imports: Vec<FailedImport> = vec![];
863+
let mut successful_imports: Vec<GraphIdentifier> = vec![];
845864
for id in graph_ids {
846865
let graphname: NamedOrBlankNode = match id.graphname()? {
847866
GraphName::NamedNode(n) => NamedOrBlankNode::NamedNode(n),
848867
_ => continue,
849868
};
850869

851870
if !store.contains_named_graph(graphname.as_ref())? {
852-
return Err(anyhow::anyhow!("Graph not found: {:?}", id));
871+
failed_imports.push(FailedImport {
872+
ontology: id.clone(),
873+
error: "Graph not found".to_string(),
874+
});
875+
continue;
853876
}
854877

855878
let mut count = 0;
@@ -881,12 +904,9 @@ impl OntoEnv {
881904
ONTOLOGY,
882905
graphname.as_ref(),
883906
);
884-
if !union.remove(to_remove) {
885-
error!("Failed to remove ontology declaration: {:?}", to_remove);
886-
}
907+
union.remove(to_remove);
887908
}
888-
889-
909+
successful_imports.push(id.clone());
890910
info!("Added {} triples from graph: {:?}", count, id);
891911
}
892912
let first_id = graph_ids
@@ -896,15 +916,22 @@ impl OntoEnv {
896916

897917
// Rewrite sh:prefixes
898918
// defaults to true if not specified
899-
if let Some(true) = rewrite_sh_prefixes.or(Some(true)) {
919+
if rewrite_sh_prefixes.unwrap_or(true) {
900920
transform::rewrite_sh_prefixes(&mut union, root_ontology);
901921
}
902922
// remove owl:imports
903-
if let Some(true) = remove_owl_imports.or(Some(true)) {
904-
transform::remove_owl_imports(&mut union)
923+
if remove_owl_imports.unwrap_or(true) {
924+
let to_remove: Vec<NamedNodeRef> = graph_ids.iter().map(|id| id.into()).collect();
925+
println!("Removing owl:imports: {:?}", to_remove);
926+
transform::remove_owl_imports(&mut union, Some(&to_remove));
905927
}
906928
transform::remove_ontology_declarations(&mut union, root_ontology);
907-
Ok(union)
929+
let failed_imports = if failed_imports.is_empty() {
930+
None
931+
} else {
932+
Some(failed_imports)
933+
};
934+
Ok((union, successful_imports, failed_imports))
908935
}
909936

910937
/// Returns a list of issues with the environment

lib/src/ontology.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,18 @@ impl std::fmt::Display for GraphIdentifier {
5555
}
5656
}
5757

58+
/// Converts a `GraphIdentifier` into its ontology name, consuming the identifier.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl then provides `Into<NamedNode> for GraphIdentifier` automatically, so
/// existing `.into()` call sites keep working and `NamedNode::from(id)` also
/// becomes available.
impl From<GraphIdentifier> for NamedNode {
    fn from(id: GraphIdentifier) -> Self {
        id.name
    }
}
63+
64+
/// Borrows the ontology name of a `GraphIdentifier` as a `NamedNodeRef`.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl then provides `Into<NamedNodeRef<'a>> for &'a GraphIdentifier`
/// automatically, so existing `.into()` call sites keep working.
impl<'a> From<&'a GraphIdentifier> for NamedNodeRef<'a> {
    fn from(id: &'a GraphIdentifier) -> Self {
        (&id.name).into()
    }
}
69+
5870
impl GraphIdentifier {
5971
pub fn new(name: NamedNodeRef) -> Self {
6072
// location is same as name

lib/src/transform.rs

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::consts::{DECLARE, IMPORTS, ONTOLOGY, PREFIXES, TYPE};
2-
use oxigraph::model::{Dataset, Graph, Quad, QuadRef, SubjectRef, Triple, TripleRef};
2+
use oxigraph::model::{Dataset, Graph, Quad, QuadRef, SubjectRef, Triple, TripleRef, NamedNodeRef, TermRef};
33

44
/// Rewrites all sh:prefixes in the graph to point to the provided root
55
pub fn rewrite_sh_prefixes(graph: &mut Dataset, root: SubjectRef) {
@@ -68,13 +68,24 @@ pub fn rewrite_sh_prefixes_graph(graph: &mut Graph, root: SubjectRef) {
6868

6969
/// Remove owl:imports statements from a graph. Can be helpful to do after computing the union of
7070
/// all imports so that downstream tools do not attempt to fetch these graph dependencies
71-
/// themselves
72-
pub fn remove_owl_imports(graph: &mut Dataset) {
73-
// remove owl:imports
74-
let mut to_remove: Vec<Quad> = vec![];
75-
for quad in graph.quads_for_predicate(IMPORTS) {
76-
to_remove.push(quad.into());
77-
}
71+
/// themselves. If ontologies_to_remove is provided, only remove owl:imports to those ontologies
72+
pub fn remove_owl_imports(graph: &mut Dataset, ontologies_to_remove: Option<&[NamedNodeRef]>) {
73+
let to_remove: Vec<Quad> = graph.quads_for_predicate(IMPORTS)
74+
.filter_map(|quad| {
75+
match quad.object {
76+
TermRef::NamedNode(obj) => {
77+
if ontologies_to_remove.map_or(true, |ontologies| ontologies.contains(&obj)) {
78+
Some(quad.into())
79+
} else {
80+
None
81+
}
82+
}
83+
_ => None
84+
}
85+
})
86+
.collect();
87+
88+
// Remove the collected quads
7889
for quad in to_remove {
7990
graph.remove(quad.as_ref());
8091
}
@@ -83,14 +94,25 @@ pub fn remove_owl_imports(graph: &mut Dataset) {
8394
/// Remove owl:imports statements from a graph. Can be helpful to do after computing the union of
8495
/// all imports so that downstream tools do not attempt to fetch these graph dependencies
8596
/// themselves
86-
pub fn remove_owl_imports_graph(graph: &mut Graph) {
87-
// remove owl:imports
88-
let mut to_remove: Vec<Triple> = vec![];
89-
for triple in graph.triples_for_predicate(IMPORTS) {
90-
to_remove.push(triple.into());
91-
}
92-
for triple in to_remove {
93-
graph.remove(triple.as_ref());
97+
pub fn remove_owl_imports_graph(graph: &mut Graph, ontologies_to_remove: Option<&[NamedNodeRef]>) {
98+
let to_remove: Vec<Triple> = graph.triples_for_predicate(IMPORTS)
99+
.filter_map(|triple| {
100+
match triple.object {
101+
TermRef::NamedNode(obj) => {
102+
if ontologies_to_remove.map_or(true, |ontologies| ontologies.contains(&obj)) {
103+
Some(triple.into())
104+
} else {
105+
None
106+
}
107+
}
108+
_ => None
109+
}
110+
})
111+
.collect();
112+
113+
// Remove the collected quads
114+
for quad in to_remove {
115+
graph.remove(quad.as_ref());
94116
}
95117
}
96118

python/src/lib.rs

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#![feature(once_cell_try)]
22
use ::ontoenv as ontoenvrs;
3-
use ::ontoenv::consts::{ONTOLOGY, TYPE};
3+
use ::ontoenv::consts::{ONTOLOGY, TYPE, IMPORTS};
44
use ::ontoenv::ontology::OntologyLocation;
55
use ::ontoenv::transform;
66
use anyhow::Error;
@@ -118,14 +118,15 @@ fn term_to_python<'a>(
118118
}
119119

120120
#[pyclass]
121+
#[derive(Clone)]
121122
struct Config {
122123
cfg: ontoenvrs::config::Config,
123124
}
124125

125126
#[pymethods]
126127
impl Config {
127128
#[new]
128-
#[pyo3(signature = (search_directories=None, require_ontology_names=false, strict=false, offline=false, resolution_policy="default".to_owned(), root=".".to_owned(), includes=vec![], excludes=vec![]))]
129+
#[pyo3(signature = (search_directories=None, require_ontology_names=false, strict=false, offline=false, resolution_policy="default".to_owned(), root=".".to_owned(), includes=None, excludes=None))]
129130
fn new(
130131
search_directories: Option<Vec<String>>,
131132
require_ontology_names: bool,
@@ -173,10 +174,10 @@ struct OntoEnv {
173174
#[pymethods]
174175
impl OntoEnv {
175176
#[new]
176-
#[pyo3(signature = (config=None, path=Path::new(".").to_owned(), recreate=false, read_only=false))]
177+
#[pyo3(signature = (config=None, path=Some(Path::new(".").to_owned()), recreate=false, read_only=false))]
177178
fn new(
178179
_py: Python,
179-
config: Option<&Config>,
180+
config: Option<Config>,
180181
path: Option<PathBuf>,
181182
recreate: bool,
182183
read_only: bool,
@@ -280,7 +281,8 @@ impl OntoEnv {
280281
transform::rewrite_sh_prefixes_graph(&mut graph, base_ontology);
281282
transform::remove_ontology_declarations_graph(&mut graph, base_ontology);
282283
}
283-
transform::remove_owl_imports_graph(&mut graph);
284+
// remove the owl:import statement for the 'uri' ontology
285+
transform::remove_owl_imports_graph(&mut graph, Some(&[(&iri).into()]));
284286

285287
Python::with_gil(|_py| {
286288
for triple in graph.into_iter() {
@@ -304,6 +306,7 @@ impl OntoEnv {
304306
Ok(())
305307
}
306308

309+
/// List the ontologies in the imports closure of the given ontology
307310
#[pyo3(signature = (uri))]
308311
fn list_closure(&self, py: Python, uri: &str) -> PyResult<Vec<String>> {
309312
let iri = NamedNode::new(uri)
@@ -320,6 +323,9 @@ impl OntoEnv {
320323
Ok(names)
321324
}
322325

326+
/// Merge all graphs in the imports closure of the given ontology into a single graph. If
327+
/// destination_graph is provided, add the merged graph to the destination_graph. If not,
328+
/// return the merged graph.
323329
#[pyo3(signature = (uri, destination_graph=None, rewrite_sh_prefixes=false, remove_owl_imports=false))]
324330
fn get_closure<'a>(
325331
&self,
@@ -345,7 +351,7 @@ impl OntoEnv {
345351
Some(g) => g.clone(),
346352
None => rdflib.getattr("Graph")?.call0()?,
347353
};
348-
let graph = env
354+
let (graph, successful_imports, failed_imports) = env
349355
.get_union_graph(
350356
&closure,
351357
Some(rewrite_sh_prefixes),
@@ -357,7 +363,6 @@ impl OntoEnv {
357363
let s: Term = triple.subject.into();
358364
let p: Term = triple.predicate.into();
359365
let o: Term = triple.object.into();
360-
361366
let t = PyTuple::new(
362367
py,
363368
&[
@@ -368,10 +373,24 @@ impl OntoEnv {
368373
)?;
369374
destination_graph.getattr("add")?.call1((t,))?;
370375
}
376+
377+
// Remove each successful_imports url in the closure from the destination_graph
378+
if remove_owl_imports {
379+
for graphid in successful_imports {
380+
let iri = term_to_python(py, &rdflib, Term::NamedNode(graphid.into()))?;
381+
let pred = term_to_python(py, &rdflib, IMPORTS.into())?;
382+
// remove triples with (None, pred, iri)
383+
let remove_tuple = PyTuple::new(py, &[py.None(), pred.into(), iri.into()])?;
384+
destination_graph.getattr("remove")?.call1((remove_tuple,))?;
385+
}
386+
}
387+
388+
// Remove each url in the closure from the destination_graph
371389
return Ok::<Bound<'_, PyAny>, PyErr>(destination_graph);
372390
})
373391
}
374392

393+
/// Print the contents of the OntoEnv
375394
#[pyo3(signature = (includes=None))]
376395
fn dump(&self, py: Python, includes: Option<String>) -> PyResult<()> {
377396
let inner = self.inner.clone();
@@ -380,6 +399,8 @@ impl OntoEnv {
380399
Ok(())
381400
}
382401

402+
/// Import the dependencies of the given graph into the graph. Removes the owl:imports
403+
/// of all imported ontologies.
383404
fn import_dependencies<'a>(
384405
&self,
385406
py: Python<'a>,
@@ -401,6 +422,7 @@ impl OntoEnv {
401422
self.get_closure(py, &ontology, Some(graph), true, true)
402423
}
403424

425+
/// Add a new ontology to the OntoEnv
404426
fn add(&self, location: &Bound<'_, PyAny>) -> PyResult<()> {
405427
let inner = self.inner.clone();
406428
let mut env = inner.lock().unwrap();
@@ -411,6 +433,8 @@ impl OntoEnv {
411433
Ok(())
412434
}
413435

436+
/// Refresh the OntoEnv by re-loading all remote graphs and loading
437+
/// any local graphs which have changed since the last update
414438
fn refresh(&self) -> PyResult<()> {
415439
let inner = self.inner.clone();
416440
let mut env = inner.lock().unwrap();
@@ -419,6 +443,7 @@ impl OntoEnv {
419443
Ok(())
420444
}
421445

446+
/// Export the graph with the given URI to an rdflib.Graph
422447
fn get_graph(&self, py: Python, uri: &Bound<'_, PyString>) -> PyResult<Py<PyAny>> {
423448
let rdflib = py.import("rdflib")?;
424449
let iri = NamedNode::new(uri.to_string())
@@ -451,6 +476,7 @@ impl OntoEnv {
451476
Ok(res.into())
452477
}
453478

479+
/// Get the names of all ontologies in the OntoEnv
454480
fn get_ontology_names(&self) -> PyResult<Vec<String>> {
455481
let inner = self.inner.clone();
456482
let env = inner.lock().unwrap();

0 commit comments

Comments
 (0)