Skip to content

Commit 5561be4

Browse files
authored
Merge pull request #5 from gtfierro/gtf-python-fixes
Gtf python fixes
2 parents 4cde1e0 + 4aa0a55 commit 5561be4

File tree

7 files changed

+223
-72
lines changed

7 files changed

+223
-72
lines changed

cli/src/main.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,12 @@ fn main() -> Result<()> {
207207
.get_ontology_by_name(iri.as_ref())
208208
.ok_or(anyhow::anyhow!(format!("Ontology {} not found", iri)))?;
209209
let closure = env.get_dependency_closure(ont.id())?;
210-
let graph = env.get_union_graph(&closure, rewrite_sh_prefixes, remove_owl_imports)?;
210+
let (graph, _successful, failed_imports) = env.get_union_graph(&closure, rewrite_sh_prefixes, remove_owl_imports)?;
211+
if let Some(failed_imports) = failed_imports {
212+
for imp in failed_imports {
213+
eprintln!("{}", imp);
214+
}
215+
}
211216
// write the graph to a file
212217
if let Some(destination) = destination {
213218
write_dataset_to_file(&graph, &destination)?;

lib/src/lib.rs

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use std::fs;
3030
use std::io::{BufReader, Write};
3131
use std::path::Path;
3232
use walkdir::WalkDir;
33-
use std::fmt;
33+
use std::fmt::{self, Display};
3434

3535
// custom derive for ontologies field as vec of Ontology
3636
fn ontologies_ser<S>(
@@ -56,6 +56,23 @@ where
5656
Ok(map)
5757
}
5858

59+
pub struct FailedImport {
60+
ontology: GraphIdentifier,
61+
error: String,
62+
}
63+
64+
impl FailedImport {
65+
pub fn new(ontology: GraphIdentifier, error: String) -> Self {
66+
Self { ontology, error }
67+
}
68+
}
69+
70+
impl Display for FailedImport {
71+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
72+
write!(f, "Failed to import ontology {}: {}", self.ontology, self.error)
73+
}
74+
}
75+
5976
pub struct EnvironmentStatus {
6077
// true if there is an environment that ontoenv can find
6178
exists: bool,
@@ -831,25 +848,31 @@ impl OntoEnv {
831848
Ok(closure)
832849
}
833850

834-
/// Returns a graph containing the union of all graphs_ids
851+
/// Returns a graph containing the union of all graphs_ids, along with a list of
852+
/// graphs that could and could not be imported.
835853
pub fn get_union_graph(
836854
&self,
837855
graph_ids: &[GraphIdentifier],
838856
rewrite_sh_prefixes: Option<bool>,
839857
remove_owl_imports: Option<bool>,
840-
// TODO: remove_ontology_declarations
841-
) -> Result<Dataset> {
858+
) -> Result<(Dataset, Vec<GraphIdentifier>, Option<Vec<FailedImport>>)> {
842859
// compute union of all graphs
843860
let mut union: Dataset = Dataset::new();
844861
let store = self.store();
862+
let mut failed_imports: Vec<FailedImport> = vec![];
863+
let mut successful_imports: Vec<GraphIdentifier> = vec![];
845864
for id in graph_ids {
846865
let graphname: NamedOrBlankNode = match id.graphname()? {
847866
GraphName::NamedNode(n) => NamedOrBlankNode::NamedNode(n),
848867
_ => continue,
849868
};
850869

851870
if !store.contains_named_graph(graphname.as_ref())? {
852-
return Err(anyhow::anyhow!("Graph not found: {:?}", id));
871+
failed_imports.push(FailedImport {
872+
ontology: id.clone(),
873+
error: "Graph not found".to_string(),
874+
});
875+
continue;
853876
}
854877

855878
let mut count = 0;
@@ -881,12 +904,9 @@ impl OntoEnv {
881904
ONTOLOGY,
882905
graphname.as_ref(),
883906
);
884-
if !union.remove(to_remove) {
885-
error!("Failed to remove ontology declaration: {:?}", to_remove);
886-
}
907+
union.remove(to_remove);
887908
}
888-
889-
909+
successful_imports.push(id.clone());
890910
info!("Added {} triples from graph: {:?}", count, id);
891911
}
892912
let first_id = graph_ids
@@ -896,15 +916,22 @@ impl OntoEnv {
896916

897917
// Rewrite sh:prefixes
898918
// defaults to true if not specified
899-
if let Some(true) = rewrite_sh_prefixes.or(Some(true)) {
919+
if rewrite_sh_prefixes.unwrap_or(true) {
900920
transform::rewrite_sh_prefixes(&mut union, root_ontology);
901921
}
902922
// remove owl:imports
903-
if let Some(true) = remove_owl_imports.or(Some(true)) {
904-
transform::remove_owl_imports(&mut union)
923+
if remove_owl_imports.unwrap_or(true) {
924+
let to_remove: Vec<NamedNodeRef> = graph_ids.iter().map(|id| id.into()).collect();
925+
println!("Removing owl:imports: {:?}", to_remove);
926+
transform::remove_owl_imports(&mut union, Some(&to_remove));
905927
}
906928
transform::remove_ontology_declarations(&mut union, root_ontology);
907-
Ok(union)
929+
let failed_imports = if failed_imports.is_empty() {
930+
None
931+
} else {
932+
Some(failed_imports)
933+
};
934+
Ok((union, successful_imports, failed_imports))
908935
}
909936

910937
/// Returns a list of issues with the environment

lib/src/ontology.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,18 @@ impl std::fmt::Display for GraphIdentifier {
5555
}
5656
}
5757

58+
/// Converts a graph identifier into its name, consuming the identifier.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl then provides `Into<NamedNode> for GraphIdentifier` automatically, so
/// existing `.into()` call sites keep working.
impl From<GraphIdentifier> for NamedNode {
    fn from(id: GraphIdentifier) -> Self {
        id.name
    }
}
63+
64+
/// Borrows the name of a graph identifier as a `NamedNodeRef`.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl then provides `Into<NamedNodeRef<'a>> for &'a GraphIdentifier`
/// automatically, so existing `.into()` call sites keep working.
impl<'a> From<&'a GraphIdentifier> for NamedNodeRef<'a> {
    fn from(id: &'a GraphIdentifier) -> Self {
        (&id.name).into()
    }
}
69+
5870
impl GraphIdentifier {
5971
pub fn new(name: NamedNodeRef) -> Self {
6072
// location is same as name

lib/src/transform.rs

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::consts::{DECLARE, IMPORTS, ONTOLOGY, PREFIXES, TYPE};
2-
use oxigraph::model::{Dataset, Graph, Quad, QuadRef, SubjectRef, Triple, TripleRef};
2+
use oxigraph::model::{Dataset, Graph, Quad, QuadRef, SubjectRef, Triple, TripleRef, NamedNodeRef, TermRef};
33

44
/// Rewrites all sh:prefixes in the graph to point to the provided root
55
pub fn rewrite_sh_prefixes(graph: &mut Dataset, root: SubjectRef) {
@@ -68,13 +68,24 @@ pub fn rewrite_sh_prefixes_graph(graph: &mut Graph, root: SubjectRef) {
6868

6969
/// Remove owl:imports statements from a graph. Can be helpful to do after computing the union of
7070
/// all imports so that downstream tools do not attempt to fetch these graph dependencies
71-
/// themselves
72-
pub fn remove_owl_imports(graph: &mut Dataset) {
73-
// remove owl:imports
74-
let mut to_remove: Vec<Quad> = vec![];
75-
for quad in graph.quads_for_predicate(IMPORTS) {
76-
to_remove.push(quad.into());
77-
}
71+
/// themselves. If ontologies_to_remove is provided, only remove owl:imports to those ontologies
72+
pub fn remove_owl_imports(graph: &mut Dataset, ontologies_to_remove: Option<&[NamedNodeRef]>) {
73+
let to_remove: Vec<Quad> = graph.quads_for_predicate(IMPORTS)
74+
.filter_map(|quad| {
75+
match quad.object {
76+
TermRef::NamedNode(obj) => {
77+
if ontologies_to_remove.map_or(true, |ontologies| ontologies.contains(&obj)) {
78+
Some(quad.into())
79+
} else {
80+
None
81+
}
82+
}
83+
_ => None
84+
}
85+
})
86+
.collect();
87+
88+
// Remove the collected quads
7889
for quad in to_remove {
7990
graph.remove(quad.as_ref());
8091
}
@@ -83,14 +94,25 @@ pub fn remove_owl_imports(graph: &mut Dataset) {
8394
/// Remove owl:imports statements from a graph. Can be helpful to do after computing the union of
8495
/// all imports so that downstream tools do not attempt to fetch these graph dependencies
8596
/// themselves
86-
pub fn remove_owl_imports_graph(graph: &mut Graph) {
87-
// remove owl:imports
88-
let mut to_remove: Vec<Triple> = vec![];
89-
for triple in graph.triples_for_predicate(IMPORTS) {
90-
to_remove.push(triple.into());
91-
}
92-
for triple in to_remove {
93-
graph.remove(triple.as_ref());
97+
pub fn remove_owl_imports_graph(graph: &mut Graph, ontologies_to_remove: Option<&[NamedNodeRef]>) {
98+
let to_remove: Vec<Triple> = graph.triples_for_predicate(IMPORTS)
99+
.filter_map(|triple| {
100+
match triple.object {
101+
TermRef::NamedNode(obj) => {
102+
if ontologies_to_remove.map_or(true, |ontologies| ontologies.contains(&obj)) {
103+
Some(triple.into())
104+
} else {
105+
None
106+
}
107+
}
108+
_ => None
109+
}
110+
})
111+
.collect();
112+
113+
// Remove the collected quads
114+
for quad in to_remove {
115+
graph.remove(quad.as_ref());
94116
}
95117
}
96118

lib/src/util.rs

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use anyhow::Result;
22

3-
use std::io::Read;
3+
use std::io::{Read, Seek};
44
use std::path::Path;
55

66
use reqwest::header::CONTENT_TYPE;
@@ -58,6 +58,42 @@ pub fn read_file(file: &Path) -> Result<OxigraphGraph> {
5858
Ok(graph)
5959
}
6060

61+
fn read_format<T: Read + Seek>(mut original_content: BufReader<T>, format: Option<RdfFormat>) -> Result<OxigraphGraph> {
62+
let format = format.unwrap_or(RdfFormat::Turtle);
63+
for format in [
64+
format,
65+
RdfFormat::Turtle,
66+
RdfFormat::RdfXml,
67+
RdfFormat::NTriples,
68+
] {
69+
let content = original_content.get_mut();
70+
content.rewind()?;
71+
let parser = RdfParser::from_format(format);
72+
let mut graph = OxigraphGraph::new();
73+
let parser = parser.for_reader(content);
74+
75+
// Process each quad from the parser
76+
for quad in parser {
77+
match quad {
78+
Ok(q) => {
79+
let triple = Triple::new(q.subject, q.predicate, q.object);
80+
graph.insert(&triple);
81+
}
82+
Err(_) => {
83+
// Break the outer loop if an error occurs
84+
break;
85+
}
86+
}
87+
}
88+
89+
// If we successfully processed quads and did not encounter an error
90+
if !graph.is_empty() {
91+
return Ok(graph);
92+
}
93+
}
94+
Err(anyhow::anyhow!("Failed to parse graph"))
95+
}
96+
6197
pub fn read_url(file: &str) -> Result<OxigraphGraph> {
6298
debug!("Reading url: {}", file);
6399

@@ -83,39 +119,7 @@ pub fn read_url(file: &str) -> Result<OxigraphGraph> {
83119
});
84120

85121
let content: BufReader<_> = BufReader::new(std::io::Cursor::new(resp.bytes()?));
86-
87-
// if content type is known, use it to parse the graph
88-
if let Some(format) = content_type {
89-
let parser = RdfParser::from_format(format);
90-
let mut graph = OxigraphGraph::new();
91-
let parser = parser.for_reader(content);
92-
for quad in parser {
93-
let quad = quad?;
94-
let triple = Triple::new(quad.subject, quad.predicate, quad.object);
95-
graph.insert(&triple);
96-
}
97-
return Ok(graph);
98-
}
99-
100-
// if content type is unknown, try all formats. Requires us to make a copy of the content
101-
// since we can't rewind the reader
102-
let content_vec: Vec<u8> = content.bytes().map(|b| b.unwrap()).collect();
103-
104-
for format in [RdfFormat::Turtle, RdfFormat::RdfXml, RdfFormat::NTriples] {
105-
let vcontent = BufReader::new(std::io::Cursor::new(&content_vec));
106-
let parser = RdfParser::from_format(format);
107-
let mut graph = OxigraphGraph::new();
108-
109-
// TODO: if there's an error on parser.read_triples, try the next format
110-
let parser = parser.for_reader(vcontent);
111-
for quad in parser {
112-
let quad = quad?;
113-
let triple = Triple::new(quad.subject, quad.predicate, quad.object);
114-
graph.insert(&triple);
115-
}
116-
return Ok(graph);
117-
}
118-
Err(anyhow::anyhow!("Failed to parse graph from {}", file))
122+
read_format(content, content_type)
119123
}
120124

121125
// return a "impl IntoIterator<Item = impl Into<Quad>>" for a graph. Iter through

0 commit comments

Comments
 (0)