diff --git a/pom.xml b/pom.xml
index 84fb0af6..fe9ab29e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,5 @@
-
 <modelVersion>4.0.0</modelVersion>
@@ -10,6 +11,7 @@
     <neo4j.version>3.3.2</neo4j.version>
     <rdf4j.version>2.2.4</rdf4j.version>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <apache-common.version>3.7</apache-common.version>
@@ -20,10 +22,10 @@
       <scope>provided</scope>
     </dependency>
     <dependency>
-      <groupId>org.neo4j</groupId>
-      <artifactId>server-api</artifactId>
-      <version>${neo4j.version}</version>
-      <scope>provided</scope>
+      <groupId>org.neo4j</groupId>
+      <artifactId>server-api</artifactId>
+      <version>${neo4j.version}</version>
+      <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.eclipse.rdf4j</groupId>
@@ -109,6 +111,11 @@
       <version>1.5.0-beta03</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+      <version>${apache-common.version}</version>
+    </dependency>
@@ -130,7 +137,8 @@
-
+
diff --git a/src/main/java/semantics/RDFImport.java b/src/main/java/semantics/RDFImport.java
index 4202050c..8713cda3 100644
--- a/src/main/java/semantics/RDFImport.java
+++ b/src/main/java/semantics/RDFImport.java
@@ -1,5 +1,26 @@
 package semantics;
 
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+
+import org.apache.commons.io.FilenameUtils;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFHandlerException;
+import org.eclipse.rdf4j.rio.RDFParseException;
+import org.eclipse.rdf4j.rio.RDFParser;
+import org.eclipse.rdf4j.rio.Rio;
 import org.neo4j.graphdb.GraphDatabaseService;
 import org.neo4j.graphdb.Node;
 import org.neo4j.graphdb.QueryExecutionException;
@@ -10,208 +31,289 @@
 import org.neo4j.procedure.Mode;
 import org.neo4j.procedure.Name;
 import org.neo4j.procedure.Procedure;
 
-import org.eclipse.rdf4j.rio.*;
-import semantics.result.GraphResult;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.nio.charset.Charset;
-import java.util.*;
-import java.util.stream.Stream;
+import semantics.result.GraphResult;
 
 /**
  * Created by jbarrasa on 21/03/2016.
  *

- * RDF importer based on: - * 1. DatatypeProperties become node attributes - * 2. rdf:type relationships are transformed into labels on the subject node - * 3. rdf:type relationships generate :Class nodes on the object + * RDF importer based on: 1. DatatypeProperties become node attributes 2. + * rdf:type relationships are transformed into labels on the subject node 3. + * rdf:type relationships generate :Class nodes on the object */ public class RDFImport { - private static final boolean DEFAULT_SHORTEN_URLS = true; - private static final boolean DEFAULT_TYPES_TO_LABELS = true; - private static final long DEFAULT_COMMIT_SIZE = 25000; - private static final long DEFAULT_NODE_CACHE_SIZE = 10000; - public static final String PREFIX_SEPARATOR = "__"; - - @Context - public GraphDatabaseService db; - @Context - public Log log; - - public static RDFFormat[] availableParsers = new RDFFormat[]{RDFFormat.RDFXML, RDFFormat.JSONLD, RDFFormat.TURTLE, - RDFFormat.NTRIPLES, RDFFormat.TRIG}; - - - - @Procedure(mode = Mode.WRITE) - public Stream importRDF(@Name("url") String url, @Name("format") String format, - @Name("props") Map props) { - - final boolean shortenUrls = (props.containsKey("shortenUrls")?(boolean)props.get("shortenUrls"):DEFAULT_SHORTEN_URLS); - final boolean typesToLabels = (props.containsKey("typesToLabels")?(boolean)props.get("typesToLabels"):DEFAULT_TYPES_TO_LABELS); - final long commitSize = (props.containsKey("commitSize")?(long)props.get("commitSize"):DEFAULT_COMMIT_SIZE); - final long nodeCacheSize = (props.containsKey("nodeCacheSize")?(long)props.get("nodeCacheSize"):DEFAULT_NODE_CACHE_SIZE); - final String languageFilter = (props.containsKey("languageFilter")?(String)props.get("languageFilter"):null); - - ImportResults importResults = new ImportResults(); - URLConnection urlConn; - DirectStatementLoader statementLoader = new DirectStatementLoader(db, (commitSize > 0 ? 
commitSize : 5000), - nodeCacheSize, shortenUrls, typesToLabels, languageFilter, log); - try { - checkIndexesExist(); - urlConn = new URL(url).openConnection(); - if (props.containsKey("headerParams")) { - ((Map) props.get("headerParams")).forEach( (k,v) -> urlConn.setRequestProperty(k,v)); - } - InputStream inputStream = urlConn.getInputStream(); - RDFParser rdfParser = Rio.createParser(getFormat(format)); - rdfParser.setRDFHandler(statementLoader); - rdfParser.parse(inputStream, url); - } catch (MalformedURLException e) { - e.printStackTrace(); - } catch (IOException | RDFHandlerException | QueryExecutionException | RDFParseException | RDFImportPreRequisitesNotMet e) { - importResults.setTerminationKO(e.getMessage()); - e.printStackTrace(); - } finally { - importResults.setTriplesLoaded(statementLoader.getIngestedTriples()); - importResults.setNamespaces(statementLoader.getNamespaces()); - } - return Stream.of(importResults); - } - - @Procedure(mode = Mode.READ) - public Stream previewRDF(@Name("url") String url, @Name("format") String format, - @Name("props") Map props) { - - final boolean shortenUrls = (props.containsKey("shortenUrls")?(boolean)props.get("shortenUrls"):DEFAULT_SHORTEN_URLS); - final boolean typesToLabels = (props.containsKey("typesToLabels")?(boolean)props.get("typesToLabels"):DEFAULT_TYPES_TO_LABELS); - final String languageFilter = (props.containsKey("languageFilter")?(String)props.get("languageFilter"):null); - - URLConnection urlConn; - Map virtualNodes = new HashMap<>(); - List virtualRels = new ArrayList<>(); - - StatementPreviewer statementViewer = new StatementPreviewer(db, shortenUrls, typesToLabels, virtualNodes, virtualRels, languageFilter, log); - try { - urlConn = new URL(url).openConnection(); - if (props.containsKey("headerParams")) { - ((Map) props.get("headerParams")).forEach( (k,v) -> urlConn.setRequestProperty(k,v)); - } - InputStream inputStream = urlConn.getInputStream(); - RDFFormat rdfFormat = getFormat(format); - log.info("Data set to be parsed as " + rdfFormat); - RDFParser rdfParser = Rio.createParser(rdfFormat); - rdfParser.setRDFHandler(statementViewer); - rdfParser.parse(inputStream, "http://neo4j.com/base/"); - } catch (MalformedURLException e) { - e.printStackTrace(); - } catch (IOException | RDFHandlerException | QueryExecutionException | RDFParseException | RDFImportPreRequisitesNotMet e) { - e.printStackTrace(); - } - - GraphResult graphResult = new GraphResult(new ArrayList<>(virtualNodes.values()), virtualRels); - return Stream.of(graphResult); - - - } - - @Procedure(mode = Mode.READ) - public Stream previewRDFSnippet(@Name("rdf") String rdfFragment, @Name("format") String format, - @Name("props") Map props) { - - final boolean shortenUrls = (props.containsKey("shortenUrls")?(boolean)props.get("shortenUrls"):DEFAULT_SHORTEN_URLS); - final boolean typesToLabels = (props.containsKey("typesToLabels")?(boolean)props.get("typesToLabels"):DEFAULT_TYPES_TO_LABELS); - final String languageFilter = (props.containsKey("languageFilter")?(String)props.get("languageFilter"):null); - - Map virtualNodes = new HashMap<>(); - List virtualRels = new ArrayList<>(); - - StatementPreviewer statementViewer = new StatementPreviewer(db, shortenUrls, typesToLabels, virtualNodes, virtualRels, languageFilter, log); - try { - InputStream inputStream = new ByteArrayInputStream( rdfFragment.getBytes(Charset.defaultCharset()) ); //rdfFragment.openStream(); - RDFFormat rdfFormat = getFormat(format); - log.info("Data set to be parsed as " + rdfFormat); - RDFParser 
rdfParser = Rio.createParser(rdfFormat); - rdfParser.setRDFHandler(statementViewer); - rdfParser.parse(inputStream, "http://neo4j.com/base/"); - } catch (MalformedURLException e) { - e.printStackTrace(); - } catch (IOException | RDFHandlerException | QueryExecutionException | RDFParseException | RDFImportPreRequisitesNotMet e) { - e.printStackTrace(); - } - - GraphResult graphResult = new GraphResult(new ArrayList<>(virtualNodes.values()), virtualRels); - return Stream.of(graphResult); - - - } - private void checkIndexesExist() throws RDFImportPreRequisitesNotMet { - Iterable indexes = db.schema().getIndexes(); - if(missing(indexes.iterator(),"Resource")){ - throw new RDFImportPreRequisitesNotMet("The required index on :Resource(uri) could not be found"); - } - } - - private boolean missing(Iterator iterator, String indexLabel) { - while (iterator.hasNext()){ - IndexDefinition indexDef = iterator.next(); - if(indexDef.getLabel().name().equals(indexLabel) && - indexDef.getPropertyKeys().iterator().next().equals("uri")) { - return false; - } - } - return true; - } - - private RDFFormat getFormat(String format) throws RDFImportPreRequisitesNotMet { - if (format != null) { - for (RDFFormat parser : availableParsers) { - if (parser.getName().equals(format)) - return parser; - } - } - throw new RDFImportPreRequisitesNotMet("Unrecognized serialization format: " + format); - } - - - - - public static class ImportResults { - public String terminationStatus = "OK"; - public long triplesLoaded = 0; - public Map namespaces; - public String extraInfo = ""; - - public void setTriplesLoaded(long triplesLoaded) { - this.triplesLoaded = triplesLoaded; - } - - public void setNamespaces(Map namespaces) { - this.namespaces = namespaces; - } - - public void setTerminationKO(String message) { - this.terminationStatus = "KO"; - this.extraInfo = message; - } - - } - - private class RDFImportPreRequisitesNotMet extends Exception { - String message; - - public RDFImportPreRequisitesNotMet(String s) { - message = s; - } - - @Override - public String getMessage() { - return message; - } - } + private static final boolean DEFAULT_SHORTEN_URLS = true; + private static final boolean DEFAULT_TYPES_TO_LABELS = true; + private static final long DEFAULT_COMMIT_SIZE = 25000; + private static final long DEFAULT_NODE_CACHE_SIZE = 10000; + public static final String PREFIX_SEPARATOR = "__"; + + @Context + public GraphDatabaseService db; + @Context + public Log log; + + public static RDFFormat[] availableParsers = new RDFFormat[] { RDFFormat.RDFXML, RDFFormat.JSONLD, RDFFormat.TURTLE, + RDFFormat.NTRIPLES, RDFFormat.TRIG }; + + @Procedure(mode = Mode.WRITE) + public Stream importRDF(@Name("url") String url, + @Name("format") String format, + @Name("props") Map props) { + + final boolean shortenUrls = (props.containsKey("shortenUrls") ? (boolean) props.get("shortenUrls") : DEFAULT_SHORTEN_URLS); + final boolean typesToLabels = (props.containsKey("typesToLabels") ? (boolean) props.get("typesToLabels") : DEFAULT_TYPES_TO_LABELS); + final long commitSize = (props.containsKey("commitSize") ? (long) props.get("commitSize") : DEFAULT_COMMIT_SIZE); + final long nodeCacheSize = (props.containsKey("nodeCacheSize") ? (long) props.get("nodeCacheSize") : DEFAULT_NODE_CACHE_SIZE); + final String languageFilter = (props.containsKey("languageFilter") ? 
(String) props.get("languageFilter") : null);
+
+        ImportResults importResults = new ImportResults();
+
+        try {
+            List<String> listOfUrls = getTTLUrls(url);
+            for (String oneUrl : listOfUrls) {
+
+                DirectStatementLoader statementLoader = new DirectStatementLoader(db,
+                        (commitSize > 0 ? commitSize : 5000), nodeCacheSize, shortenUrls, typesToLabels, languageFilter,
+                        log);
+
+                ImportResults iResults = importRDFProcessor(oneUrl, format, props, statementLoader);
+                importResults.setNamespaces(iResults.namespaces);
+                importResults.setTriplesLoaded(iResults.triplesLoaded);
+                // Only propagate a KO status; calling setTerminationKO unconditionally
+                // would flag successful imports as failed.
+                if ("KO".equals(iResults.terminationStatus)) {
+                    importResults.setTerminationKO(iResults.extraInfo);
+                }
+            }
+        } catch (MalformedURLException ex) {
+            ex.printStackTrace();
+        }
+
+        return Stream.of(importResults);
+
+    }
+
+    private ImportResults importRDFProcessor(String url, String format, Map<String, Object> props,
+            DirectStatementLoader statementLoader) {
+
+        ImportResults importResults = new ImportResults();
+
+        URLConnection urlConn;
+
+        try {
+            checkIndexesExist();
+
+            urlConn = new URL(url).openConnection();
+            if (props.containsKey("headerParams")) {
+                ((Map<String, String>) props.get("headerParams")).forEach((k, v) -> urlConn.setRequestProperty(k, v));
+            }
+            InputStream inputStream = urlConn.getInputStream();
+            RDFParser rdfParser = Rio.createParser(getFormat(format));
+            rdfParser.setRDFHandler(statementLoader);
+            rdfParser.parse(inputStream, url);
+        } catch (MalformedURLException e) {
+            e.printStackTrace();
+        } catch (IOException | RDFHandlerException | QueryExecutionException | RDFParseException
+                | RDFImportPreRequisitesNotMet e) {
+            importResults.setTerminationKO(e.getMessage());
+            e.printStackTrace();
+        } finally {
+            importResults.setTriplesLoaded(statementLoader.getIngestedTriples());
+            importResults.setNamespaces(statementLoader.getNamespaces());
+        }
+
+        return importResults;
+    }
+
+    /**
+     * Accepts a URL using either the http:// or the file:// protocol. If the URL uses
+     * the file:// protocol and points to a directory, all .ttl files in that directory
+     * and its subfolders are collected.
+     *
+     * @param url the http:// or file:// URL to resolve
+     * @return a list of one or more URLs
+     * @throws MalformedURLException
+     */
+    private List<String> getTTLUrls(String url) throws MalformedURLException {
+
+        List<String> listOfUrls = new ArrayList<>();
+
+        URL lURL = new URL(url);
+
+        // Expand only file:// URLs; http:// URLs are passed through unchanged
+        if ("file".equalsIgnoreCase(lURL.getProtocol())) {
+            String file = lURL.getFile();
+            // Handle the case where the URL points to a directory
+            if (new File(file).isDirectory()) {
+                listOfUrls = loadTTLFiles(file);
+            } else {
+                listOfUrls.add(url);
+            }
+        } else {
+            listOfUrls.add(url);
+        }
+
+        return listOfUrls;
+    }
+
+    /**
+     * Recursively collects all .ttl files found in the given directory and its
+     * subdirectories.
+     *
+     * @param directoryName the directory to scan
+     * @return a list of full paths of the .ttl files, as file: URLs
+     */
+    private static List<String> loadTTLFiles(String directoryName) {
+
+        File directory = new File(directoryName);
+
+        List<String> lFiles = new ArrayList<>();
+
+        // Get all the ttl files from the directory and its sub-directories
+        File[] fList = directory.listFiles();
+        if (fList == null) {
+            // listFiles() returns null for non-directories and on I/O errors
+            return lFiles;
+        }
+        for (File file : fList) {
+            if (file.isFile()) {
+
+                // Skip files which are NOT ttl
+                if (!FilenameUtils.getExtension(file.getName()).equalsIgnoreCase("ttl")) {
+                    continue;
+                }
+
+                lFiles.add("file:" + file.toPath().toString());
+
+            } else if (file.isDirectory()) {
+                lFiles.addAll(loadTTLFiles(file.getAbsolutePath()));
+            }
+        }
+        return lFiles;
+    }
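The loadTTLFiles helper above walks the directory tree by recursing over File.listFiles(). As a point of comparison only (this is not part of the change), the same collection step can be sketched with java.nio.file.Files.walk, which flattens the recursion and removes the need for the commons-io FilenameUtils call; the class and method names below are made up for the illustration.

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class TtlFileCollector {

    // Walks the whole tree once and keeps only regular files ending in ".ttl".
    static List<String> collectTtlUrls(String directoryName) throws IOException {
        try (Stream<Path> paths = Files.walk(Paths.get(directoryName))) {
            return paths
                    .filter(Files::isRegularFile)
                    .filter(p -> p.getFileName().toString().toLowerCase().endsWith(".ttl"))
                    .map(p -> p.toUri().toString())   // yields file:///... URLs directly
                    .collect(Collectors.toList());
        }
    }

    public static void main(String[] args) throws IOException {
        collectTtlUrls("src/test/resources/rootTTLs").forEach(System.out::println);
    }
}
```

Because Files.walk streams every entry beneath the root, a single suffix filter replaces the explicit file/directory branching used in the patch.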
+
+    @Procedure(mode = Mode.READ)
+    public Stream<GraphResult> previewRDF(@Name("url") String url, @Name("format") String format,
+            @Name("props") Map<String, Object> props) {
+
+        final boolean shortenUrls = (props.containsKey("shortenUrls") ? (boolean) props.get("shortenUrls")
+                : DEFAULT_SHORTEN_URLS);
+        final boolean typesToLabels = (props.containsKey("typesToLabels") ? (boolean) props.get("typesToLabels")
+                : DEFAULT_TYPES_TO_LABELS);
+        final String languageFilter = (props.containsKey("languageFilter") ? (String) props.get("languageFilter")
+                : null);
+
+        URLConnection urlConn;
+        Map<String, Node> virtualNodes = new HashMap<>();
+        List<Relationship> virtualRels = new ArrayList<>();
+
+        StatementPreviewer statementViewer = new StatementPreviewer(db, shortenUrls, typesToLabels, virtualNodes,
+                virtualRels, languageFilter, log);
+        try {
+            urlConn = new URL(url).openConnection();
+            if (props.containsKey("headerParams")) {
+                ((Map<String, String>) props.get("headerParams")).forEach((k, v) -> urlConn.setRequestProperty(k, v));
+            }
+            InputStream inputStream = urlConn.getInputStream();
+            RDFFormat rdfFormat = getFormat(format);
+            log.info("Data set to be parsed as " + rdfFormat);
+            RDFParser rdfParser = Rio.createParser(rdfFormat);
+            rdfParser.setRDFHandler(statementViewer);
+            rdfParser.parse(inputStream, "http://neo4j.com/base/");
+        } catch (MalformedURLException e) {
+            e.printStackTrace();
+        } catch (IOException | RDFHandlerException | QueryExecutionException | RDFParseException
+                | RDFImportPreRequisitesNotMet e) {
+            e.printStackTrace();
+        }
+
+        GraphResult graphResult = new GraphResult(new ArrayList<>(virtualNodes.values()), virtualRels);
+        return Stream.of(graphResult);
+
+    }
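Both importRDFProcessor and previewRDF honour an optional headerParams entry in the props map by copying each key/value pair onto the URLConnection before the stream is opened. The following is a minimal, self-contained sketch of that pattern; the URL, header names and token value are placeholders rather than anything the project prescribes.

```java
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Map;

public class HeaderParamsExample {
    public static void main(String[] args) throws Exception {
        Map<String, String> headerParams = new HashMap<>();
        headerParams.put("Accept", "text/turtle");
        headerParams.put("Authorization", "Bearer <token>");   // placeholder credentials

        URLConnection conn = new URL("http://example.org/dataset.ttl").openConnection();
        // Same pattern as in the procedures: every map entry becomes a request header
        headerParams.forEach(conn::setRequestProperty);

        try (InputStream in = conn.getInputStream()) {
            System.out.println("First byte of the response: " + in.read());
        }
    }
}
```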
+
+    @Procedure(mode = Mode.READ)
+    public Stream<GraphResult> previewRDFSnippet(@Name("rdf") String rdfFragment, @Name("format") String format,
+            @Name("props") Map<String, Object> props) {
+
+        final boolean shortenUrls = (props.containsKey("shortenUrls") ? (boolean) props.get("shortenUrls")
+                : DEFAULT_SHORTEN_URLS);
+        final boolean typesToLabels = (props.containsKey("typesToLabels") ? (boolean) props.get("typesToLabels")
+                : DEFAULT_TYPES_TO_LABELS);
+        final String languageFilter = (props.containsKey("languageFilter") ? (String) props.get("languageFilter")
+                : null);
+
+        Map<String, Node> virtualNodes = new HashMap<>();
+        List<Relationship> virtualRels = new ArrayList<>();
+
+        StatementPreviewer statementViewer = new StatementPreviewer(db, shortenUrls, typesToLabels, virtualNodes,
+                virtualRels, languageFilter, log);
+        try {
+            InputStream inputStream = new ByteArrayInputStream(rdfFragment.getBytes(Charset.defaultCharset())); // rdfFragment.openStream();
+            RDFFormat rdfFormat = getFormat(format);
+            log.info("Data set to be parsed as " + rdfFormat);
+            RDFParser rdfParser = Rio.createParser(rdfFormat);
+            rdfParser.setRDFHandler(statementViewer);
+            rdfParser.parse(inputStream, "http://neo4j.com/base/");
+        } catch (MalformedURLException e) {
+            e.printStackTrace();
+        } catch (IOException | RDFHandlerException | QueryExecutionException | RDFParseException
+                | RDFImportPreRequisitesNotMet e) {
+            e.printStackTrace();
+        }
+
+        GraphResult graphResult = new GraphResult(new ArrayList<>(virtualNodes.values()), virtualRels);
+        return Stream.of(graphResult);
+
+    }
+
+    private void checkIndexesExist() throws RDFImportPreRequisitesNotMet {
+        Iterable<IndexDefinition> indexes = db.schema().getIndexes();
+        if (missing(indexes.iterator(), "Resource")) {
+            throw new RDFImportPreRequisitesNotMet("The required index on :Resource(uri) could not be found");
+        }
+    }
+
+    private boolean missing(Iterator<IndexDefinition> iterator, String indexLabel) {
+        while (iterator.hasNext()) {
+            IndexDefinition indexDef = iterator.next();
+            if (indexDef.getLabel().name().equals(indexLabel)
+                    && indexDef.getPropertyKeys().iterator().next().equals("uri")) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private RDFFormat getFormat(String format) throws RDFImportPreRequisitesNotMet {
+        if (format != null) {
+            for (RDFFormat parser : availableParsers) {
+                if (parser.getName().equals(format))
+                    return parser;
+            }
+        }
+        throw new RDFImportPreRequisitesNotMet("Unrecognized serialization format: " + format);
+    }
+
+    public static class ImportResults {
+        public String terminationStatus = "OK";
+        public long triplesLoaded = 0;
+        public Map<String, String> namespaces = new HashMap<>();
+        public String extraInfo = "";
+
+        public void setTriplesLoaded(long triplesLoaded) {
+            this.triplesLoaded += triplesLoaded;
+        }
+
+        public void setNamespaces(Map<String, String> namespaces) {
+            this.namespaces.putAll(namespaces);
+        }
+
+        public void setTerminationKO(String message) {
+            this.terminationStatus = "KO";
+            this.extraInfo = message;
+        }
+
+    }
+
+    private class RDFImportPreRequisitesNotMet extends Exception {
+        String message;
+
+        public RDFImportPreRequisitesNotMet(String s) {
+            message = s;
+        }
+
+        @Override
+        public String getMessage() {
+            return message;
+        }
+    }
 }
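Taken together, the changes above let a single importRDF call fan out over every .ttl file under a directory. A hypothetical end-user invocation through the Neo4j Java driver 1.x (the same API the tests use) might look as follows; the bolt URI, credentials and the /data/ttl path are placeholders, not values from this repository.

```java
import org.neo4j.driver.v1.AuthTokens;
import org.neo4j.driver.v1.Driver;
import org.neo4j.driver.v1.GraphDatabase;
import org.neo4j.driver.v1.Session;
import org.neo4j.driver.v1.StatementResult;

public class DirectoryImportExample {
    public static void main(String[] args) {
        try (Driver driver = GraphDatabase.driver("bolt://localhost:7687",
                AuthTokens.basic("neo4j", "password"));   // placeholder credentials
             Session session = driver.session()) {

            // The index on :Resource(uri) is the precondition checked by checkIndexesExist()
            session.run("CREATE INDEX ON :Resource(uri)");

            // A file:// URL that points to a directory triggers the new behaviour:
            // every .ttl file under /data/ttl (recursively) is imported in turn.
            StatementResult res = session.run(
                    "CALL semantics.importRDF('file:///data/ttl/','Turtle',{shortenUrls: true})");
            System.out.println("Triples loaded: " + res.single().get("triplesLoaded").asLong());
        }
    }
}
```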
diff --git a/src/test/java/semantics/RDFImportTest.java b/src/test/java/semantics/RDFImportTest.java
index 66d1f5cd..27842158 100644
--- a/src/test/java/semantics/RDFImportTest.java
+++ b/src/test/java/semantics/RDFImportTest.java
@@ -445,6 +445,33 @@ public void testPreviewFromFileLangFilter() throws Exception {
         }
     }
 
+    /**
+     * Can we load .ttl files in subfolders when a root folder is supplied?
+     * @throws Exception
+     */
+    @Test
+    public void testImportMultipleTurtleFiles01() throws Exception {
+
+        try (Driver driver = GraphDatabase.driver(neo4j.boltURI(),
+                Config.build().withEncryptionLevel(Config.EncryptionLevel.NONE).toConfig())) {
+
+            Session session = driver.session();
+            createIndices(neo4j.getGraphDatabaseService());
+            session.run("CREATE (rdf:NamespacePrefixDefinition {"
+                    + "  `http://www.example.com/ontology/1.0.0#`: 'ex',"
+                    + "  `http://www.w3.org/1999/02/22-rdf-syntax-ns#`: 'rdfs'})");
+            StatementResult importResults = session.run(String.format(
+                    "CALL semantics.importRDF('%s','Turtle',{nodeCacheSize: 1})", file("rootTTLs/")));
+            assertEquals(15, importResults.next().get("triplesLoaded").asInt());
+
+            StatementResult result = session.run(
+                    "MATCH (:ex" + PREFIX_SEPARATOR + "DISTANCEVALUE)-[:ex" + PREFIX_SEPARATOR + "units]->(mu) "
+                            + "RETURN mu.uri AS unitsUri, mu.ex" + PREFIX_SEPARATOR + "name as unitsName");
+
+            assertEquals(3, result.list().size());
+        }
+    }
+
+
     private void createIndices(GraphDatabaseService db) {
         db.execute("CREATE INDEX ON :Resource(uri)");
     }
diff --git a/src/test/resources/rootTTLs/sub1/testTurtle-sub1.ttl b/src/test/resources/rootTTLs/sub1/testTurtle-sub1.ttl
new file mode 100644
index 00000000..c866c300
--- /dev/null
+++ b/src/test/resources/rootTTLs/sub1/testTurtle-sub1.ttl
@@ -0,0 +1,14 @@
+@prefix : <http://www.example.com/ontology/1.0.0#> .
+@prefix common: .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix project: .
+
+project:DISTANCEVALUE-A181451
+    a :DISTANCEVALUE ;
+    :units common:MEASUREMENTUNIT-T1510615421641 ;
+    :value 0.55 .
+
+common:MEASUREMENTUNIT-T1510615421641
+    a :MEASUREMENTUNIT ;
+    :name "metres" .
diff --git a/src/test/resources/rootTTLs/sub2/testTurtle-sub2.ttl b/src/test/resources/rootTTLs/sub2/testTurtle-sub2.ttl
new file mode 100644
index 00000000..59db8bab
--- /dev/null
+++ b/src/test/resources/rootTTLs/sub2/testTurtle-sub2.ttl
@@ -0,0 +1,14 @@
+@prefix : <http://www.example.com/ontology/1.0.0#> .
+@prefix common: .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix project: .
+
+project:DISTANCEVALUE-A181452
+    a :DISTANCEVALUE ;
+    :units common:MEASUREMENTUNIT-T1510615421642 ;
+    :value 0.55 .
+
+common:MEASUREMENTUNIT-T1510615421642
+    a :MEASUREMENTUNIT ;
+    :name "metres" .
diff --git a/src/test/resources/rootTTLs/sub3/testTurtle-sub3.ttl b/src/test/resources/rootTTLs/sub3/testTurtle-sub3.ttl
new file mode 100644
index 00000000..33de506d
--- /dev/null
+++ b/src/test/resources/rootTTLs/sub3/testTurtle-sub3.ttl
@@ -0,0 +1,14 @@
+@prefix : <http://www.example.com/ontology/1.0.0#> .
+@prefix common: .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix project: .
+
+project:DISTANCEVALUE-A181453
+    a :DISTANCEVALUE ;
+    :units common:MEASUREMENTUNIT-T1510615421643 ;
+    :value 0.55 .
+
+common:MEASUREMENTUNIT-T1510615421643
+    a :MEASUREMENTUNIT ;
+    :name "metres" .
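Each of the three fixtures above contributes 5 triples (rdf:type, :units and :value on the DISTANCEVALUE resource, plus rdf:type and :name on the MEASUREMENTUNIT resource), which is where the assertEquals(15, ...) in testImportMultipleTurtleFiles01 comes from. A quick standalone check with RDF4J's Rio parser is sketched below; it assumes the fixture's prefix declarations resolve and that the repository root is the working directory.

```java
import java.io.FileInputStream;
import java.io.InputStream;

import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.Rio;

public class CountFixtureTriples {
    public static void main(String[] args) throws Exception {
        try (InputStream in = new FileInputStream(
                "src/test/resources/rootTTLs/sub1/testTurtle-sub1.ttl")) {
            // Parse the Turtle fixture into an in-memory Model and count its statements
            Model model = Rio.parse(in, "http://neo4j.com/base/", RDFFormat.TURTLE);
            System.out.println("Statements in fixture: " + model.size());   // expected: 5
        }
    }
}
```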