Skip to content

Commit f7d3992

Browse files
author
Stefan Milosavljevic
committed
feat: first simple implementation of turtle as additional input
1 parent a32116d commit f7d3992

3 files changed

Lines changed: 64 additions & 12 deletions

File tree

src/index.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ fn index_triple(t: Triple, index: &mut TypeIndex) {
9595
pub fn create_type_index(input: &Path, output: &Path) {
9696
let buf_in = io::get_reader(input);
9797
let buf_out = io::get_writer(output);
98-
let mut triples = io::parse_ntriples(buf_in);
98+
let mut triples = io::parse_input(input, buf_in).expect("Error parsing input for index");
9999
let mut index = TypeIndex::new();
100100

101101
while !triples.is_end() {

src/io.rs

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::{index::TypeIndex, rules::Rules};
2-
use rio_turtle::NTriplesParser;
2+
use rio_api::{formatter::TriplesFormatter, parser::TriplesParser};
3+
use rio_turtle::{NTriplesFormatter, NTriplesParser, TurtleParser};
34
use std::{
45
fs::File,
56
io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read},
@@ -38,8 +39,45 @@ pub fn get_writer(path: &Path) -> Writer {
3839

3940
/// Parse RDF triples.
4041
/// This function takes ownership of a generic type which implements `BufRead`.
41-
pub fn parse_ntriples(reader: impl BufRead) -> NTriplesParser<impl BufRead> {
42-
NTriplesParser::new(reader)
42+
pub fn parse_ntriples<R: BufRead + 'static>(
43+
reader: R,
44+
) -> Result<NTriplesParser<Box<dyn BufRead>>, anyhow::Error> {
45+
let reader: Box<dyn BufRead> = Box::new(reader);
46+
Ok(NTriplesParser::new(reader))
47+
}
48+
49+
/// Parse a Turtle file and convert it to an N-Triples stream.
50+
pub fn parse_turtle<R: BufRead>(
51+
reader: R,
52+
) -> Result<NTriplesParser<Box<dyn BufRead>>, anyhow::Error> {
53+
let mut formatter = NTriplesFormatter::new(Vec::default());
54+
let mut turtle_parser = TurtleParser::new(reader, None);
55+
56+
turtle_parser
57+
.parse_all(&mut |triple| formatter.format(&triple))
58+
.map_err(|e| anyhow::Error::msg(e.to_string()))?;
59+
let formatted_data = formatter
60+
.finish()
61+
.map_err(|e| anyhow::Error::msg(e.to_string()))?;
62+
let reader: Box<dyn BufRead> = Box::new(BufReader::new(std::io::Cursor::new(formatted_data)));
63+
Ok(NTriplesParser::new(reader))
64+
}
65+
66+
/// Detect input format and parse accordingly.
67+
pub fn parse_input<R: BufRead + 'static>(
68+
input: &Path,
69+
buf_input: R,
70+
) -> Result<NTriplesParser<Box<dyn BufRead>>, anyhow::Error> {
71+
match input.extension() {
72+
Some(ext) if ext == std::ffi::OsStr::new("nt") => parse_ntriples(buf_input),
73+
Some(ext) if ext == std::ffi::OsStr::new("ttl") => parse_turtle(buf_input),
74+
Some(_) => Err(anyhow::Error::msg(
75+
"Extension not supported as input. Please use '.nt' or '.ttl'",
76+
)),
77+
None => Err(anyhow::Error::msg(
78+
"No input format detected. Please use '.nt' or '.ttl'",
79+
)),
80+
}
4381
}
4482

4583
/// Parse yaml configuration file.
@@ -74,19 +112,33 @@ pub fn read_bytes(path: &PathBuf) -> Vec<u8> {
74112

75113
#[cfg(test)]
76114
mod tests {
77-
use super::{parse_ntriples, parse_rules};
115+
use crate::io::parse_input;
116+
117+
use super::parse_rules;
78118
use rio_api::parser::TriplesParser;
79119
use std::{
80120
io::{BufRead, BufReader},
81121
path::Path,
82122
};
83123

84-
#[test]
85-
// Test the parsing of a triple.
86-
fn triple_parsing() {
87-
let input: &[u8] = "<http://example.org/resource2> <http://example.org/relatedTo> <http://example.org/resource3> .\n".as_bytes();
88-
let buffer_input: Box<dyn BufRead> = Box::new(BufReader::new(input));
89-
let mut triples = parse_ntriples(buffer_input);
124+
use rstest::rstest;
125+
126+
// Test the parsing of an input in a given format.
127+
#[rstest]
128+
// N-Triples input
129+
#[case("test.nt", "ntriple")]
130+
// Turtle input
131+
#[case("test.ttl", "turtle")]
132+
fn triple_parsing(#[case] input_filename: &str, #[case] mock_input_type: &str) {
133+
let input_filename_string = input_filename.to_string();
134+
let input_path = Path::new(&input_filename_string);
135+
let mock_input: &[u8] = match mock_input_type {
136+
"ntriple" => "<http://example.org/resource2> <http://example.org/relatedTo> <http://example.org/resource3> .\n".as_bytes(),
137+
"turtle" => "@prefix : <http://example.org/> .\n\n :resource2 :relatedTo :resource3 .\n".as_bytes(),
138+
_ => "".as_bytes()
139+
};
140+
let buffer_input: Box<dyn BufRead> = Box::new(BufReader::new(mock_input));
141+
let mut triples = parse_input(input_path, buffer_input).expect("Error parsing input");
90142
triples
91143
.parse_all(&mut |t| -> Result<(), Box<dyn std::error::Error>> {
92144
assert_eq!(t.subject.to_string(), "<http://example.org/resource2>");

src/pseudo.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ pub fn pseudonymize_graph(
6161
let secret = secret_path.as_ref().map(io::read_bytes);
6262
let pseudonymizer = new_pseudonymizer(None, secret);
6363

64-
let mut triples = io::parse_ntriples(buf_input);
64+
let mut triples = io::parse_input(input, buf_input).expect("Error parsing input");
6565

6666
// Run the loop single-threaded.
6767
while !triples.is_end() {

0 commit comments

Comments
 (0)