Skip to content

Commit f879635

Browse files
authored
refactor(rules): config format (#47)
1 parent 2727fc4 commit f879635

6 files changed

Lines changed: 237 additions & 250 deletions

File tree

docs/tutorial.md

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,7 @@ There are three possible ways to pseudonymize RDF triples:
2323
2. Pseudonymize values for specific subject-predicate combinations.
2424
3. Pseudonymize any value for a given predicate.
2525

26-
By using all three ways together, we're able to get an RDF file with sensitive
27-
information:
28-
29-
<details>
30-
<summary><b>Click to show input</b></summary>
26+
By combining these, we can process an RDF file with sensitive information:
3127

3228
```ntriples
3329
<http://example.org/Alice> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
@@ -40,15 +36,12 @@ information:
4036
<http://example.org/Bank> <http://schema.org/name> "Bank" .
4137
```
4238

43-
</details>
4439

45-
And pseudonymize the sensitive information such as people's names, personal and
46-
secret information while keeping the rest as is:
40+
into a pseudonymized file where the sensitive information such as people's names, personal and
41+
secret information is hashed to protect privacy:
4742

48-
<details>
49-
<summary><b>Click to show output</b></summary>
5043

51-
```
44+
```ntriples
5245
<http://example.org/af321bbc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
5346
<http://example.org/af321bbc> <http://xmlns.com/foaf/0.1/holdsAccount> <http://example.org/bs2313bc> .
5447
<http://example.org/bs2313bc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/OnlineAccount> .
@@ -59,55 +52,49 @@ secret information while keeping the rest as is:
5952
<http://example.org/Bank> <http://schema.org/name> "Bank" .
6053
```
6154

62-
</details>
63-
6455
The next subsections break down each of the three pseudonymization approaches to
6556
better understand how they operate.
6657

6758
### 1. Pseudonymize the URI of nodes with `rdf:type`
6859

69-
<details>
70-
<summary><b>Click to show</b></summary>
7160

7261
Given the following config:
7362

7463
```yaml
75-
replace_uri_of_nodes_with_type:
64+
subjects:
65+
of_type:
7666
- "http://xmlns.com/foaf/0.1/Person"
7767
```
7868
7969
The goal is to pseudonymize all instances of `rdf:type` Person. The following
8070
input file:
8171

82-
```
72+
```ntriples
8373
<http://example.org/Alice> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
8474
```
8575

8676
Would become:
8777

88-
```
78+
```ntriples
8979
<http://example.org/af321bbc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
9080
```
9181

92-
</details>
9382

9483
### 2. Pseudonymize values for specific subject-predicate combinations
9584

96-
<details>
97-
<summary><b>Click to show</b></summary>
98-
9985
Given the following config:
10086

10187
```yaml
102-
replace_values_of_subject_predicate:
103-
"http://xmlns.com/foaf/0.1/Person":
88+
objects:
89+
on_type_predicate:
90+
"http://xmlns.com/foaf/0.1/Person":
10491
- "http://schema.org/name"
10592
```
10693

10794
The goal is to pseudonymize only the instances of names when they're associated
10895
with Person. The following input file:
10996

110-
```
97+
```ntriples
11198
<http://example.org/Alice> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
11299
<http://example.org/Alice> <http://schema.org/name> "Alice" .
113100
<http://example.org/Bank> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Organization> .
@@ -116,19 +103,15 @@ to Person. The following input file:
116103

117104
Would become:
118105

119-
```
106+
```ntriples
120107
<http://example.org/Alice> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
121108
<http://example.org/Alice> <http://schema.org/name> "af321bbc" .
122109
<http://example.org/Bank> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Organization> .
123110
<http://example.org/Bank> <http://schema.org/name> "Bank" .
124111
```
125112

126-
</details>
127-
128113
### 3. Pseudonymize any value for a given predicate
129114

130-
<details>
131-
<summary><b>Click to show</b></summary>
132115

133116
Given the following config:
134117

@@ -140,7 +123,7 @@ replace_value_of_predicate:
140123
The goal is to pseudonymize any values associated with name. The following input
141124
file:
142125

143-
```
126+
```ntriples
144127
<http://example.org/Alice> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
145128
<http://example.org/Alice> <http://schema.org/name> "Alice" .
146129
<http://example.org/Bank> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Organization> .
@@ -149,11 +132,9 @@ file:
149132

150133
Would become:
151134

152-
```
135+
```ntriples
153136
<http://example.org/Alice> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .
154137
<http://example.org/Alice> <http://schema.org/name> "af321bbc" .
155138
<http://example.org/Bank> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Organization> .
156139
<http://example.org/Bank> <http://schema.org/name> "38a3dd71" .
157140
```
158-
159-
</details>
File renamed without changes.

src/main.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
// Define the module.
22
mod crypto;
3+
mod index;
34
mod io;
45
mod log;
56
mod model;
6-
mod pass_first;
7-
mod pass_second;
7+
mod pseudo;
88
mod rdf_types;
99
mod rules;
1010

1111
// Define the imports.
1212
use crate::{
13+
index::create_type_map,
1314
log::{create_logger, info},
14-
pass_first::create_type_map,
15-
pass_second::pseudonymize_graph,
15+
pseudo::pseudonymize_graph,
1616
};
1717

1818
use clap::{Args, Parser, Subcommand};
@@ -51,11 +51,6 @@ struct PseudoArgs {
5151
#[arg(default_value = "-")]
5252
input: PathBuf,
5353

54-
/// Invert the matching rules for the subject and the object.
55-
/// Disabled by default
56-
#[arg(short = 'v', long)]
57-
invert_match: bool,
58-
5954
/// The config file descriptor to use for defining RDF elements to pseudonymize.
6055
/// Format: yaml
6156
#[arg(short, long)]
@@ -103,7 +98,6 @@ fn main() {
10398
&args.output,
10499
&args.index,
105100
&args.secret,
106-
&args.invert_match,
107101
)
108102
}
109103
}
Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,35 +10,10 @@ use crate::{
1010
crypto::{new_pseudonymizer, Pseudonymize},
1111
io,
1212
log::Logger,
13-
model::TripleMask,
1413
rdf_types::*,
15-
rules::{
16-
match_predicate_rule, match_subject_predicate_rule, match_type_rule_object,
17-
match_type_rule_subject, Rules,
18-
},
14+
rules::{match_rules, Rules},
1915
};
2016

21-
fn match_rules(
22-
triple: Triple,
23-
rules: &Rules,
24-
type_map: &HashMap<String, String>,
25-
invert_match: &bool,
26-
) -> TripleMask {
27-
// Check each field of the triple against the rules
28-
let mut mask = TripleMask::default();
29-
30-
mask = match_type_rule_subject(&triple.subject, mask, type_map, rules);
31-
mask = match_type_rule_object(&triple.object, mask, type_map, rules);
32-
mask = match_predicate_rule(&triple.predicate, mask, rules);
33-
mask = match_subject_predicate_rule(&triple.subject, &triple.predicate, mask, type_map, rules);
34-
35-
if *invert_match {
36-
mask = mask.invert();
37-
}
38-
39-
return mask;
40-
}
41-
4217
// mask and encode input triple
4318
// NOTE: This will need the type-map to perform masking
4419
fn process_triple(
@@ -47,9 +22,8 @@ fn process_triple(
4722
node_to_type: &HashMap<String, String>,
4823
out: &mut impl Write,
4924
hasher: &dyn Pseudonymize,
50-
invert_match: &bool,
5125
) {
52-
let mask = match_rules(triple.clone(), rules_config, node_to_type, invert_match);
26+
let mask = match_rules(&triple, rules_config, node_to_type);
5327

5428
let r = || -> std::io::Result<()> {
5529
out.write_all(hasher.pseudo_triple(&triple, mask).to_string().as_bytes())?;
@@ -86,7 +60,6 @@ pub fn pseudonymize_graph(
8660
output: &Path,
8761
index: &Path,
8862
secret_path: &Option<PathBuf>,
89-
invert_match: &bool,
9063
) {
9164
let buf_input = io::get_reader(input);
9265
let buf_index = io::get_reader(index);
@@ -110,7 +83,6 @@ pub fn pseudonymize_graph(
11083
&node_to_type,
11184
&mut buf_output,
11285
&pseudonymizer,
113-
invert_match,
11486
);
11587
Result::<(), TurtleError>::Ok(())
11688
})
@@ -139,15 +111,13 @@ mod tests {
139111
let output_path = dir.path().join("output.nt");
140112
let type_map_path = Path::new("tests/data/type_map.nt");
141113
let key = None;
142-
let invert_match = false;
143114
pseudonymize_graph(
144115
&logger,
145116
&input_path,
146117
&config_path,
147118
&output_path,
148119
&type_map_path,
149120
&key,
150-
&invert_match,
151121
);
152122
}
153123
}

0 commit comments

Comments
 (0)