Skip to content

Commit 6d88557

Browse files
authored
Merge pull request #7 from harvard-lil/artifacts
Use artifacts for data flow between plugins
2 parents: eda6dcf + dabb45e; commit 6d88557

67 files changed

Lines changed: 2322 additions & 1340 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.gitignore

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
/target
2-
*.swp
3-
*.swo
1+
/target
2+
*.snap.new
3+
*.swp
4+
*.swo
45
*.pyc
56
*.so
67
/binoc-python/python/binoc/lib_binoc.dylib.dSYM

AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Rust workspace with five crates:
1414
| `binoc-python` | PyO3 bindings and Python plugin support |
1515
| `binoc-sqlite` | Demo plugin: SQLite schema and row count diffing (also a reference for plugin authors) |
1616

17-
Shared test fixtures live in `test-vectors/`. Authoritative architecture spec is `docs/design.md`.
17+
Shared test fixtures live in `test-vectors/`. ADRs live in `docs/adr/`.
1818

1919
## Key Architectural Rules
2020

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ Generic diff tools don't understand data formats, while version control systems
7474
## Documentation
7575

7676
- [tutorial.md](docs/tutorial.md): end-to-end contributor walkthrough.
77-
- Start with [docs/design.md](docs/design.md) for the current architectural contract.
7877
- [test-vectors/](test-vectors/): fixtures demonstrating major capabilities.
78+
- [docs/adr/](docs/adr/): records of architectural decisions.
7979

8080
## Quick Start
8181

@@ -139,7 +139,7 @@ Or with `uvx`, no install needed:
139139
uvx binoc --with binoc-sqlite diff snapshots/v1 snapshots/v2
140140
```
141141

142-
Plugins can be Rust crates (compiled as native shared libraries via the `export_plugin!` macro) or pure Python. See [docs/design.md](docs/design.md) for architecture and [model-plugins/](model-plugins/) for reference implementations.
142+
Plugins can be Rust crates (compiled as native shared libraries via the `export_plugin!` macro) or pure Python. See [docs/adr/](docs/adr/) for architecture and [model-plugins/](model-plugins/) for reference implementations.
143143

144144
### Rust-only CLI
145145

binoc-cli/tests/cli.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,8 @@ comparators:
177177
- binoc.csv
178178
- binoc.text
179179
- binoc.binary
180-
transformers: []
180+
transformers:
181+
- binoc.tabular_analyzer
181182
"#,
182183
)
183184
.unwrap();

binoc-core/src/config.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ impl DatasetConfig {
6666
transformers: vec![
6767
"binoc.move_detector".into(),
6868
"binoc.copy_detector".into(),
69+
"binoc.tabular_analyzer".into(),
6970
"binoc.column_reorder_detector".into(),
7071
],
7172
renderers: default_renderers(),

binoc-core/src/controller.rs

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ impl Controller {
6161
///
6262
/// Implements the reopen walk: traverses the ancestor chain calling
6363
/// `reopen()` on each container comparator to reconstruct the
64-
/// scratchpad, then `compare()` at the target leaf to populate cache,
65-
/// and finally `extract()` on the last toucher.
64+
/// scratchpad, then `compare()` at the target leaf to regenerate
65+
/// artifacts, and finally `extract()` on the last toucher.
6666
pub fn extract(
6767
&self,
6868
changeset: &Changeset,
@@ -109,11 +109,18 @@ impl Controller {
109109
let comparator = self.find_comparator_by_name(comp_name).ok_or_else(|| {
110110
BinocError::Extract(format!("comparator '{comp_name}' not found in registry"))
111111
})?;
112-
let _ = comparator.compare(&current_pair, data.as_ref())?;
112+
let compare_result = comparator.compare(&current_pair, data.as_ref())?;
113113

114114
let mut target_node = target.clone();
115115
target_node.source_items = Some(current_pair);
116116

117+
match compare_result {
118+
CompareResult::Leaf(n) | CompareResult::Expand(n, _) => {
119+
target_node.artifacts = n.artifacts;
120+
}
121+
_ => {}
122+
}
123+
117124
if let Some(last_transformer_name) = target_node.transformed_by.last().cloned() {
118125
let transformer = self
119126
.find_transformer_by_name(&last_transformer_name)
@@ -446,17 +453,43 @@ impl Controller {
446453
}
447454
}
448455

456+
/// All fields are AND (every non-empty field must pass).
457+
/// Within each field, values are OR (any value satisfies that field).
458+
/// Empty/default fields are unconstrained (always pass).
459+
/// A descriptor with all fields empty/default matches nothing.
449460
fn transformer_matches(desc: &TransformerDescriptor, node: &DiffNode) -> bool {
450-
if !desc.match_types.is_empty() && desc.match_types.iter().any(|t| t == &node.item_type) {
451-
return true;
452-
}
453-
if !desc.match_tags.is_empty() && desc.match_tags.iter().any(|t| node.tags.contains(t)) {
454-
return true;
461+
let dominated = match desc.node_shape {
462+
NodeShapeFilter::Container => !node.children.is_empty(),
463+
NodeShapeFilter::Leaf => node.children.is_empty(),
464+
NodeShapeFilter::Any => true,
465+
};
466+
if !dominated {
467+
return false;
455468
}
456-
if !desc.match_actions.is_empty() && desc.match_actions.iter().any(|k| k == &node.action) {
457-
return true;
469+
470+
let types_ok =
471+
desc.match_types.is_empty() || desc.match_types.iter().any(|t| t == &node.item_type);
472+
let tags_ok =
473+
desc.match_tags.is_empty() || desc.match_tags.iter().any(|t| node.tags.contains(t));
474+
let actions_ok =
475+
desc.match_actions.is_empty() || desc.match_actions.iter().any(|k| k == &node.action);
476+
let artifacts_ok = desc.match_artifacts.is_empty()
477+
|| desc
478+
.match_artifacts
479+
.iter()
480+
.any(|req| node.artifacts.iter().any(|a| a.format == *req));
481+
482+
if !types_ok || !tags_ok || !actions_ok || !artifacts_ok {
483+
return false;
458484
}
459-
false
485+
486+
// At least one non-default field must be set, otherwise the
487+
// descriptor is unconstrained and matches nothing.
488+
!matches!(desc.node_shape, NodeShapeFilter::Any)
489+
|| !desc.match_types.is_empty()
490+
|| !desc.match_tags.is_empty()
491+
|| !desc.match_actions.is_empty()
492+
|| !desc.match_artifacts.is_empty()
460493
}
461494
}
462495

binoc-python/python/binoc/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ def transform(self, node):
108108
match_types: list[str] = []
109109
match_tags: list[str] = []
110110
match_actions: list[str] = []
111+
node_shape: str = "any"
112+
"""Dispatch filter on node shape: ``"any"`` (default), ``"container"``
113+
(only nodes with children), or ``"leaf"`` (only childless nodes)."""
111114

112115
def can_handle(self, node: DiffNode) -> bool:
113116
"""Return True if this transformer should process the given node.

binoc-python/src/lib.rs

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,26 @@ use binoc_sdk::*;
1818

1919
use binoc_stdlib::renderers::markdown as md_renderer;
2020

21+
fn restore_transform_transient_fields(
22+
result: &mut TransformResult,
23+
source_items: &Option<ItemPair>,
24+
artifacts: &[ArtifactDescriptor],
25+
) {
26+
match result {
27+
TransformResult::Replace(ref mut node) => {
28+
node.source_items = source_items.clone();
29+
node.artifacts = artifacts.to_vec();
30+
}
31+
TransformResult::ReplaceMany(ref mut nodes) => {
32+
for node in nodes.iter_mut() {
33+
node.source_items = source_items.clone();
34+
node.artifacts = artifacts.to_vec();
35+
}
36+
}
37+
_ => {}
38+
}
39+
}
40+
2141
// ═══════════════════════════════════════════════════════════════════════════
2242
// Native plugin loader — loads Rust plugins via C ABI (libloading)
2343
// ═══════════════════════════════════════════════════════════════════════════
@@ -160,7 +180,18 @@ impl Comparator for NativeComparator {
160180
let response: CompareResponse = serde_json::from_str(&json)
161181
.map_err(|e| BinocError::Other(format!("deserialize CompareResponse: {e}")))?;
162182
match response {
163-
CompareResponse::Ok { result } => Ok(*result),
183+
CompareResponse::Ok {
184+
mut result,
185+
artifacts,
186+
} => {
187+
match result.as_mut() {
188+
CompareResult::Leaf(n) | CompareResult::Expand(n, _) => {
189+
n.artifacts = artifacts;
190+
}
191+
_ => {}
192+
}
193+
Ok(*result)
194+
}
164195
CompareResponse::Error { message } => Err(BinocError::Comparator {
165196
comparator: self.desc.name.clone(),
166197
message,
@@ -212,6 +243,7 @@ impl Comparator for NativeComparator {
212243
aspect: aspect.to_string(),
213244
data_root: data_root.to_string_lossy().to_string(),
214245
source_items: node.source_items.clone(),
246+
artifacts: node.artifacts.clone(),
215247
};
216248
let request_json = serde_json::to_string(&request).ok()?;
217249
let json = self
@@ -249,10 +281,12 @@ impl Transformer for NativeTransformer {
249281
Err(_) => return TransformResult::Unchanged,
250282
};
251283
let source_items = node.source_items.clone();
284+
let artifacts = node.artifacts.clone();
252285
let request = TransformRequest {
253286
node,
254287
data_root: data_root.to_string_lossy().to_string(),
255-
source_items,
288+
source_items: source_items.clone(),
289+
artifacts: artifacts.clone(),
256290
};
257291
let request_json = match serde_json::to_string(&request) {
258292
Ok(j) => j,
@@ -269,10 +303,12 @@ impl Transformer for NativeTransformer {
269303
Ok(r) => r,
270304
Err(_) => return TransformResult::Unchanged,
271305
};
272-
match response.into_result() {
306+
let mut result = match response.into_result() {
273307
Ok(r) => r,
274308
Err(_) => TransformResult::Unchanged,
275-
}
309+
};
310+
restore_transform_transient_fields(&mut result, &source_items, &artifacts);
311+
result
276312
}
277313

278314
fn extract(
@@ -288,6 +324,7 @@ impl Transformer for NativeTransformer {
288324
aspect: aspect.to_string(),
289325
data_root: data_root.to_string_lossy().to_string(),
290326
source_items: node.source_items.clone(),
327+
artifacts: node.artifacts.clone(),
291328
};
292329
let request_json = serde_json::to_string(&request).ok()?;
293330
let json = self
@@ -1236,10 +1273,21 @@ fn create_transformer_bridge(
12361273
.getattr("match_actions")
12371274
.and_then(|v| v.extract())
12381275
.unwrap_or_default();
1276+
let node_shape: NodeShapeFilter = obj
1277+
.getattr("node_shape")
1278+
.and_then(|v| v.extract::<String>())
1279+
.ok()
1280+
.and_then(|s| match s.as_str() {
1281+
"container" => Some(NodeShapeFilter::Container),
1282+
"leaf" => Some(NodeShapeFilter::Leaf),
1283+
_ => None,
1284+
})
1285+
.unwrap_or_default();
12391286
let desc = TransformerDescriptor::new(name)
12401287
.with_match_types(match_types)
12411288
.with_match_tags(match_tags)
1242-
.with_match_actions(match_actions);
1289+
.with_match_actions(match_actions)
1290+
.with_node_shape(node_shape);
12431291
Ok(PyTransformerBridge {
12441292
py_obj: obj.clone().unbind(),
12451293
desc,

0 commit comments

Comments
 (0)