Skip to content

Commit 880af7c

Browse files
authored
Create yaml.rs
1 parent 18e6e17 commit 880af7c

1 file changed

Lines changed: 204 additions & 0 deletions

File tree

  • crates/signia-plugins/src/builtin/workflow
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
//! YAML support for the built-in `workflow` plugin.
2+
//!
3+
//! This module provides deterministic parsing and normalization from YAML into the
4+
//! canonical JSON shape expected by `builtin.workflow`.
5+
//!
6+
//! Why YAML:
7+
//! - workflows are often authored as YAML in CI/CD and orchestration systems
8+
//! - YAML is human-friendly for quick iteration
9+
//!
10+
//! Determinism rules:
11+
//! - YAML is parsed into a serde_yaml::Value, then converted into serde_json::Value
12+
//! - mapping keys are normalized to stable strings
13+
//! - the resulting JSON is canonicalized (sorted keys) before hashing/usage
14+
//!
15+
//! I/O rules:
16+
//! - no filesystem/network I/O
17+
//! - caller provides YAML text/bytes
18+
//!
19+
//! Feature gates:
20+
//! - requires the `yaml` Cargo feature for `serde_yaml` dependency
21+
22+
#![cfg(all(feature = "builtin", feature = "yaml"))]
23+
24+
use anyhow::{anyhow, Result};
25+
use serde_json::Value;
26+
27+
use signia_core::determinism::canonical_json::canonicalize_json;
28+
29+
/// Parse workflow YAML to the canonical JSON shape used by SIGNIA.
30+
///
31+
/// Expected YAML structure (example):
32+
///
33+
/// ```yaml
34+
/// name: demo
35+
/// version: v1
36+
/// nodes:
37+
/// - id: a
38+
/// type: http
39+
/// meta:
40+
/// url: https://example.com
41+
/// - id: b
42+
/// type: llm
43+
/// edges:
44+
/// - from: a
45+
/// to: b
46+
/// kind: data
47+
/// label: response
48+
/// ```
49+
pub fn parse_workflow_yaml(yaml_text: &str) -> Result<Value> {
50+
if yaml_text.trim().is_empty() {
51+
return Err(anyhow!("workflow yaml is empty"));
52+
}
53+
54+
let y: serde_yaml::Value = serde_yaml::from_str(yaml_text)
55+
.map_err(|e| anyhow!("failed to parse yaml: {e}"))?;
56+
57+
let j = yaml_to_json(&y)?;
58+
let c = canonicalize_json(&j)?;
59+
Ok(c)
60+
}
61+
62+
/// Convert YAML value to JSON deterministically.
63+
pub fn yaml_to_json(v: &serde_yaml::Value) -> Result<Value> {
64+
match v {
65+
serde_yaml::Value::Null => Ok(Value::Null),
66+
serde_yaml::Value::Bool(b) => Ok(Value::Bool(*b)),
67+
serde_yaml::Value::Number(n) => {
68+
// serde_yaml numbers can be i64/f64/u64
69+
if let Some(i) = n.as_i64() {
70+
Ok(Value::Number(i.into()))
71+
} else if let Some(u) = n.as_u64() {
72+
Ok(Value::Number(serde_json::Number::from(u)))
73+
} else if let Some(f) = n.as_f64() {
74+
serde_json::Number::from_f64(f)
75+
.map(Value::Number)
76+
.ok_or_else(|| anyhow!("invalid float in yaml"))
77+
} else {
78+
Err(anyhow!("unknown numeric type in yaml"))
79+
}
80+
}
81+
serde_yaml::Value::String(s) => Ok(Value::String(s.clone())),
82+
serde_yaml::Value::Sequence(seq) => {
83+
let mut out = Vec::with_capacity(seq.len());
84+
for item in seq {
85+
out.push(yaml_to_json(item)?);
86+
}
87+
Ok(Value::Array(out))
88+
}
89+
serde_yaml::Value::Mapping(map) => {
90+
// YAML keys can be complex; we normalize keys to strings deterministically.
91+
// Strategy:
92+
// - if key is String => use it
93+
// - otherwise serialize key to YAML and use that as stable string
94+
let mut pairs: Vec<(String, Value)> = Vec::with_capacity(map.len());
95+
for (k, v2) in map {
96+
let key = match k {
97+
serde_yaml::Value::String(s) => s.clone(),
98+
_ => {
99+
// Deterministic string for non-string keys
100+
// serde_yaml::to_string is stable for a single value.
101+
let ks = serde_yaml::to_string(k).unwrap_or_else(|_| "<key>".to_string());
102+
ks.trim().to_string()
103+
}
104+
};
105+
pairs.push((key, yaml_to_json(v2)?));
106+
}
107+
108+
// Deterministic ordering by key
109+
pairs.sort_by(|a, b| a.0.cmp(&b.0));
110+
111+
let mut obj = serde_json::Map::new();
112+
for (k, v2) in pairs {
113+
obj.insert(k, v2);
114+
}
115+
Ok(Value::Object(obj))
116+
}
117+
// Tagged/other variants (serde_yaml may represent as Mapping/Sequence/String already)
118+
_ => Err(anyhow!("unsupported yaml value kind")),
119+
}
120+
}
121+
122+
/// Validate that the parsed workflow JSON matches the minimal required shape.
123+
/// This is a lightweight guard; the plugin will do strict validation as well.
124+
pub fn validate_workflow_json(j: &Value) -> Result<()> {
125+
let obj = j.as_object().ok_or_else(|| anyhow!("workflow must be a JSON object"))?;
126+
127+
let name = obj.get("name").and_then(|v| v.as_str()).unwrap_or("");
128+
if name.trim().is_empty() {
129+
return Err(anyhow!("workflow.name is required"));
130+
}
131+
132+
let nodes = obj
133+
.get("nodes")
134+
.and_then(|v| v.as_array())
135+
.ok_or_else(|| anyhow!("workflow.nodes must be an array"))?;
136+
if nodes.is_empty() {
137+
return Err(anyhow!("workflow.nodes must not be empty"));
138+
}
139+
140+
// Basic node fields
141+
for n in nodes {
142+
let no = n.as_object().ok_or_else(|| anyhow!("workflow node must be an object"))?;
143+
let id = no.get("id").and_then(|v| v.as_str()).unwrap_or("");
144+
let ty = no.get("type").and_then(|v| v.as_str()).unwrap_or("");
145+
if id.is_empty() || ty.is_empty() {
146+
return Err(anyhow!("workflow node requires id and type"));
147+
}
148+
}
149+
150+
// Edges optional but if present must be array
151+
if let Some(e) = obj.get("edges") {
152+
if !e.is_array() {
153+
return Err(anyhow!("workflow.edges must be an array"));
154+
}
155+
}
156+
157+
Ok(())
158+
}
159+
160+
#[cfg(test)]
161+
mod tests {
162+
use super::*;
163+
164+
#[test]
165+
fn parse_and_validate_yaml() {
166+
let y = r#"
167+
name: demo
168+
version: v1
169+
nodes:
170+
- id: a
171+
type: http
172+
meta:
173+
url: https://example.com
174+
- id: b
175+
type: llm
176+
edges:
177+
- from: a
178+
to: b
179+
kind: data
180+
label: response
181+
"#;
182+
let j = parse_workflow_yaml(y).unwrap();
183+
validate_workflow_json(&j).unwrap();
184+
assert!(j.get("name").is_some());
185+
assert!(j.get("nodes").is_some());
186+
}
187+
188+
#[test]
189+
fn yaml_mapping_key_sort_is_stable() {
190+
let y = r#"
191+
name: demo
192+
nodes:
193+
- id: b
194+
type: t
195+
- id: a
196+
type: t
197+
edges: []
198+
"#;
199+
let j = parse_workflow_yaml(y).unwrap();
200+
let s1 = serde_json::to_string(&j).unwrap();
201+
let s2 = serde_json::to_string(&parse_workflow_yaml(y).unwrap()).unwrap();
202+
assert_eq!(s1, s2);
203+
}
204+
}

0 commit comments

Comments
 (0)