|
| 1 | +//! YAML support for the built-in `workflow` plugin. |
| 2 | +//! |
| 3 | +//! This module provides deterministic parsing and normalization from YAML into the |
| 4 | +//! canonical JSON shape expected by `builtin.workflow`. |
| 5 | +//! |
| 6 | +//! Why YAML: |
| 7 | +//! - workflows are often authored as YAML in CI/CD and orchestration systems |
| 8 | +//! - YAML is human-friendly for quick iteration |
| 9 | +//! |
| 10 | +//! Determinism rules: |
| 11 | +//! - YAML is parsed into a serde_yaml::Value, then converted into serde_json::Value |
| 12 | +//! - mapping keys are normalized to stable strings |
| 13 | +//! - the resulting JSON is canonicalized (sorted keys) before hashing/usage |
| 14 | +//! |
| 15 | +//! I/O rules: |
| 16 | +//! - no filesystem/network I/O |
| 17 | +//! - caller provides YAML text/bytes |
| 18 | +//! |
| 19 | +//! Feature gates: |
| 20 | +//! - requires the `yaml` Cargo feature for `serde_yaml` dependency |
| 21 | +
|
| 22 | +#![cfg(all(feature = "builtin", feature = "yaml"))] |
| 23 | + |
| 24 | +use anyhow::{anyhow, Result}; |
| 25 | +use serde_json::Value; |
| 26 | + |
| 27 | +use signia_core::determinism::canonical_json::canonicalize_json; |
| 28 | + |
| 29 | +/// Parse workflow YAML to the canonical JSON shape used by SIGNIA. |
| 30 | +/// |
| 31 | +/// Expected YAML structure (example): |
| 32 | +/// |
| 33 | +/// ```yaml |
| 34 | +/// name: demo |
| 35 | +/// version: v1 |
| 36 | +/// nodes: |
| 37 | +/// - id: a |
| 38 | +/// type: http |
| 39 | +/// meta: |
| 40 | +/// url: https://example.com |
| 41 | +/// - id: b |
| 42 | +/// type: llm |
| 43 | +/// edges: |
| 44 | +/// - from: a |
| 45 | +/// to: b |
| 46 | +/// kind: data |
| 47 | +/// label: response |
| 48 | +/// ``` |
| 49 | +pub fn parse_workflow_yaml(yaml_text: &str) -> Result<Value> { |
| 50 | + if yaml_text.trim().is_empty() { |
| 51 | + return Err(anyhow!("workflow yaml is empty")); |
| 52 | + } |
| 53 | + |
| 54 | + let y: serde_yaml::Value = serde_yaml::from_str(yaml_text) |
| 55 | + .map_err(|e| anyhow!("failed to parse yaml: {e}"))?; |
| 56 | + |
| 57 | + let j = yaml_to_json(&y)?; |
| 58 | + let c = canonicalize_json(&j)?; |
| 59 | + Ok(c) |
| 60 | +} |
| 61 | + |
| 62 | +/// Convert YAML value to JSON deterministically. |
| 63 | +pub fn yaml_to_json(v: &serde_yaml::Value) -> Result<Value> { |
| 64 | + match v { |
| 65 | + serde_yaml::Value::Null => Ok(Value::Null), |
| 66 | + serde_yaml::Value::Bool(b) => Ok(Value::Bool(*b)), |
| 67 | + serde_yaml::Value::Number(n) => { |
| 68 | + // serde_yaml numbers can be i64/f64/u64 |
| 69 | + if let Some(i) = n.as_i64() { |
| 70 | + Ok(Value::Number(i.into())) |
| 71 | + } else if let Some(u) = n.as_u64() { |
| 72 | + Ok(Value::Number(serde_json::Number::from(u))) |
| 73 | + } else if let Some(f) = n.as_f64() { |
| 74 | + serde_json::Number::from_f64(f) |
| 75 | + .map(Value::Number) |
| 76 | + .ok_or_else(|| anyhow!("invalid float in yaml")) |
| 77 | + } else { |
| 78 | + Err(anyhow!("unknown numeric type in yaml")) |
| 79 | + } |
| 80 | + } |
| 81 | + serde_yaml::Value::String(s) => Ok(Value::String(s.clone())), |
| 82 | + serde_yaml::Value::Sequence(seq) => { |
| 83 | + let mut out = Vec::with_capacity(seq.len()); |
| 84 | + for item in seq { |
| 85 | + out.push(yaml_to_json(item)?); |
| 86 | + } |
| 87 | + Ok(Value::Array(out)) |
| 88 | + } |
| 89 | + serde_yaml::Value::Mapping(map) => { |
| 90 | + // YAML keys can be complex; we normalize keys to strings deterministically. |
| 91 | + // Strategy: |
| 92 | + // - if key is String => use it |
| 93 | + // - otherwise serialize key to YAML and use that as stable string |
| 94 | + let mut pairs: Vec<(String, Value)> = Vec::with_capacity(map.len()); |
| 95 | + for (k, v2) in map { |
| 96 | + let key = match k { |
| 97 | + serde_yaml::Value::String(s) => s.clone(), |
| 98 | + _ => { |
| 99 | + // Deterministic string for non-string keys |
| 100 | + // serde_yaml::to_string is stable for a single value. |
| 101 | + let ks = serde_yaml::to_string(k).unwrap_or_else(|_| "<key>".to_string()); |
| 102 | + ks.trim().to_string() |
| 103 | + } |
| 104 | + }; |
| 105 | + pairs.push((key, yaml_to_json(v2)?)); |
| 106 | + } |
| 107 | + |
| 108 | + // Deterministic ordering by key |
| 109 | + pairs.sort_by(|a, b| a.0.cmp(&b.0)); |
| 110 | + |
| 111 | + let mut obj = serde_json::Map::new(); |
| 112 | + for (k, v2) in pairs { |
| 113 | + obj.insert(k, v2); |
| 114 | + } |
| 115 | + Ok(Value::Object(obj)) |
| 116 | + } |
| 117 | + // Tagged/other variants (serde_yaml may represent as Mapping/Sequence/String already) |
| 118 | + _ => Err(anyhow!("unsupported yaml value kind")), |
| 119 | + } |
| 120 | +} |
| 121 | + |
| 122 | +/// Validate that the parsed workflow JSON matches the minimal required shape. |
| 123 | +/// This is a lightweight guard; the plugin will do strict validation as well. |
| 124 | +pub fn validate_workflow_json(j: &Value) -> Result<()> { |
| 125 | + let obj = j.as_object().ok_or_else(|| anyhow!("workflow must be a JSON object"))?; |
| 126 | + |
| 127 | + let name = obj.get("name").and_then(|v| v.as_str()).unwrap_or(""); |
| 128 | + if name.trim().is_empty() { |
| 129 | + return Err(anyhow!("workflow.name is required")); |
| 130 | + } |
| 131 | + |
| 132 | + let nodes = obj |
| 133 | + .get("nodes") |
| 134 | + .and_then(|v| v.as_array()) |
| 135 | + .ok_or_else(|| anyhow!("workflow.nodes must be an array"))?; |
| 136 | + if nodes.is_empty() { |
| 137 | + return Err(anyhow!("workflow.nodes must not be empty")); |
| 138 | + } |
| 139 | + |
| 140 | + // Basic node fields |
| 141 | + for n in nodes { |
| 142 | + let no = n.as_object().ok_or_else(|| anyhow!("workflow node must be an object"))?; |
| 143 | + let id = no.get("id").and_then(|v| v.as_str()).unwrap_or(""); |
| 144 | + let ty = no.get("type").and_then(|v| v.as_str()).unwrap_or(""); |
| 145 | + if id.is_empty() || ty.is_empty() { |
| 146 | + return Err(anyhow!("workflow node requires id and type")); |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + // Edges optional but if present must be array |
| 151 | + if let Some(e) = obj.get("edges") { |
| 152 | + if !e.is_array() { |
| 153 | + return Err(anyhow!("workflow.edges must be an array")); |
| 154 | + } |
| 155 | + } |
| 156 | + |
| 157 | + Ok(()) |
| 158 | +} |
| 159 | + |
| 160 | +#[cfg(test)] |
| 161 | +mod tests { |
| 162 | + use super::*; |
| 163 | + |
| 164 | + #[test] |
| 165 | + fn parse_and_validate_yaml() { |
| 166 | + let y = r#" |
| 167 | +name: demo |
| 168 | +version: v1 |
| 169 | +nodes: |
| 170 | + - id: a |
| 171 | + type: http |
| 172 | + meta: |
| 173 | + url: https://example.com |
| 174 | + - id: b |
| 175 | + type: llm |
| 176 | +edges: |
| 177 | + - from: a |
| 178 | + to: b |
| 179 | + kind: data |
| 180 | + label: response |
| 181 | +"#; |
| 182 | + let j = parse_workflow_yaml(y).unwrap(); |
| 183 | + validate_workflow_json(&j).unwrap(); |
| 184 | + assert!(j.get("name").is_some()); |
| 185 | + assert!(j.get("nodes").is_some()); |
| 186 | + } |
| 187 | + |
| 188 | + #[test] |
| 189 | + fn yaml_mapping_key_sort_is_stable() { |
| 190 | + let y = r#" |
| 191 | +name: demo |
| 192 | +nodes: |
| 193 | + - id: b |
| 194 | + type: t |
| 195 | + - id: a |
| 196 | + type: t |
| 197 | +edges: [] |
| 198 | +"#; |
| 199 | + let j = parse_workflow_yaml(y).unwrap(); |
| 200 | + let s1 = serde_json::to_string(&j).unwrap(); |
| 201 | + let s2 = serde_json::to_string(&parse_workflow_yaml(y).unwrap()).unwrap(); |
| 202 | + assert_eq!(s1, s2); |
| 203 | + } |
| 204 | +} |
0 commit comments