diff --git a/crates/arco-cli/src/inspect.rs b/crates/arco-cli/src/inspect.rs index 9d523a3..acae782 100644 --- a/crates/arco-cli/src/inspect.rs +++ b/crates/arco-cli/src/inspect.rs @@ -272,7 +272,6 @@ fn build_set_records(program: &SemanticProgram, filtered_set_names: &[&str]) -> } fn find_set_alias(program: &SemanticProgram, set_name: &str) -> Option { - // Check variable families for index aliases that map to this set for family in &program.variable_families { for (index, domain) in &family.index_domains { if domain == set_name && index != set_name { @@ -280,7 +279,6 @@ fn find_set_alias(program: &SemanticProgram, set_name: &str) -> Option { } } } - // Check constraint generation bindings for constraint in &program.active_constraints { for binding in &constraint.generation_bindings { if binding.domain == set_name && binding.variable != set_name { @@ -295,11 +293,9 @@ fn infer_set_dtype(resolved: &arco_kdl::semantic::ResolvedSet) -> String { if resolved.values.is_empty() { return "string".to_string(); } - // Check if all values are integers if resolved.values.iter().all(|v| v.parse::().is_ok()) { return "int".to_string(); } - // Check if all values are floats if resolved.values.iter().all(|v| v.parse::().is_ok()) { return "float64".to_string(); } @@ -893,7 +889,6 @@ fn collect_term_refs_from_expr( }) .collect(); - // Check if this target is already in the output if !out.iter().any(|r| r.name == *target) { out.push(TermRef { name: target.clone(), @@ -1064,7 +1059,6 @@ fn build_objective_terms( }); } Expr::Reduction(reduction) => { - // Check if the body references named expressions let body_terms = split_additive_terms(&reduction.body); let mut found_expressions = false; for body_term in &body_terms { diff --git a/crates/arco-cli/tests/example_cli_commands.rs b/crates/arco-cli/tests/example_cli_commands.rs index a3edeb6..75c2917 100644 --- a/crates/arco-cli/tests/example_cli_commands.rs +++ b/crates/arco-cli/tests/example_cli_commands.rs @@ -46,7 +46,6 @@ fn inspect_produces_valid_toml() { let stdout = String::from_utf8_lossy(&output.stdout); let parsed: toml::Value = toml::from_str(&stdout).expect("output should be valid TOML"); - // Check top-level sections exist assert!(parsed.get("meta").is_some(), "should have meta section"); assert!(parsed.get("set").is_some(), "should have set section"); assert!( @@ -84,7 +83,6 @@ fn inspect_json_produces_valid_json() { let payload: Value = serde_json::from_slice(&output.stdout).expect("valid inspect json"); - // Check structure matches TOML layout assert!(payload.get("meta").is_some()); assert!(payload.get("set").is_some()); assert!(payload.get("variable").is_some()); @@ -92,7 +90,6 @@ fn inspect_json_produces_valid_json() { assert!(payload.get("constraint").is_some()); assert!(payload.get("objective").is_some()); - // Check counts let counts = payload["meta"]["counts"] .as_object() .expect("counts object"); diff --git a/crates/arco-kdl/src/semantic/error.rs b/crates/arco-kdl/src/semantic/error.rs index 7984280..70c3df6 100644 --- a/crates/arco-kdl/src/semantic/error.rs +++ b/crates/arco-kdl/src/semantic/error.rs @@ -143,7 +143,7 @@ pub enum SemanticError { #[diagnostic( code(arco::semantic::unresolved_rule_set_filter_identifier), help( - "if token is a categorical value, quote it in filter, e.g. `where {{ a == \"north\" }}`" + "if token is a categorical value, quote it in filter, e.g. `filter {{ a == \"north\" }}`" ) )] UnresolvedRuleSetFilterIdentifier { diff --git a/crates/arco-kdl/src/source/parser.rs b/crates/arco-kdl/src/source/parser.rs index 4fd95e2..7119daf 100644 --- a/crates/arco-kdl/src/source/parser.rs +++ b/crates/arco-kdl/src/source/parser.rs @@ -165,15 +165,15 @@ fn parse_data(node: &KdlNode, context: &ParseContext<'_>) -> Result {} + other => return Err(unsupported_declaration_error(child, other, context)), + } + } + Ok(crate::source::ParamDecl { name: first_arg_string(node, 0, context)?, value: positional_value(node, &indices, context)?, @@ -264,7 +271,7 @@ fn parse_set(node: &KdlNode, context: &ParseContext<'_>) -> Result { + "filter" => { filter_expression = Some(algebra_text_from_node(child, context)?); } member => { @@ -562,31 +569,17 @@ data "D" source="file.csv" { } #[test] - fn data_from_property_parses_same_as_source() { + fn data_from_property_is_rejected() { let path = PathBuf::from("test.kdl"); - let with_source = r#" -data "D" source="file.csv" { - map "X" from="name" -} -"#; - let with_from = r#" data "D" from="file.csv" { map "X" from="name" } "#; - let source_parsed = parse_program_text(with_source, &path).expect("source= syntax parses"); - let from_parsed = parse_program_text(with_from, &path).expect("from= syntax parses"); - - assert_eq!(source_parsed.program.data.len(), 1); - assert_eq!(from_parsed.program.data.len(), 1); - assert_eq!( - source_parsed.program.data[0].source, - from_parsed.program.data[0].source - ); - assert_eq!(from_parsed.program.data[0].source, "file.csv"); + let error = parse_program_text(with_from, &path).expect_err("from= should be rejected"); + assert!(error.to_string().contains("source")); } #[test] @@ -610,30 +603,19 @@ data "D" from="file.csv" { } #[test] - fn where_keyword_parses_same_as_filter() { + fn where_keyword_is_rejected() { let path = PathBuf::from("test.kdl"); let with_where = r#"set "thermal" { in "gen"; where { type == "thermal" } }"#; - let with_filter = r#"set "thermal" { in "gen"; filter { type == "thermal" } }"#; - let where_parsed = parse_program_text(with_where, &path).expect("where syntax parses"); - let filter_parsed = parse_program_text(with_filter, &path).expect("filter syntax parses"); - - assert_eq!(where_parsed.program.sets.len(), 1); - assert_eq!(filter_parsed.program.sets.len(), 1); - - assert_eq!( - where_parsed.program.sets[0].filter_expression, - filter_parsed.program.sets[0].filter_expression - ); - assert!(where_parsed.program.sets[0].filter_expression.is_some()); + parse_program_text(with_where, &path).expect_err("where syntax should fail"); } #[test] fn mixed_old_and_new_syntax_parses() { let path = PathBuf::from("test.kdl"); - // Mix of old (control, map, alias=, filter) and new (var, alias, as=, where) + // Mix of legacy-compatible and canonical forms we still support. let mixed = r#" data "D" source="file.csv" { map "old_col" from="x" @@ -654,16 +636,13 @@ scenario "S" { use "M" } let parsed = parse_program_text(mixed, &path).expect("mixed syntax parses"); - // Verify data block assert_eq!(parsed.program.data.len(), 1); assert_eq!(parsed.program.data[0].maps.len(), 2); - // Verify sets assert_eq!(parsed.program.sets.len(), 2); assert_eq!(parsed.program.sets[0].alias, Some("o".to_string())); assert_eq!(parsed.program.sets[1].alias, Some("n".to_string())); - // Verify model controls (both var and control) assert_eq!(parsed.program.models.len(), 1); assert_eq!(parsed.program.models[0].controls.len(), 2); } diff --git a/crates/arco-kdl/src/source/parser_helpers.rs b/crates/arco-kdl/src/source/parser_helpers.rs index e9ee21d..1e6da8b 100644 --- a/crates/arco-kdl/src/source/parser_helpers.rs +++ b/crates/arco-kdl/src/source/parser_helpers.rs @@ -51,7 +51,7 @@ pub(super) fn parse_optional_filter_expression( context: &ParseContext<'_>, ) -> Result, SourceError> { for child in node.iter_children() { - if matches!(child.name().value(), "filter" | "where") { + if child.name().value() == "filter" { return Ok(Some(algebra_text_from_node(child, context)?)); } } @@ -91,7 +91,7 @@ pub(super) fn declaration_indices( for child in node.iter_children() { match child.name().value() { - "filter" | "where" | "bounds" => {} + "filter" | "bounds" => {} "index" => { let index_name = first_arg_string(child, 0, context)?; let domain = child diff --git a/crates/arco-kdl/src/source/surface.rs b/crates/arco-kdl/src/source/surface.rs index 878d9db..c8604b0 100644 --- a/crates/arco-kdl/src/source/surface.rs +++ b/crates/arco-kdl/src/source/surface.rs @@ -26,7 +26,6 @@ fn rewrite_math_block_at(text: &str, start: usize) -> Option<(String, usize)> { b'm' => rewrite_math_block(text, start, "minimize") .or_else(|| rewrite_math_block(text, start, "maximize")), b'u' => rewrite_math_block(text, start, "upper"), - b'w' => rewrite_math_block(text, start, "where"), _ => None, } } @@ -119,7 +118,7 @@ fn rewrite_math_block(text: &str, start: usize, keyword: &str) -> Option<(String "constraint" => format!("{header} expression={encoded_body}"), "expression" => format!("{header} {{ formula {encoded_body} }}"), "minimize" | "maximize" => format!("{header} expression={encoded_body}"), - "lower" | "upper" | "if" | "filter" | "where" => { + "lower" | "upper" | "if" | "filter" => { format!("{header} expression={encoded_body}") } _ => return None, diff --git a/crates/arco-kdl/tests/compile_suite.rs b/crates/arco-kdl/tests/compile_suite.rs index a70986f..9511da1 100644 --- a/crates/arco-kdl/tests/compile_suite.rs +++ b/crates/arco-kdl/tests/compile_suite.rs @@ -480,7 +480,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -548,7 +548,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -621,7 +621,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -630,7 +630,7 @@ set "feasible_links" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { a == "1" } + filter { a == "1" } } model "TupleDispatch" { @@ -698,7 +698,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -706,7 +706,7 @@ set "target_pairs" { in "feasible_links" index "a" { in "area" } index "i" { in "tech" } - where { generators == "g1" } + filter { generators == "g1" } } model "TupleDispatch" { @@ -782,7 +782,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -850,7 +850,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -916,7 +916,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -1073,7 +1073,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } diff --git a/crates/arco-kdl/tests/semantic_validation.rs b/crates/arco-kdl/tests/semantic_validation.rs index d34c8e8..61a36d9 100644 --- a/crates/arco-kdl/tests/semantic_validation.rs +++ b/crates/arco-kdl/tests/semantic_validation.rs @@ -649,7 +649,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } @@ -692,7 +692,7 @@ set "tech" { "wind"; "solar" } set "feasible_links" { index "a" { in "area" } index "i" { in "tech" } - where { unknown_col == "1" } + filter { unknown_col == "1" } } model "Dispatch" { @@ -894,7 +894,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } } diff --git a/docs/appendix-a-ergonomic-profile.md b/docs/appendix-a-ergonomic-profile.md index 93e0b5b..f5c06a6 100644 --- a/docs/appendix-a-ergonomic-profile.md +++ b/docs/appendix-a-ergonomic-profile.md @@ -51,7 +51,7 @@ canonical `set { in ...; filter { ... } }` syntax defined in This is a usage guide, not new grammar. ```kdl -data branch_data from="data/branches.csv" { +data branch_data source="data/branches.csv" { set edge set active_edge { in edge; filter { conex == 1 } } param conex index=edge @@ -82,7 +82,7 @@ data branch_data from="data/branches.csv" { > index i { in tech } > index g { in generators } > index b { in buses } -> where { feasible > 0 } +> filter { feasible > 0 } > } > } > @@ -92,7 +92,7 @@ data branch_data from="data/branches.csv" { > index i { in tech } > index g { in generators } > index b { in buses } -> where { area == "south" } +> filter { area == "south" } > } > > model nodal_allocation { diff --git a/docs/arco-spec.md b/docs/arco-spec.md index 4ec2c5c..3c6b358 100644 --- a/docs/arco-spec.md +++ b/docs/arco-spec.md @@ -392,17 +392,16 @@ Required properties: - `source`: CSV file path. Relative paths are resolved from the directory containing the `.kdl` file being parsed. Absolute paths are also accepted. - - `from`: compatibility alias for `source` (legacy syntax). Prefer `source` in - new files. - CSV parsing: Arco expects RFC 4180-compliant CSV files (comma-delimited, - optional double-quote escaping, CRLF or LF line endings). The first row MUST be - a header row containing column names. Column names in the header MUST be unique; - duplicate column names MUST fail validation (see - [§10](#10-validation-requirements), rule 73). CSV files MUST be UTF-8 encoded. - Implementations SHOULD accept files with or without a UTF-8 BOM. Empty cells in - a numeric column MUST fail validation. Empty cells in a string/categorical - column are treated as empty strings. Column matching is always by name, not by - position. + +CSV parsing: Arco expects RFC 4180-compliant CSV files (comma-delimited, +optional double-quote escaping, CRLF or LF line endings). The first row MUST be +a header row containing column names. Column names in the header MUST be unique; +duplicate column names MUST fail validation (see +[§10](#10-validation-requirements), rule 73). CSV files MUST be UTF-8 encoded. +Implementations SHOULD accept files with or without a UTF-8 BOM. Empty cells in +a numeric column MUST fail validation. Empty cells in a string/categorical +column are treated as empty strings. Column matching is always by name, not by +position. Allowed children: diff --git a/examples/nodal-allocation/input.kdl b/examples/nodal-allocation/input.kdl index e90a383..3183a42 100644 --- a/examples/nodal-allocation/input.kdl +++ b/examples/nodal-allocation/input.kdl @@ -12,7 +12,7 @@ data "links" source="data/links.csv" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { feasible > 0 } + filter { feasible > 0 } } param "capacity_mw" { @@ -36,7 +36,7 @@ set "priority_links" { index "i" { in "tech" } index "g" { in "generators" } index "b" { in "buses" } - where { area == "south" } + filter { area == "south" } } model "NodalAllocation" { diff --git a/scripts/.gitkeep b/scripts/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tools/tree-sitter-arco-kdl/examples/highlight_demo.kdl b/tools/tree-sitter-arco-kdl/examples/highlight_demo.kdl index 93ba2d9..2790b44 100644 --- a/tools/tree-sitter-arco-kdl/examples/highlight_demo.kdl +++ b/tools/tree-sitter-arco-kdl/examples/highlight_demo.kdl @@ -6,7 +6,7 @@ set time alias=t { member 3 } -data generators from="data/generators.csv" { +data generators source="data/generators.csv" { map gen from=generator param capacity_mw index=gen param fuel_cost from=cost_per_mwh index=gen