Skip to content

Commit f179a8b

Browse files
GeorgeHahn and claude committed
Remove hardcoded newline injection from grammar generator
The grammar generator should not inject delimiters between samples — the grammar itself defines the output format. Users who want newline-delimited output can include "\n" in their start production. Updated example grammars, tests, and docs accordingly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5785000 commit f179a8b

File tree

3 files changed

+17
-8
lines changed

3 files changed

+17
-8
lines changed

examples/grammars/json.ebnf

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
start = value , "\n" ;
2+
13
value = object | array | string | number | "true" | "false" | "null" ;
24
35
object = "{" [ pair { "," pair } ] "}" ;

lading_payload/README.grammar.md

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
# Grammar payload
22

33
Generates structured data from EBNF, PEG, or ANTLR v4 grammar files using
4-
[barkus](https://github.com/DataDog/barkus). Each generated sample is a
5-
newline-delimited byte sequence conforming to the grammar.
4+
[barkus](https://github.com/DataDog/barkus). Samples are concatenated directly
5+
with no injected delimiters — if you need newline-delimited output, include a
6+
trailing `\n` in your grammar's start production.
67

78
## Using it in a lading config
89

@@ -69,6 +70,9 @@ generator:
6970
## Example EBNF grammar (simplified JSON)
7071

7172
```ebnf
73+
(* Wrap the start production with a trailing newline for line-delimited output. *)
74+
start = value , "\n" ;
75+
7276
value = object | array | string | number | "true" | "false" | "null" ;
7377
7478
object = "{" [ pair { "," pair } ] "}" ;

lading_payload/src/grammar.rs

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@
33
//! This module wraps barkus — a structure-aware grammar-based data generator —
44
//! as a lading payload type. It accepts EBNF, PEG, or ANTLR v4 grammar files
55
//! and generates conforming structured output.
6+
//!
7+
//! Unlike other lading payload types, the grammar generator does **not** inject
8+
//! delimiters (e.g. newlines) between samples. If you need newline-delimited
9+
//! output, include the newline in your grammar's start production.
610
711
use std::{io::Write, path::PathBuf};
812

@@ -161,13 +165,11 @@ impl crate::Serialize for Grammar {
161165
continue;
162166
}
163167
consecutive_failures = 0;
164-
// +1 for the trailing newline
165-
let needed = sample.len() + 1;
168+
let needed = sample.len();
166169
let Some(remainder) = bytes_remaining.checked_sub(needed) else {
167170
break;
168171
};
169172
writer.write_all(&sample)?;
170-
writer.write_all(b"\n")?;
171173
bytes_remaining = remainder;
172174
}
173175
Err(GenerateError::BudgetExhausted { .. }) => {
@@ -232,7 +234,8 @@ mod test {
232234

233235
#[test]
234236
fn ebnf_generates_expected_output() {
235-
let source = "greeting = \"hello\" | \"world\" ;";
237+
// Grammar includes a trailing newline so each sample is its own line.
238+
let source = "greeting = ( \"hello\" | \"world\" ) , \"\\n\" ;";
236239
let (_dir, config) = config_from_source(source, GrammarFormat::Ebnf, "ebnf");
237240

238241
let mut grammar = Grammar::new(&config).unwrap();
@@ -256,7 +259,7 @@ mod test {
256259

257260
#[test]
258261
fn peg_generates_expected_output() {
259-
let source = "greeting <- \"hello\" / \"world\"";
262+
let source = "greeting <- (\"hello\" / \"world\") \"\\n\"";
260263
let (_dir, config) = config_from_source(source, GrammarFormat::Peg, "peg");
261264

262265
let mut grammar = Grammar::new(&config).unwrap();
@@ -278,7 +281,7 @@ mod test {
278281

279282
#[test]
280283
fn antlr_generates_expected_output() {
281-
let source = "grammar Test;\ngreeting : 'hello' | 'world' ;";
284+
let source = "grammar Test;\ngreeting : ('hello' | 'world') '\\n' ;";
282285
let (_dir, config) = config_from_source(source, GrammarFormat::Antlr, "g4");
283286

284287
let mut grammar = Grammar::new(&config).unwrap();

0 commit comments

Comments
 (0)