Skip to content

Commit e776055

Browse files
tobiaslins and claude authored
Fix heredoc leading whitespace stripping (#52)
* Fix heredoc leading whitespace stripping

  The heredoc content was losing leading whitespace due to two issues:

  1. In lexer.ts, skipWhitespace() was called before checking for pending heredocs, which stripped leading whitespace from the first line of heredoc content. Fixed by moving the heredoc check before skipWhitespace().
  2. In Bash.ts, the script normalization (trimStart on every line) was stripping whitespace from heredoc content. Fixed by making the normalizeScript function heredoc-aware so it preserves content inside heredocs while still normalizing the surrounding script.

  Added tests for whitespace preservation in heredocs.

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Fix heredoc leading whitespace stripping

  The heredoc content was losing leading whitespace due to two issues:

  1. In lexer.ts, skipWhitespace() was called before checking for pending heredocs, which stripped leading whitespace from the first line of heredoc content. Fixed by moving the heredoc check before skipWhitespace().
  2. In Bash.ts, the script normalization (trimStart on every line) was stripping whitespace from heredoc content. Fixed by making the normalizeScript function heredoc-aware so it preserves content inside heredocs while still normalizing the surrounding script.

  Added tests for whitespace preservation in heredocs.

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Fix heredoc delimiter matching edge cases

  1. Don't use trimStart() for regular << heredocs - delimiter must match exactly. A line like " EOF" should be content, not end the heredoc.
  2. Allow hyphens in heredoc delimiters (e.g., END-TEST) by using [\w-]+ instead of \w+ in the regex pattern.

  Added tests for both edge cases.

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Fix heredoc delimiter matching edge cases

  1. Don't use trimStart() for regular << heredocs - delimiter must match exactly. A line like " EOF" should be content, not end the heredoc.
  2. Allow hyphens in heredoc delimiters (e.g., END-TEST) by using [\w-]+ instead of \w+ in the regex pattern.

  Added tests for both edge cases.

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Don't skip some read tests

---------

Co-authored-by: Claude Opus 4.5 <[email protected]>
1 parent 71526aa commit e776055

File tree

4 files changed

+173
-11
lines changed

4 files changed

+173
-11
lines changed

src/Bash.ts

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -351,12 +351,11 @@ export class Bash {
351351
};
352352

353353
// Normalize indented multi-line scripts (unless rawScript is true)
354+
// This allows writing indented bash scripts in template literals
355+
// BUT we must preserve whitespace inside heredoc content
354356
let normalized = commandLine;
355357
if (!options?.rawScript) {
356-
const normalizedLines = commandLine
357-
.split("\n")
358-
.map((line) => line.trimStart());
359-
normalized = normalizedLines.join("\n");
358+
normalized = normalizeScript(commandLine);
360359
}
361360

362361
try {
@@ -449,3 +448,63 @@ export class Bash {
449448
return { ...this.state.env };
450449
}
451450
}
451+
452+
/**
 * Normalize a script by stripping leading whitespace from each line while
 * leaving the body of every heredoc exactly as written.
 *
 * This allows writing indented bash scripts in template literals:
 * ```
 * await bash.exec(`
 *   if [ -f foo ]; then
 *     echo "yes"
 *   fi
 * `);
 * ```
 *
 * Heredocs are detected textually by looking for `<<` / `<<-` operators and
 * their (optionally quoted) delimiters on lines outside any heredoc body.
 * Here-strings (`<<<`) are not heredocs and are ignored.
 *
 * Limitation: detection is purely lexical, so a `<<` followed by a word inside
 * quotes, a comment, or an arithmetic expansion is still treated as a heredoc
 * start.
 *
 * @param script - Raw script text; lines are separated by "\n".
 * @returns The script with leading whitespace removed from every line that is
 *   not heredoc content.
 */
function normalizeScript(script: string): string {
  // Match: <<DELIM, <<-DELIM, << 'DELIM', <<- "DELIM", etc.
  // The lookbehind/lookahead reject the here-string operator `<<<`, whose
  // last two characters would otherwise look like a heredoc operator.
  // matchAll() works on a clone of the regex, so sharing one instance across
  // loop iterations does not leak lastIndex state.
  const heredocPattern = /(?<!<)<<(?!<)(-?)\s*(['"]?)([\w-]+)\2/g;

  // Queue of heredocs whose bodies have not been fully read yet. Several
  // heredocs may be opened on one line (cmd <<EOF1 <<EOF2); the shell reads
  // their bodies in operator order, so this must be first-in, first-out.
  const pendingDelimiters: { delimiter: string; stripTabs: boolean }[] = [];
  const result: string[] = [];

  for (const line of script.split("\n")) {
    const current = pendingDelimiters[0];

    if (current !== undefined) {
      // For <<-, leading tabs are ignored when matching the delimiter.
      // For <<, the delimiter must match the whole line exactly, so an
      // indented " EOF" is content rather than a terminator.
      const lineToCheck = current.stripTabs ? line.replace(/^\t+/, "") : line;

      if (lineToCheck === current.delimiter) {
        // End of heredoc - the delimiter line itself can be normalized.
        result.push(line.trimStart());
        pendingDelimiters.shift();
      } else {
        // Heredoc content - keep the line byte-for-byte.
        result.push(line);
      }
      continue;
    }

    // Ordinary script line: strip indentation, then record any heredocs it
    // opens so the following lines are treated as content.
    const normalizedLine = line.trimStart();
    result.push(normalizedLine);

    for (const match of normalizedLine.matchAll(heredocPattern)) {
      pendingDelimiters.push({
        delimiter: match[3],
        stripTabs: match[1] === "-",
      });
    }
  }

  return result.join("\n");
}

src/parser/lexer.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,8 @@ export class Lexer {
244244
const pendingHeredocs = this.pendingHeredocs;
245245

246246
while (this.pos < len) {
247-
this.skipWhitespace();
248-
249-
if (this.pos >= len) break;
250-
251-
// Check for pending here-documents after newline
247+
// Check for pending here-documents after newline BEFORE skipping whitespace
248+
// to preserve leading whitespace in heredoc content
252249
if (
253250
pendingHeredocs.length > 0 &&
254251
tokens.length > 0 &&
@@ -258,6 +255,10 @@ export class Lexer {
258255
continue;
259256
}
260257

258+
this.skipWhitespace();
259+
260+
if (this.pos >= len) break;
261+
261262
const token = this.nextToken();
262263
if (token) {
263264
tokens.push(token);

src/spec-tests/cases/builtin-read.test.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,6 @@ b
416416
## N-I dash/zsh stdout-json: ""
417417

418418
#### read with IFS=$'\n'
419-
## SKIP: IFS with newline character not implemented
420419
# The leading spaces are stripped if they appear in IFS.
421420
IFS=$(echo -e '\n')
422421
read var <<EOF
@@ -444,7 +443,6 @@ printf "%s\n" "[$a|$b|$c|$d]"
444443
## stdout: [ \a |b: c|d e|]
445444

446445
#### read with IFS=''
447-
## SKIP: Read with special IFS values not implemented
448446
IFS=''
449447
read x y <<EOF
450448
a b c d

src/syntax/here-document.test.ts

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,108 @@ echo done`);
125125
expect(result.stdout).toBe("hello\ndone\n");
126126
expect(result.exitCode).toBe(0);
127127
});
128+
129+
describe("whitespace preservation", () => {
130+
it("should preserve leading spaces in here document content", async () => {
131+
const env = new Bash();
132+
const result = await env.exec(`cat <<EOF
133+
four spaces at start
134+
two spaces at start
135+
no spaces
136+
EOF`);
137+
expect(result.stdout).toBe(
138+
" four spaces at start\n two spaces at start\nno spaces\n",
139+
);
140+
expect(result.exitCode).toBe(0);
141+
});
142+
143+
it("should preserve leading tabs in here document content (not <<-)", async () => {
144+
const env = new Bash();
145+
const result = await env.exec(`cat <<EOF
146+
\tleading tab
147+
no tab
148+
EOF`);
149+
expect(result.stdout).toBe("\tleading tab\nno tab\n");
150+
expect(result.exitCode).toBe(0);
151+
});
152+
153+
it("should preserve mixed whitespace in here document", async () => {
154+
const env = new Bash();
155+
const result = await env.exec(`cat <<EOF
156+
spaces
157+
\ttab
158+
\tmixed
159+
EOF`);
160+
expect(result.stdout).toBe(" spaces\n\ttab\n \tmixed\n");
161+
expect(result.exitCode).toBe(0);
162+
});
163+
164+
it("should preserve whitespace even when script is indented", async () => {
165+
const env = new Bash();
166+
// This tests that the script normalization doesn't strip heredoc content
167+
const result = await env.exec(`
168+
cat <<EOF
169+
indented content
170+
more indented
171+
EOF
172+
`);
173+
expect(result.stdout).toBe(
174+
" indented content\n more indented\n",
175+
);
176+
expect(result.exitCode).toBe(0);
177+
});
178+
179+
it("should preserve ASCII art triangle with leading spaces", async () => {
180+
const env = new Bash();
181+
const result = await env.exec(`cat <<'EOF'
182+
*
183+
* *
184+
* *
185+
* *
186+
* *
187+
* *
188+
* *
189+
* *
190+
* *
191+
* *
192+
* *
193+
*********************
194+
EOF`);
195+
expect(result.stdout).toBe(` *
196+
* *
197+
* *
198+
* *
199+
* *
200+
* *
201+
* *
202+
* *
203+
* *
204+
* *
205+
* *
206+
*********************
207+
`);
208+
expect(result.exitCode).toBe(0);
209+
});
210+
211+
it("should not treat indented delimiter as end of heredoc", async () => {
212+
const env = new Bash();
213+
// A line with " EOF" (spaces before EOF) should be content, not delimiter
214+
const result = await env.exec(`cat <<EOF
215+
line 1
216+
EOF
217+
line 2
218+
EOF`);
219+
expect(result.stdout).toBe("line 1\n EOF\nline 2\n");
220+
expect(result.exitCode).toBe(0);
221+
});
222+
223+
it("should handle delimiter with hyphen", async () => {
224+
const env = new Bash();
225+
const result = await env.exec(`cat <<END-TEST
226+
content with spaces
227+
END-TEST`);
228+
expect(result.stdout).toBe(" content with spaces\n");
229+
expect(result.exitCode).toBe(0);
230+
});
231+
});
128232
});

0 commit comments

Comments
 (0)