Skip to content

Commit a9e9c99

Browse files
authored
Build system: Support Windows depfiles with unquoted, backslash escaped spaces (#20100)
1 parent 6375491 commit a9e9c99

File tree

2 files changed

+140
-44
lines changed

2 files changed

+140
-44
lines changed

lib/std/Build/Cache.zig

+11-2
Original file line numberDiff line numberDiff line change
@@ -860,14 +860,23 @@ pub const Manifest = struct {
860860

861861
var it: DepTokenizer = .{ .bytes = dep_file_contents };
862862

863-
while (true) {
864-
switch (it.next() orelse return) {
863+
while (it.next()) |token| {
864+
switch (token) {
865865
// We don't care about targets, we only want the prereqs
866866
// Clang is invoked in single-source mode but other programs may not
867867
.target, .target_must_resolve => {},
868868
.prereq => |file_path| if (self.manifest_file == null) {
869869
_ = try self.addFile(file_path, null);
870870
} else try self.addFilePost(file_path),
871+
.prereq_must_resolve => {
872+
var resolve_buf = std.ArrayList(u8).init(self.cache.gpa);
873+
defer resolve_buf.deinit();
874+
875+
try token.resolve(resolve_buf.writer());
876+
if (self.manifest_file == null) {
877+
_ = try self.addFile(resolve_buf.items, null);
878+
} else try self.addFilePost(resolve_buf.items);
879+
},
871880
else => |err| {
872881
try err.printError(error_buf.writer());
873882
log.err("failed parsing {s}: {s}", .{ dep_file_basename, error_buf.items });

lib/std/Build/Cache/DepTokenizer.zig

+129-42
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ pub fn next(self: *Tokenizer) ?Token {
158158
'"' => {
159159
self.index += 1;
160160
self.state = .rhs;
161-
return Token{ .prereq = self.bytes[start .. self.index - 1] };
161+
return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
162162
},
163163
else => {
164164
self.index += 1;
@@ -167,11 +167,11 @@ pub fn next(self: *Tokenizer) ?Token {
167167
.prereq => switch (char) {
168168
'\t', ' ' => {
169169
self.state = .rhs;
170-
return Token{ .prereq = self.bytes[start..self.index] };
170+
return finishPrereq(must_resolve, self.bytes[start..self.index]);
171171
},
172172
'\n', '\r' => {
173173
self.state = .lhs;
174-
return Token{ .prereq = self.bytes[start..self.index] };
174+
return finishPrereq(must_resolve, self.bytes[start..self.index]);
175175
},
176176
'\\' => {
177177
self.state = .prereq_continuation;
@@ -185,12 +185,22 @@ pub fn next(self: *Tokenizer) ?Token {
185185
'\n' => {
186186
self.index += 1;
187187
self.state = .rhs;
188-
return Token{ .prereq = self.bytes[start .. self.index - 2] };
188+
return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
189189
},
190190
'\r' => {
191191
self.state = .prereq_continuation_linefeed;
192192
self.index += 1;
193193
},
194+
'\\' => {
195+
// The previous \ wasn't a continuation, but this one might be.
196+
self.index += 1;
197+
},
198+
' ' => {
199+
// not continuation, but escaped space must be resolved
200+
must_resolve = true;
201+
self.state = .prereq;
202+
self.index += 1;
203+
},
194204
else => {
195205
// not continuation
196206
self.state = .prereq;
@@ -201,7 +211,7 @@ pub fn next(self: *Tokenizer) ?Token {
201211
'\n' => {
202212
self.index += 1;
203213
self.state = .rhs;
204-
return Token{ .prereq = self.bytes[start .. self.index - 1] };
214+
return finishPrereq(must_resolve, self.bytes[start .. self.index - 3]);
205215
},
206216
else => {
207217
return errorIllegalChar(.continuation_eol, self.index, char);
@@ -251,15 +261,15 @@ pub fn next(self: *Tokenizer) ?Token {
251261
},
252262
.prereq => {
253263
self.state = .lhs;
254-
return Token{ .prereq = self.bytes[start..] };
264+
return finishPrereq(must_resolve, self.bytes[start..]);
255265
},
256266
.prereq_continuation => {
257267
self.state = .lhs;
258-
return Token{ .prereq = self.bytes[start .. self.index - 1] };
268+
return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
259269
},
260270
.prereq_continuation_linefeed => {
261271
self.state = .lhs;
262-
return Token{ .prereq = self.bytes[start .. self.index - 2] };
272+
return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
263273
},
264274
}
265275
}
@@ -278,6 +288,10 @@ fn finishTarget(must_resolve: bool, bytes: []const u8) Token {
278288
return if (must_resolve) .{ .target_must_resolve = bytes } else .{ .target = bytes };
279289
}
280290

291+
/// Wraps `bytes` in a prerequisite token: `.prereq_must_resolve` when
/// `must_resolve` is true, plain `.prereq` otherwise.
fn finishPrereq(must_resolve: bool, bytes: []const u8) Token {
292+
return if (must_resolve) .{ .prereq_must_resolve = bytes } else .{ .prereq = bytes };
293+
}
294+
281295
const State = enum {
282296
lhs,
283297
target,
@@ -298,6 +312,7 @@ pub const Token = union(enum) {
298312
target: []const u8,
299313
target_must_resolve: []const u8,
300314
prereq: []const u8,
315+
prereq_must_resolve: []const u8,
301316

302317
incomplete_quoted_prerequisite: IndexAndBytes,
303318
incomplete_target: IndexAndBytes,
@@ -318,48 +333,76 @@ pub const Token = union(enum) {
318333
bytes: []const u8,
319334
};
320335

321-
/// Resolve escapes in target. Only valid with .target_must_resolve.
336+
/// Resolve escapes in target or prereq. Only valid with .target_must_resolve or .prereq_must_resolve.
322337
pub fn resolve(self: Token, writer: anytype) @TypeOf(writer).Error!void {
323-
const bytes = self.target_must_resolve; // resolve called on incorrect token
324-
325-
var state: enum { start, escape, dollar } = .start;
326-
for (bytes) |c| {
327-
switch (state) {
328-
.start => {
329-
switch (c) {
330-
'\\' => state = .escape,
331-
'$' => state = .dollar,
332-
else => try writer.writeByte(c),
333-
}
334-
},
335-
.escape => {
336-
switch (c) {
337-
' ', '#', '\\' => {},
338-
'$' => {
339-
try writer.writeByte('\\');
340-
state = .dollar;
341-
continue;
338+
switch (self) {
339+
.target_must_resolve => |bytes| {
340+
var state: enum { start, escape, dollar } = .start;
341+
for (bytes) |c| {
342+
switch (state) {
343+
.start => {
344+
switch (c) {
345+
'\\' => state = .escape,
346+
'$' => state = .dollar,
347+
else => try writer.writeByte(c),
348+
}
349+
},
350+
.escape => {
351+
switch (c) {
352+
' ', '#', '\\' => {},
353+
'$' => {
354+
try writer.writeByte('\\');
355+
state = .dollar;
356+
continue;
357+
},
358+
else => try writer.writeByte('\\'),
359+
}
360+
try writer.writeByte(c);
361+
state = .start;
362+
},
363+
.dollar => {
364+
try writer.writeByte('$');
365+
switch (c) {
366+
'$' => {},
367+
else => try writer.writeByte(c),
368+
}
369+
state = .start;
342370
},
343-
else => try writer.writeByte('\\'),
344371
}
345-
try writer.writeByte(c);
346-
state = .start;
347-
},
348-
.dollar => {
349-
try writer.writeByte('$');
350-
switch (c) {
351-
'$' => {},
352-
else => try writer.writeByte(c),
372+
}
373+
},
374+
.prereq_must_resolve => |bytes| {
375+
var state: enum { start, escape } = .start;
376+
for (bytes) |c| {
377+
switch (state) {
378+
.start => {
379+
switch (c) {
380+
'\\' => state = .escape,
381+
else => try writer.writeByte(c),
382+
}
383+
},
384+
.escape => {
385+
switch (c) {
386+
' ' => {},
387+
'\\' => {
388+
try writer.writeByte(c);
389+
continue;
390+
},
391+
else => try writer.writeByte('\\'),
392+
}
393+
try writer.writeByte(c);
394+
state = .start;
395+
},
353396
}
354-
state = .start;
355-
},
356-
}
397+
}
398+
},
399+
else => unreachable,
357400
}
358401
}
359402

360403
pub fn printError(self: Token, writer: anytype) @TypeOf(writer).Error!void {
361404
switch (self) {
362-
.target, .target_must_resolve, .prereq => unreachable, // not an error
405+
.target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
363406
.incomplete_quoted_prerequisite,
364407
.incomplete_target,
365408
=> |index_and_bytes| {
@@ -387,7 +430,7 @@ pub const Token = union(enum) {
387430

388431
fn errStr(self: Token) []const u8 {
389432
return switch (self) {
390-
.target, .target_must_resolve, .prereq => unreachable, // not an error
433+
.target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
391434
.incomplete_quoted_prerequisite => "incomplete quoted prerequisite",
392435
.incomplete_target => "incomplete target",
393436
.invalid_target => "invalid target",
@@ -538,6 +581,15 @@ test "prereq continuation" {
538581
, expect);
539582
}
540583

584+
// A backslash followed by CR LF is a line continuation: the expected output
// shows `foo.h` and `bar.h` as two separate prereqs, with none of the
// backslash, '\r' or '\n' bytes included in `foo.h`.
test "prereq continuation (CRLF)" {
585+
const expect =
586+
\\target = {foo.o}
587+
\\prereq = {foo.h}
588+
\\prereq = {bar.h}
589+
;
590+
try depTokenizer("foo.o: foo.h\\\r\nbar.h", expect);
591+
}
592+
541593
test "multiple prereqs" {
542594
const expect =
543595
\\target = {foo.o}
@@ -728,6 +780,32 @@ test "windows funky targets" {
728780
);
729781
}
730782

783+
// Checks unquoted prerequisite paths that use backslashes both as Windows path
// separators and as escapes for spaces. The first string is the depfile input;
// the second is the expected token dump after escapes have been resolved.
test "windows funky prereqs" {
784+
// Note we don't support unquoted escaped spaces at the very beginning of a relative path
785+
// e.g. `\ SpaceAtTheBeginning.c`
786+
// This typically wouldn't be seen in the wild, since depfiles usually use absolute paths
787+
// and supporting it would degrade error messages for cases where it was meant to be a
788+
// continuation, but the line ending is missing.
789+
try depTokenizer(
790+
\\cimport.o: \
791+
\\ trailingbackslash\\
792+
\\ C:\Users\John\ Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c \
793+
\\ somedir\\ a.c\
794+
\\ somedir/\ a.c\
795+
\\ somedir\\ \ \ b.c\
796+
\\ somedir\\ \\ \c.c\
797+
\\
798+
,
799+
\\target = {cimport.o}
800+
\\prereq = {trailingbackslash\}
801+
\\prereq = {C:\Users\John Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c}
802+
\\prereq = {somedir\ a.c}
803+
\\prereq = {somedir/ a.c}
804+
\\prereq = {somedir\ b.c}
805+
\\prereq = {somedir\ \ \c.c}
806+
);
807+
}
808+
731809
test "windows drive and forward slashes" {
732810
try depTokenizer(
733811
\\C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj: \
@@ -915,6 +993,15 @@ fn depTokenizer(input: []const u8, expect: []const u8) !void {
915993
resolve_buf.items.len = 0;
916994
try buffer.appendSlice("}");
917995
},
996+
.prereq_must_resolve => {
997+
try buffer.appendSlice("prereq = {");
998+
try token.resolve(resolve_buf.writer());
999+
for (resolve_buf.items) |b| {
1000+
try buffer.append(printable_char_tab[b]);
1001+
}
1002+
resolve_buf.items.len = 0;
1003+
try buffer.appendSlice("}");
1004+
},
9181005
else => {
9191006
try buffer.appendSlice("ERROR: ");
9201007
try token.printError(buffer.writer());

0 commit comments

Comments
 (0)