Skip to content

Commit a028026

Browse files
Peter Marreck
authored and committed
fix(embedding): send Connection: close to avoid stale keep-alive
Root cause of the WriteFailed-at-batch-32 issue: std.http.Client pools keep-alive sockets. When the embedding server closes an idle socket (uvicorn does this after a short window), the next POST lands on a half-closed socket and fails mid-sendBodyComplete with error.WriteFailed. Retry didn't help because all retries reused the same pooled socket and got the same failure. Live reproduction on validate_gui confirmed the exact same 186-byte payload failed 3x in a row via codescan, but succeeded via curl with a fresh connection. Sending Connection: close forces std.http to open a fresh TCP connection for each embed request. Extra per-request cost is a one-time TCP handshake (~ms on localhost), negligible for indexing. Verified end-to-end: validate_gui indexes 83/83 files with zero errors post-fix (previously stalled at 32/103).
1 parent e0e3b9d commit a028026

1 file changed

Lines changed: 23 additions & 5 deletions

File tree

src/embedding_http.zig

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,14 +37,18 @@ pub fn embed(
3737
const body = try buildEmbedRequest(allocator, model, inputs, keep_alive, dialect);
3838
defer allocator.free(body);
3939

40-
var header_buf: [4]std.http.Header = undefined;
41-
var header_count: usize = 2;
40+
var header_buf: [5]std.http.Header = undefined;
41+
var header_count: usize = 3;
4242
header_buf[0] = .{ .name = "Content-Type", .value = "application/json" };
4343
header_buf[1] = .{ .name = "Accept", .value = "application/json" };
44+
// std.http.Client reuses keep-alive sockets; servers may close them after
45+
// short idle windows, which surfaces as WriteFailed mid-batch on re-use.
46+
// Force a fresh TCP connection per embed request.
47+
header_buf[2] = .{ .name = "Connection", .value = "close" };
4448
if (dialect == .openai) {
4549
if (auth_header) |key| {
46-
header_buf[2] = .{ .name = "Authorization", .value = key };
47-
header_count = 3;
50+
header_buf[3] = .{ .name = "Authorization", .value = key };
51+
header_count = 4;
4852
}
4953
}
5054

@@ -342,12 +346,14 @@ pub const MockTransportCtx = struct { tags_body: []const u8,
342346
embed_should_fail: bool = false,
343347
status_override: ?u16 = null,
344348
auth_header_sent: bool = false,
349+
connection_close_sent: bool = false,
345350

346351
pub fn send(ctx_ptr: *anyopaque, allocator: std.mem.Allocator, req: HttpRequest) !HttpResponse { const self: *MockTransportCtx = @ptrCast(@alignCast(ctx_ptr));
347352
for (req.headers) |h| {
348353
if (std.mem.eql(u8, h.name, "Authorization")) {
349354
self.auth_header_sent = true;
350-
break;
355+
} else if (std.ascii.eqlIgnoreCase(h.name, "Connection") and std.ascii.eqlIgnoreCase(h.value, "close")) {
356+
self.connection_close_sent = true;
351357
}
352358
}
353359
if (std.mem.endsWith(u8, req.url, "/api/tags")) {
@@ -514,6 +520,18 @@ test "embed with ollama dialect does not send auth header" {
514520
try std.testing.expect(!mock.auth_header_sent);
515521
}
516522

523+
// Regression test for the stale keep-alive failure mode: every embed request
// must carry a "Connection: close" header so the HTTP client never reuses a
// pooled socket that the server may have half-closed. The mock transport
// records whether it observed that header on the outgoing request.
test "embed sends Connection: close to force fresh TCP connections" {
    const gpa = std.testing.allocator;

    // Mock transport with empty tags/ps bodies; we only care about the
    // request headers it captures, not the response payloads.
    var mock_ctx = MockTransportCtx{
        .tags_body = "",
        .ps_body = "",
    };

    const test_inputs = [_][]const u8{"hello"};

    // Ollama dialect, no auth key — the Connection header must be sent
    // unconditionally, independent of dialect and authorization.
    const result = try embed(gpa, mock_ctx.transport(), "http://localhost:11434", "bge-large", &test_inputs, null, .ollama, null);
    defer freeEmbeddings(gpa, result);

    // The mock flips this flag only when it saw "Connection: close"
    // (case-insensitive) among the request headers.
    try std.testing.expect(mock_ctx.connection_close_sent);
}
517535
test "ensureModelAvailable reports missing model" {
518536
const allocator = std.testing.allocator;
519537
try skipIfNoOllama(allocator);

0 commit comments

Comments
 (0)