diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e3a594..97ab852 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,3 +34,49 @@ jobs: - name: Run tests run: zig build test -Doptimize=ReleaseSafe + + startup-smoke: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Zig 0.15.0 + uses: mlugg/setup-zig@v2 + with: + version: 0.15.0 + + - name: Install Chrome + uses: browser-actions/setup-chrome@v2 + with: + install-dependencies: true + + - name: Build + run: zig build -Doptimize=ReleaseSafe + + - name: Startup smoke test + run: | + set -euo pipefail + ./zig-out/bin/kuri > kuri.log 2>&1 & + kuri_pid=$! + cleanup() { + kill "${kuri_pid}" >/dev/null 2>&1 || true + wait "${kuri_pid}" >/dev/null 2>&1 || true + cat kuri.log + } + trap cleanup EXIT + + for _ in $(seq 1 30); do + if curl -sf http://127.0.0.1:8080/health > health.json; then + break + fi + sleep 1 + done + + curl -sf http://127.0.0.1:8080/health | tee health.json + curl -sf http://127.0.0.1:8080/tabs | tee tabs.json + + grep '"ok":true' health.json + grep '"tabs":1' health.json + grep '"id":"' tabs.json diff --git a/readme.md b/readme.md index c775023..e53db58 100644 --- a/readme.md +++ b/readme.md @@ -149,6 +149,27 @@ zig build test # run 230+ tests ./zig-out/bin/kuri-browse https://example.com ``` +### First run, shortest path + +```bash +# start the server; if CDP_URL is unset, kuri launches managed Chrome for you +./zig-out/bin/kuri + +# discover tabs from that managed browser +curl -s http://127.0.0.1:8080/discover + +# inspect the discovered tab list +curl -s http://127.0.0.1:8080/tabs +``` + +If you already have Chrome running with remote debugging, set `CDP_URL` to either the WebSocket or HTTP endpoint: + +```bash +CDP_URL=ws://127.0.0.1:9222/devtools/browser/... 
./zig-out/bin/kuri +# or +CDP_URL=http://127.0.0.1:9222 ./zig-out/bin/kuri +``` + ### Browse vercel.com in 4 commands ```bash @@ -556,7 +577,7 @@ kuri/ |---------|---------|-------------| | `HOST` | `127.0.0.1` | Server bind address | | `PORT` | `8080` | Server port | -| `CDP_URL` | *(none)* | Connect to existing Chrome (`ws://127.0.0.1:9222`) | +| `CDP_URL` | *(none)* | Connect to existing Chrome (`ws://...` or `http://127.0.0.1:9222`) | | `KURI_SECRET` | *(none)* | Auth secret for API requests | | `STATE_DIR` | `.kuri` | Session state directory | | `REQUEST_TIMEOUT_MS` | `30000` | HTTP request timeout | diff --git a/src/bridge/bridge.zig b/src/bridge/bridge.zig index 63afa3c..18fa417 100644 --- a/src/bridge/bridge.zig +++ b/src/bridge/bridge.zig @@ -23,7 +23,17 @@ pub const RefCache = struct { }; } + pub fn clear(self: *RefCache) void { + var it = self.refs.keyIterator(); + while (it.next()) |key| { + self.refs.allocator.free(key.*); + } + self.refs.clearRetainingCapacity(); + self.node_count = 0; + } + pub fn deinit(self: *RefCache) void { + self.clear(); self.refs.deinit(); } }; @@ -64,6 +74,11 @@ pub const Bridge = struct { } self.cdp_clients.deinit(); + var prev_it = self.prev_snapshots.iterator(); + while (prev_it.next()) |entry| { + self.allocator.free(entry.key_ptr.*); + freeSnapshot(self.allocator, entry.value_ptr.*); + } self.prev_snapshots.deinit(); var snap_it = self.snapshots.valueIterator(); @@ -134,7 +149,10 @@ pub const Bridge = struct { const tab = self.tabs.get(tab_id) orelse { if (self.snapshots.getPtr(tab_id)) |cache| cache.deinit(); _ = self.snapshots.remove(tab_id); - _ = self.prev_snapshots.remove(tab_id); + if (self.prev_snapshots.fetchRemove(tab_id)) |kv| { + self.allocator.free(kv.key); + freeSnapshot(self.allocator, kv.value); + } if (self.cdp_clients.fetchRemove(tab_id)) |kv| { kv.value.deinit(); self.allocator.destroy(kv.value); @@ -155,7 +173,10 @@ pub const Bridge = struct { if (self.snapshots.getPtr(tab_id)) |cache| cache.deinit(); 
_ = self.snapshots.remove(tab_id); - _ = self.prev_snapshots.remove(tab_id); + if (self.prev_snapshots.fetchRemove(tab_id)) |kv| { + self.allocator.free(kv.key); + freeSnapshot(self.allocator, kv.value); + } if (self.cdp_clients.fetchRemove(tab_id)) |kv| { kv.value.deinit(); self.allocator.destroy(kv.value); @@ -255,7 +276,7 @@ pub const Bridge = struct { const colon = std.mem.indexOfScalarPos(u8, json, field_pos + field.len, ':') orelse return null; var i = colon + 1; while (i < json.len and (json[i] == ' ' or json[i] == '"')) : (i += 1) {} - if (i == 0) return null; + if (i >= json.len) return null; const val_start = i; const val_end = std.mem.indexOfScalarPos(u8, json, val_start, '"') orelse return null; return json[val_start..val_end]; @@ -279,8 +300,59 @@ pub const Bridge = struct { }; return rec; } + + /// Deep-copy `snapshot`; the copy owns its own `ref`, `role`, `name`, and `value` strings. + /// Caller owns the returned slice and every string inside it. + /// On allocation failure, everything allocated so far is freed before the error propagates. + pub fn cloneSnapshot(self: *Bridge, snapshot: []const A11yNode) ![]A11yNode { + const copy = try self.allocator.alloc(A11yNode, snapshot.len); + errdefer self.allocator.free(copy); + + var initialized: usize = 0; + errdefer { + for (copy[0..initialized]) |node| { + self.allocator.free(node.ref); + self.allocator.free(node.role); + self.allocator.free(node.name); + self.allocator.free(node.value); + } + } + + for (snapshot, 0..)
|node, i| { + // Guard each string on its own: a failure partway through a node must not + // leak the fields already duplicated for it (the outer errdefer only covers finished nodes). + const ref = try self.allocator.dupe(u8, node.ref); + errdefer self.allocator.free(ref); + const role = try self.allocator.dupe(u8, node.role); + errdefer self.allocator.free(role); + const name = try self.allocator.dupe(u8, node.name); + errdefer self.allocator.free(name); + const value = try self.allocator.dupe(u8, node.value); + copy[i] = .{ + .ref = ref, + .role = role, + .name = name, + .value = value, + .backend_node_id = node.backend_node_id, + .depth = node.depth, + }; + initialized += 1; + } + + return copy; + } }; +fn freeSnapshot(allocator: std.mem.Allocator, snapshot: []const A11yNode) void { + for (snapshot) |node| { + allocator.free(node.ref); + allocator.free(node.role); + allocator.free(node.name); + allocator.free(node.value); + } + allocator.free(snapshot); +} + test "bridge init/deinit" { var bridge = Bridge.init(std.testing.allocator); defer bridge.deinit(); diff --git a/src/bridge/config.zig b/src/bridge/config.zig index 93c956f..23b1ab0 100644 --- a/src/bridge/config.zig +++ b/src/bridge/config.zig @@ -18,16 +18,23 @@ pub fn load() Config { .host = std.posix.getenv("HOST") orelse "127.0.0.1", .port = parsePort() orelse 8080, .cdp_url = std.posix.getenv("CDP_URL"), - .auth_secret = std.posix.getenv("BROWDIE_SECRET"), - .state_dir = std.posix.getenv("STATE_DIR") orelse ".browdie", + .auth_secret = getenvAny(&.{ "KURI_SECRET", "BROWDIE_SECRET" }), + .state_dir = getenvAny(&.{ "STATE_DIR" }) orelse ".kuri", .stale_tab_interval_s = parseU32("STALE_TAB_INTERVAL_S") orelse 30, .request_timeout_ms = parseU32("REQUEST_TIMEOUT_MS") orelse 30_000, .navigate_timeout_ms = parseU32("NAVIGATE_TIMEOUT_MS") orelse 30_000, - .extensions = std.posix.getenv("BROWDIE_EXTENSIONS"), + .extensions = getenvAny(&.{ "KURI_EXTENSIONS", "BROWDIE_EXTENSIONS" }), .headless = parseBool("HEADLESS") orelse true, }; } +fn getenvAny(names: []const []const u8) ?[]const u8 { + for (names) |name| { + if (std.posix.getenv(name)) |value| return value; + } + return null; +} + fn parsePort() ?u16 { const val = std.posix.getenv("PORT") orelse return null; return std.fmt.parseInt(u16, val, 10) catch null; diff --git a/src/browse_main.zig b/src/browse_main.zig index 
691b0ea..a7d9fae 100644 --- a/src/browse_main.zig +++ b/src/browse_main.zig @@ -1,5 +1,6 @@ const std = @import("std"); const markdown = @import("crawler/markdown.zig"); +const validator = @import("crawler/validator.zig"); const version = "0.1.0"; const user_agent = "kuri-browse/" ++ version; @@ -83,6 +84,16 @@ const Browser = struct { fn navigate(self: *Browser, url: []const u8) !void { const resolved = self.resolveUrl(url); + // SSRF validation — block private IPs, metadata endpoints, non-HTTP schemes + validator.validateUrl(resolved) catch |err| { + if (self.color) { + std.debug.print("\x1b[31m✗\x1b[0m blocked: {s} ({s})\n", .{ resolved, @errorName(err) }); + } else { + std.debug.print("error: blocked URL: {s} ({s})\n", .{ resolved, @errorName(err) }); + } + return error.FetchFailed; + }; + if (self.color) { std.debug.print("\x1b[2m→\x1b[0m loading \x1b[4m{s}\x1b[0m\n", .{resolved}); } else { diff --git a/src/cdp/client.zig b/src/cdp/client.zig index ae8964d..e8ee4c7 100644 --- a/src/cdp/client.zig +++ b/src/cdp/client.zig @@ -30,11 +30,11 @@ pub const EventBuffer = struct { } } - /// Check if any buffered event matches a method name. + /// Check if any buffered event matches a CDP method name exactly. 
pub fn hasEvent(self: *EventBuffer, method: []const u8) bool { for (self.items[0..self.len]) |item| { if (item) |ev| { - if (std.mem.indexOf(u8, ev, method) != null) return true; + if (eventMatchesMethod(ev, method)) return true; } } return false; @@ -105,8 +105,8 @@ pub const CdpClient = struct { var ws = &(self.ws orelse return error.ConnectionRefused); - const msg = try self.buildMessage(allocator, method, params_json); - const sent_id = self.next_id.load(.monotonic) - 1; // ID we just used + const sent_id = self.nextId(); + const msg = try self.buildMessageWithId(allocator, sent_id, method, params_json); defer allocator.free(msg); ws.sendText(msg) catch return error.ConnectionRefused; @@ -149,9 +149,8 @@ pub const CdpClient = struct { return parsed_id == expected_id; } - /// Build a JSON-RPC message for a CDP command. - pub fn buildMessage(self: *CdpClient, allocator: std.mem.Allocator, method: []const u8, params_json: ?[]const u8) ![]const u8 { - const id = self.nextId(); + /// Build a JSON-RPC message for a CDP command with an explicit ID. + pub fn buildMessageWithId(_: *CdpClient, allocator: std.mem.Allocator, id: u32, method: []const u8, params_json: ?[]const u8) ![]const u8 { if (params_json) |p| { return std.fmt.allocPrint(allocator, "{{\"id\":{d},\"method\":\"{s}\",\"params\":{s}}}", .{ id, method, p }); } else { @@ -159,6 +158,11 @@ pub const CdpClient = struct { } } + /// Build a JSON-RPC message for a CDP command (auto-assigns next ID). 
+ pub fn buildMessage(self: *CdpClient, allocator: std.mem.Allocator, method: []const u8, params_json: ?[]const u8) ![]const u8 { + return self.buildMessageWithId(allocator, self.nextId(), method, params_json); + } + pub fn disconnect(self: *CdpClient) void { if (self.ws) |*ws| { ws.close(); @@ -177,7 +181,7 @@ pub const CdpClient = struct { var attempts: u32 = 0; while (attempts < max_attempts) : (attempts += 1) { const response = ws.receiveMessageAlloc(allocator, 2 * 1024 * 1024) catch return false; - if (std.mem.indexOf(u8, response, method) != null) { + if (eventMatchesMethod(response, method)) { allocator.free(response); return true; } @@ -192,6 +196,14 @@ pub const CdpClient = struct { } }; +fn eventMatchesMethod(event_json: []const u8, method: []const u8) bool { + var match_buf: [256]u8 = undefined; + const match_pattern = std.fmt.bufPrint(&match_buf, "\"method\":\"{s}\"", .{method}) catch { + return std.mem.indexOf(u8, event_json, method) != null; + }; + return std.mem.indexOf(u8, event_json, match_pattern) != null; +} + test "CdpClient message building" { var client = CdpClient.init(std.testing.allocator, "ws://localhost:9222"); defer client.deinit(); diff --git a/src/cdp/har.zig b/src/cdp/har.zig index fc42abe..d57b15f 100644 --- a/src/cdp/har.zig +++ b/src/cdp/har.zig @@ -56,6 +56,14 @@ pub const HarRecorder = struct { } self.entries.clearRetainingCapacity(); + var pending_it = self.pending_requests.iterator(); + while (pending_it.next()) |kv| { + self.allocator.free(kv.key_ptr.*); + self.allocator.free(kv.value_ptr.url); + self.allocator.free(kv.value_ptr.method); + } + self.pending_requests.clearRetainingCapacity(); + // Enable Network domain via CDP _ = client.send(self.allocator, "Network.enable", null) catch |err| { std.log.warn("HAR: Network.enable failed: {s}", .{@errorName(err)}); @@ -78,12 +86,20 @@ pub const HarRecorder = struct { /// Add a manually observed request/response to the HAR log. 
pub fn addEntry(self: *HarRecorder, entry: HarEntry) !void { + const owned_url = try self.allocator.dupe(u8, entry.url); + errdefer self.allocator.free(owned_url); + const owned_method = try self.allocator.dupe(u8, entry.method); + errdefer self.allocator.free(owned_method); + const owned_status_text = try self.allocator.dupe(u8, entry.status_text); + errdefer self.allocator.free(owned_status_text); + const owned_mime_type = try self.allocator.dupe(u8, entry.mime_type); + errdefer self.allocator.free(owned_mime_type); const owned = HarEntry{ - .url = try self.allocator.dupe(u8, entry.url), - .method = try self.allocator.dupe(u8, entry.method), + .url = owned_url, + .method = owned_method, .status = entry.status, - .status_text = try self.allocator.dupe(u8, entry.status_text), - .mime_type = try self.allocator.dupe(u8, entry.mime_type), + .status_text = owned_status_text, + .mime_type = owned_mime_type, .timestamp = entry.timestamp, .duration_ms = entry.duration_ms, .request_size = entry.request_size, @@ -175,7 +191,13 @@ pub const HarRecorder = struct { }; } else if (std.mem.indexOf(u8, event_json, "\"Network.responseReceived\"") != null) { const request_id = extractField(event_json, "requestId") orelse return; - const pending = self.pending_requests.get(request_id) orelse return; + const pending_kv = self.pending_requests.fetchRemove(request_id) orelse return; + const pending = pending_kv.value; + defer { + self.allocator.free(pending_kv.key); + self.allocator.free(pending.url); + self.allocator.free(pending.method); + } // Extract status and mimeType from the response object const status_str = extractField(event_json, "status"); @@ -327,6 +349,33 @@ test "HarRecorder handleCdpEvent processes request and response" { // Send responseReceived for the same requestId rec.handleCdpEvent("{\"method\":\"Network.responseReceived\",\"params\":{\"requestId\":\"req1\",\"response\":{\"status\":200}}}"); try std.testing.expectEqual(@as(usize, 1), rec.entryCount()); + try 
std.testing.expectEqual(@as(usize, 0), rec.pending_requests.count()); +} + +test "HarRecorder start clears stale pending requests before enabling network" { + var rec = HarRecorder.init(std.testing.allocator); + defer rec.deinit(); + + const owned_id = try std.testing.allocator.dupe(u8, "stale"); + const owned_url = try std.testing.allocator.dupe(u8, "https://example.com/stale"); + const owned_method = try std.testing.allocator.dupe(u8, "GET"); + try rec.pending_requests.put(owned_id, .{ + .url = owned_url, + .method = owned_method, + .timestamp = 123, + }); + + rec.recording = true; + rec.handleCdpEvent("{\"method\":\"Network.requestWillBeSent\",\"params\":{\"requestId\":\"req1\",\"url\":\"https://example.com/page\",\"method\":\"GET\"}}"); + try std.testing.expectEqual(@as(usize, 2), rec.pending_requests.count()); + + var client = CdpClient.init(std.testing.allocator, "ws://127.0.0.1:1/devtools/browser/test"); + defer client.deinit(); + try std.testing.expectError(error.ConnectionRefused, rec.start(&client)); + + try std.testing.expect(rec.isRecording()); + try std.testing.expectEqual(@as(usize, 0), rec.entryCount()); + try std.testing.expectEqual(@as(usize, 0), rec.pending_requests.count()); } test "HarRecorder extractField helper" { diff --git a/src/cdp/websocket.zig b/src/cdp/websocket.zig index ffe9001..f944aa5 100644 --- a/src/cdp/websocket.zig +++ b/src/cdp/websocket.zig @@ -36,7 +36,9 @@ pub const WebSocketClient = struct { // Set read timeout so we don't block forever const timeout = std.posix.timeval{ .sec = 10, .usec = 0 }; - std.posix.setsockopt(stream.handle, std.posix.SOL.SOCKET, std.posix.SO.RCVTIMEO, std.mem.asBytes(&timeout)) catch {}; + std.posix.setsockopt(stream.handle, std.posix.SOL.SOCKET, std.posix.SO.RCVTIMEO, std.mem.asBytes(&timeout)) catch |err| { + std.log.warn("websocket: failed to set read timeout: {s}", .{@errorName(err)}); + }; var ws = WebSocketClient{ .allocator = allocator, @@ -136,7 +138,7 @@ pub const WebSocketClient = struct { 
if (n == 0) return Error.HandshakeFailed; const response = self.read_buf[0..n]; - if (std.mem.indexOf(u8, response, "101") == null) { + if (!std.mem.startsWith(u8, response, "HTTP/1.1 101")) { return Error.HandshakeFailed; } } @@ -298,6 +300,7 @@ pub const WebSocketClient = struct { } if (payload_len > max_size) return Error.MessageTooLarge; + if (payload_len > std.math.maxInt(usize)) return Error.MessageTooLarge; const len: usize = @intCast(payload_len); var mask_key: [4]u8 = undefined; diff --git a/src/chrome/launcher.zig b/src/chrome/launcher.zig index b6d5ca8..45fd26f 100644 --- a/src/chrome/launcher.zig +++ b/src/chrome/launcher.zig @@ -25,6 +25,17 @@ pub const Launcher = struct { ws_url: ?[]const u8, }; + pub const StartResult = struct { + cdp_port: u16, + cdp_url: []const u8, + }; + + const ParsedUrl = struct { + host: []const u8, + port: u16, + path: []const u8, + }; + const default_cdp_port: u16 = 9222; const max_restarts: u8 = 3; const health_timeout_ms: u32 = 2_000; @@ -35,6 +46,7 @@ pub const Launcher = struct { "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", }, else => &[_][]const u8{ + "chrome", "google-chrome", "chromium-browser", "chromium", @@ -58,23 +70,26 @@ pub const Launcher = struct { } /// Start Chrome or connect to an existing instance. - /// Returns the CDP port to connect to. - pub fn start(self: *Launcher, cfg: config.Config) !u16 { + /// Returns the resolved websocket CDP endpoint and port. 
+ pub fn start(self: *Launcher, cfg: config.Config) !StartResult { switch (self.mode) { .external => { - // Validate the external Chrome is reachable - const status = self.healthCheck(); - if (!status.alive) { - std.log.warn("external Chrome at port {d} is not reachable", .{self.cdp_port}); - } - return self.cdp_port; + const raw_url = cfg.cdp_url orelse return error.MissingCdpUrl; + try self.resolveExternal(raw_url); + return .{ + .cdp_port = self.cdp_port, + .cdp_url = self.cdpUrl() orelse return error.MissingDebuggerUrl, + }; }, .managed => { - _ = cfg; // Find a free CDP port self.cdp_port = try findFreePort(default_cdp_port); try self.launchChrome(); - return self.cdp_port; + try self.waitForDebuggerUrl(); + return .{ + .cdp_port = self.cdp_port, + .cdp_url = self.cdpUrl() orelse return error.MissingDebuggerUrl, + }; }, } } @@ -147,6 +162,11 @@ pub const Launcher = struct { return httpProbeJsonVersion(self.cdp_port); } + pub fn cdpUrl(self: *const Launcher) ?[]const u8 { + if (self.ws_url_len == 0) return null; + return self.ws_url_buf[0..self.ws_url_len]; + } + /// Supervise Chrome — call periodically. Restarts on crash. 
pub fn supervise(self: *Launcher) !void { if (self.mode == .external) return; @@ -196,6 +216,53 @@ pub const Launcher = struct { } return null; } + + fn resolveExternal(self: *Launcher, raw_url: []const u8) !void { + if (std.mem.startsWith(u8, raw_url, "ws://") or std.mem.startsWith(u8, raw_url, "wss://")) { + const parsed = parseSocketUrl(raw_url) orelse return error.InvalidCdpUrl; + self.cdp_port = parsed.port; + try self.storeWsUrl(raw_url); + + const status = httpProbe(raw_url, parsed.host, parsed.port, "/json/version"); + if (!status.alive) return error.ConnectionRefused; + return; + } + + if (std.mem.startsWith(u8, raw_url, "http://")) { + const parsed = parseHttpUrl(raw_url) orelse return error.InvalidCdpUrl; + self.cdp_port = parsed.port; + const status = httpProbe(raw_url, parsed.host, parsed.port, parsed.path); + if (!status.alive) return error.ConnectionRefused; + const ws_url = status.ws_url orelse return error.MissingDebuggerUrl; + try self.storeWsUrl(ws_url); + return; + } + + if (std.mem.startsWith(u8, raw_url, "https://")) { + return error.UnsupportedCdpScheme; + } + + return error.InvalidCdpUrl; + } + + fn waitForDebuggerUrl(self: *Launcher) !void { + var attempts: u8 = 0; + while (attempts < 20) : (attempts += 1) { + const status = self.healthCheck(); + if (status.alive and status.ws_url != null) { + try self.storeWsUrl(status.ws_url.?); + return; + } + std.Thread.sleep(250 * std.time.ns_per_ms); + } + return error.ConnectionRefused; + } + + fn storeWsUrl(self: *Launcher, ws_url: []const u8) !void { + if (ws_url.len > self.ws_url_buf.len) return error.NameTooLong; + @memcpy(self.ws_url_buf[0..ws_url.len], ws_url); + self.ws_url_len = ws_url.len; + } }; // ── Extension utilities ───────────────────────────────────────────────── @@ -265,13 +332,19 @@ pub fn isPortInUse(port: u16) bool { /// Probe Chrome's /json/version endpoint via raw TCP HTTP GET. /// Returns alive status and optional webSocketDebuggerUrl. 
fn httpProbeJsonVersion(port: u16) Launcher.ChromeStatus { - const addr = std.net.Address.initIp4(.{ 127, 0, 0, 1 }, port); - var stream = std.net.tcpConnectToAddress(addr) catch + return httpProbe("/json/version", "127.0.0.1", port, "/json/version"); +} + +fn httpProbe(raw_url: []const u8, host: []const u8, port: u16, path: []const u8) Launcher.ChromeStatus { + const connect_host = normalizeHost(host); + var stream = std.net.tcpConnectToHost(std.heap.page_allocator, connect_host, port) catch return .{ .alive = false, .ws_url = null }; defer stream.close(); - const request = "GET /json/version HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n"; + var req_buf: [512]u8 = undefined; + const request = std.fmt.bufPrint(&req_buf, "GET {s} HTTP/1.1\r\nHost: {s}:{d}\r\nConnection: close\r\n\r\n", .{ path, host, port }) catch + return .{ .alive = false, .ws_url = null }; _ = stream.write(request) catch return .{ .alive = false, .ws_url = null }; @@ -292,9 +365,53 @@ fn httpProbeJsonVersion(port: u16) Launcher.ChromeStatus { // Try to extract webSocketDebuggerUrl const ws_url = extractWsUrl(body); + _ = raw_url; return .{ .alive = true, .ws_url = ws_url }; } +fn normalizeHost(host: []const u8) []const u8 { + if (std.mem.eql(u8, host, "localhost")) return "127.0.0.1"; + return host; +} + +fn parseSocketUrl(url: []const u8) ?Launcher.ParsedUrl { + var remainder = url; + if (std.mem.startsWith(u8, url, "ws://")) { + remainder = url[5..]; + } else if (std.mem.startsWith(u8, url, "wss://")) { + remainder = url[6..]; + } else { + return null; + } + return parseHostPortPath(remainder, Launcher.default_cdp_port, "/"); +} + +fn parseHttpUrl(url: []const u8) ?Launcher.ParsedUrl { + if (!std.mem.startsWith(u8, url, "http://")) return null; + return parseHostPortPath(url[7..], 80, "/json/version"); +} + +fn parseHostPortPath(remainder: []const u8, default_port: u16, default_path: []const u8) ?Launcher.ParsedUrl { + const slash = std.mem.indexOfScalar(u8, remainder, '/') orelse 
remainder.len; + const host_port = remainder[0..slash]; + if (host_port.len == 0) return null; + + var host = host_port; + var port = default_port; + if (std.mem.indexOfScalar(u8, host_port, ':')) |colon| { + host = host_port[0..colon]; + port = std.fmt.parseInt(u16, host_port[colon + 1 ..], 10) catch return null; + } + if (host.len == 0) return null; + + const path = if (slash < remainder.len) remainder[slash..] else default_path; + return .{ + .host = normalizeHost(host), + .port = port, + .path = path, + }; +} + /// Extract the webSocketDebuggerUrl value from a JSON response body. fn extractWsUrl(body: []const u8) ?[]const u8 { const key = "\"webSocketDebuggerUrl\""; @@ -345,6 +462,20 @@ test "extractWsUrl returns null for missing key" { try std.testing.expect(extractWsUrl(body) == null); } +test "parseHttpUrl extracts host port and path" { + const parsed = parseHttpUrl("http://localhost:9333/json/version").?; + try std.testing.expectEqualStrings("127.0.0.1", parsed.host); + try std.testing.expectEqual(@as(u16, 9333), parsed.port); + try std.testing.expectEqualStrings("/json/version", parsed.path); +} + +test "parseSocketUrl extracts websocket port" { + const parsed = parseSocketUrl("ws://127.0.0.1:9444/devtools/browser/abc").?; + try std.testing.expectEqualStrings("127.0.0.1", parsed.host); + try std.testing.expectEqual(@as(u16, 9444), parsed.port); + try std.testing.expectEqualStrings("/devtools/browser/abc", parsed.path); +} + test "Launcher init managed mode" { const cfg = config.Config{ .host = "127.0.0.1", diff --git a/src/crawler/fetcher.zig b/src/crawler/fetcher.zig index 33b65ae..a9450ef 100644 --- a/src/crawler/fetcher.zig +++ b/src/crawler/fetcher.zig @@ -98,7 +98,10 @@ pub fn extractHtmlValue(json: []const u8, arena: std.mem.Allocator) ![]const u8 var i = str_start; while (i < json.len) : (i += 1) { if (json[i] == '"') break; - if (json[i] == '\\') i += 1; // skip next char (escape sequence) + if (json[i] == '\\') { + if (i + 1 >= json.len) return 
FetchError.FetchFailed; + i += 1; // skip next char (escape sequence) + } } if (i >= json.len) return FetchError.FetchFailed; diff --git a/src/crawler/markdown.zig b/src/crawler/markdown.zig index c2af9d6..a806026 100644 --- a/src/crawler/markdown.zig +++ b/src/crawler/markdown.zig @@ -89,6 +89,24 @@ pub fn htmlToMarkdown(html: []const u8, allocator: std.mem.Allocator) ![]const u } else if (std.mem.startsWith(u8, html[i..], "&nbsp;")) { try writer.writeByte(' '); i += 6; + } else if (std.mem.startsWith(u8, html[i..], "&apos;")) { + try writer.writeByte('\''); + i += 6; + } else if (decodeNumericEntity(html[i..])) |decoded| { + // Numeric entities: &#123; (decimal) or &#x7B; (hex) + if (decoded.codepoint < 128) { + try writer.writeByte(@intCast(decoded.codepoint)); + } else { + // Encode as UTF-8 + var utf8_buf: [4]u8 = undefined; + const utf8_len = std.unicode.utf8Encode(@intCast(decoded.codepoint), &utf8_buf) catch { + try writer.writeByte('?'); + i += decoded.len; + continue; + }; + try writer.writeAll(utf8_buf[0..utf8_len]); + } + i += decoded.len; } else { try writer.writeByte(html[i]); i += 1; @@ -103,6 +121,29 @@ pub fn htmlToMarkdown(html: []const u8, allocator: std.mem.Allocator) ![]const u return buf.toOwnedSlice(allocator); } +const NumericEntity = struct { + codepoint: u21, + len: usize, +}; + +fn decodeNumericEntity(entity: []const u8) ?NumericEntity { + if (!std.mem.startsWith(u8, entity, "&#")) return null; + + const is_hex = entity.len > 2 and (entity[2] == 'x' or entity[2] == 'X'); + const digits_start: usize = if (is_hex) 3 else 2; + const semicolon = std.mem.indexOfScalarPos(u8, entity, digits_start, ';') orelse return null; + if (semicolon == digits_start) return null; + + const digits = entity[digits_start..semicolon]; + const radix: u8 = if (is_hex) 16 else 10; + const codepoint = std.fmt.parseUnsigned(u21, digits, radix) catch return null; + + return .{ + .codepoint = codepoint, + .len = semicolon + 1, + }; +} + fn extractTagName(tag: []const u8) []const u8 { var 
end: usize = 0; while (end < tag.len and tag[end] != ' ' and tag[end] != '/' and tag[end] != '>') : (end += 1) {} diff --git a/src/crawler/pipeline.zig b/src/crawler/pipeline.zig index 485a860..3c10f96 100644 --- a/src/crawler/pipeline.zig +++ b/src/crawler/pipeline.zig @@ -43,10 +43,10 @@ pub const ThreadPool = struct { self.queue.deinit(self.allocator); } - pub fn submit(self: *ThreadPool, work_fn: *const fn (*anyopaque) void, context: *anyopaque) void { + pub fn submit(self: *ThreadPool, work_fn: *const fn (*anyopaque) void, context: *anyopaque) !void { self.mutex.lock(); defer self.mutex.unlock(); - self.queue.append(self.allocator, .{ .func = work_fn, .context = context }) catch return; + try self.queue.append(self.allocator, .{ .func = work_fn, .context = context }); } pub fn pendingCount(self: *ThreadPool) usize { diff --git a/src/main.zig b/src/main.zig index 3c08554..c7d2afe 100644 --- a/src/main.zig +++ b/src/main.zig @@ -10,6 +10,7 @@ pub fn main() !void { const gpa = gpa_impl.allocator(); const cfg = config.load(); + var runtime_cfg = cfg; std.log.info("kuri v0.1.0", .{}); std.log.info("listening on {s}:{d}", .{ cfg.host, cfg.port }); @@ -24,18 +25,23 @@ pub fn main() !void { std.log.info("launching managed Chrome instance", .{}); } - const cdp_port = chrome.start(cfg) catch |err| blk: { - std.log.warn("Chrome launch failed: {s}, continuing without Chrome", .{@errorName(err)}); - break :blk @as(u16, 9222); - }; - std.log.info("CDP port: {d}", .{cdp_port}); + const start_result = try chrome.start(cfg); + runtime_cfg.cdp_url = start_result.cdp_url; + std.log.info("CDP endpoint: {s}", .{start_result.cdp_url}); + std.log.info("CDP port: {d}", .{start_result.cdp_port}); // Initialize bridge (central state) var bridge = Bridge.init(gpa); defer bridge.deinit(); + // Hydrate the bridge before serving so first-run /tabs works immediately. 
+ var startup_arena_impl = std.heap.ArenaAllocator.init(gpa); + defer startup_arena_impl.deinit(); + const startup_discovered = try server.discoverTabs(startup_arena_impl.allocator(), &bridge, runtime_cfg, start_result.cdp_port); + std.log.info("startup discovery registered {d} tabs", .{startup_discovered}); + // Start HTTP server - try server.run(gpa, &bridge, cfg); + try server.run(gpa, &bridge, runtime_cfg, start_result.cdp_port); } test { diff --git a/src/server/response.zig b/src/server/response.zig index faf1ecb..08a141f 100644 --- a/src/server/response.zig +++ b/src/server/response.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const json_util = @import("../util/json.zig"); pub fn sendJson(request: *std.http.Server.Request, body: []const u8) void { request.respond(body, .{ @@ -6,22 +7,29 @@ pub fn sendJson(request: *std.http.Server.Request, body: []const u8) void { .{ .name = "content-type", .value = "application/json" }, .{ .name = "access-control-allow-origin", .value = "*" }, }, - }) catch {}; + }) catch |err| { + std.log.err("sendJson: failed to respond: {s}", .{@errorName(err)}); + }; } pub fn sendError(request: *std.http.Server.Request, status_code: u10, message: []const u8) void { const status: std.http.Status = @enumFromInt(status_code); - var buf: [256]u8 = undefined; - const body = std.fmt.bufPrint(&buf, "{{\"error\":\"{s}\"}}", .{message}) catch message; + const escaped = json_util.jsonEscape(message, std.heap.page_allocator) catch null; + defer if (escaped) |safe| std.heap.page_allocator.free(safe); + + var body_buf: [512]u8 = undefined; + const body = if (escaped) |safe| + std.fmt.bufPrint(&body_buf, "{{\"error\":\"{s}\"}}", .{safe}) catch "{\"error\":\"Internal Server Error\"}" + else + "{\"error\":\"Internal Server Error\"}"; + request.respond(body, .{ .status = status, .extra_headers = &.{ .{ .name = "content-type", .value = "application/json" }, .{ .name = "access-control-allow-origin", .value = "*" }, }, - }) catch {}; -} - -test 
"response helpers compile" { - try std.testing.expect(true); + }) catch |err| { + std.log.err("sendError: failed to respond (status {d}): {s}", .{ status_code, @errorName(err) }); + }; } diff --git a/src/server/router.zig b/src/server/router.zig index 4c929bb..26d593f 100644 --- a/src/server/router.zig +++ b/src/server/router.zig @@ -12,7 +12,7 @@ const protocol = @import("../cdp/protocol.zig"); const HarRecorder = @import("../cdp/har.zig").HarRecorder; const CdpClient = @import("../cdp/client.zig").CdpClient; -pub fn run(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config) !void { +pub fn run(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config, cdp_port: u16) !void { const address = try net.Address.parseIp4(cfg.host, cfg.port); var tcp_server = try address.listen(.{ .reuse_address = true, @@ -27,7 +27,7 @@ pub fn run(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config) !void { continue; }; - const thread = std.Thread.spawn(.{}, handleConnection, .{ gpa, bridge, cfg, conn }) catch |err| { + const thread = std.Thread.spawn(.{}, handleConnection, .{ gpa, bridge, cfg, cdp_port, conn }) catch |err| { std.log.err("thread spawn error: {s}", .{@errorName(err)}); conn.stream.close(); continue; @@ -36,7 +36,7 @@ pub fn run(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config) !void { } } -fn handleConnection(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config, conn: net.Server.Connection) void { +fn handleConnection(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config, cdp_port: u16, conn: net.Server.Connection) void { defer conn.stream.close(); var arena_impl = std.heap.ArenaAllocator.init(gpa); @@ -62,7 +62,7 @@ fn handleConnection(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config, conn: return; } - route(&request, arena, bridge, cfg); + route(&request, arena, bridge, cfg, cdp_port); if (!request.head.keep_alive) return; @@ -71,7 +71,7 @@ fn handleConnection(gpa: std.mem.Allocator, bridge: *Bridge, cfg: Config, conn: } } -fn route(request: *std.http.Server.Request, 
arena: std.mem.Allocator, bridge: *Bridge, cfg: Config) void { +fn route(request: *std.http.Server.Request, arena: std.mem.Allocator, bridge: *Bridge, cfg: Config, cdp_port: u16) void { const path = request.head.target; const clean_path = if (std.mem.indexOfScalar(u8, path, '?')) |idx| path[0..idx] else path; @@ -80,7 +80,7 @@ fn route(request: *std.http.Server.Request, arena: std.mem.Allocator, bridge: *B } else if (std.mem.eql(u8, clean_path, "/tabs")) { handleTabs(request, arena, bridge); } else if (std.mem.eql(u8, clean_path, "/discover")) { - handleDiscover(request, arena, bridge, cfg); + handleDiscover(request, arena, bridge, cfg, cdp_port); } else if (std.mem.eql(u8, clean_path, "/navigate")) { handleNavigate(request, arena, bridge, cfg); } else if (std.mem.eql(u8, clean_path, "/snapshot")) { @@ -284,7 +284,13 @@ fn handleTabs(request: *std.http.Server.Request, arena: std.mem.Allocator, bridg writer.writeAll("[") catch return; for (tabs, 0..) |tab, i| { if (i > 0) writer.writeAll(",") catch return; - writer.print("{{\"id\":\"{s}\",\"url\":\"{s}\",\"title\":\"{s}\"}}", .{ tab.id, tab.url, tab.title }) catch return; + writer.writeAll("{") catch return; + writeJsonField(writer, arena, "id", tab.id) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "url", tab.url) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "title", tab.title) catch return; + writer.writeAll("}") catch return; } writer.writeAll("]") catch return; @@ -420,7 +426,7 @@ fn handleSnapshot(request: *std.http.Server.Request, arena: std.mem.Allocator, b }; // Clear old refs and repopulate - ref_cache.refs.clearRetainingCapacity(); + ref_cache.clear(); for (snapshot) |node| { if (node.backend_node_id) |bid| { const owned_ref = bridge.allocator.dupe(u8, node.ref) catch continue; @@ -451,9 +457,15 @@ fn sendSnapshotResponse(request: *std.http.Server.Request, arena: std.mem.Alloca writer.writeAll("[") catch return; for (snapshot, 0..) 
|node, i| { if (i > 0) writer.writeAll(",") catch return; - writer.print("{{\"ref\":\"{s}\",\"role\":\"{s}\",\"name\":\"{s}\"", .{ node.ref, node.role, node.name }) catch return; + writer.writeAll("{") catch return; + writeJsonField(writer, arena, "ref", node.ref) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "role", node.role) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "name", node.name) catch return; if (node.value.len > 0) { - writer.print(",\"value\":\"{s}\"", .{node.value}) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "value", node.value) catch return; } writer.writeAll("}") catch return; } @@ -698,36 +710,13 @@ fn handleBrowdie(request: *std.http.Server.Request) void { resp.sendJson(request, browdie); } -fn handleDiscover(request: *std.http.Server.Request, arena: std.mem.Allocator, bridge: *Bridge, cfg: Config) void { - const cdp_base = cfg.cdp_url orelse { - resp.sendError(request, 400, "No CDP_URL configured"); - return; - }; - - // Parse host:port from CDP URL (strip ws:// prefix and path) - const after_scheme = if (std.mem.startsWith(u8, cdp_base, "ws://")) - cdp_base[5..] 
- else - cdp_base; - const host_end = std.mem.indexOfScalar(u8, after_scheme, '/') orelse after_scheme.len; - const host_port = after_scheme[0..host_end]; - - var host: []const u8 = "127.0.0.1"; - var port: u16 = 9222; - if (std.mem.indexOfScalar(u8, host_port, ':')) |colon| { - host = host_port[0..colon]; - if (std.mem.eql(u8, host, "localhost")) host = "127.0.0.1"; - port = std.fmt.parseInt(u16, host_port[colon + 1 ..], 10) catch 9222; - } +pub fn discoverTabs(arena: std.mem.Allocator, bridge: *Bridge, cfg: Config, cdp_port: u16) !usize { + const cdp_addr = parseCdpAddress(cfg.cdp_url, cdp_port); + const host = cdp_addr.host; + const port = cdp_addr.port; - const address = net.Address.parseIp4(host, port) catch { - resp.sendError(request, 502, "Cannot resolve Chrome address"); - return; - }; - const stream = net.tcpConnectToAddress(address) catch { - resp.sendError(request, 502, "Cannot connect to Chrome"); - return; - }; + const address = net.Address.parseIp4(host, port) catch return error.CannotResolveChromeAddress; + const stream = net.tcpConnectToAddress(address) catch return error.CannotConnectToChrome; defer stream.close(); // Set read timeout (2 seconds) to avoid blocking forever @@ -735,14 +724,8 @@ fn handleDiscover(request: *std.http.Server.Request, arena: std.mem.Allocator, b std.posix.setsockopt(stream.handle, std.posix.SOL.SOCKET, std.posix.SO.RCVTIMEO, std.mem.asBytes(&timeout)) catch {}; // HTTP/1.1 required — Chrome ignores HTTP/1.0 - const http_req = std.fmt.allocPrint(arena, "GET /json/list HTTP/1.1\r\nHost: {s}:{d}\r\nConnection: close\r\n\r\n", .{ host, port }) catch { - resp.sendError(request, 500, "Internal Server Error"); - return; - }; - stream.writeAll(http_req) catch { - resp.sendError(request, 502, "Failed to send request to Chrome"); - return; - }; + const http_req = try std.fmt.allocPrint(arena, "GET /json/list HTTP/1.1\r\nHost: {s}:{d}\r\nConnection: close\r\n\r\n", .{ host, port }); + try stream.writeAll(http_req); // Read response 
with Content-Length awareness var response_buf: [65536]u8 = undefined; @@ -761,16 +744,10 @@ fn handleDiscover(request: *std.http.Server.Request, arena: std.mem.Allocator, b } } - if (total == 0) { - resp.sendError(request, 502, "Empty response from Chrome"); - return; - } + if (total == 0) return error.EmptyResponseFromChrome; const raw_response = response_buf[0..total]; - const body_start = (std.mem.indexOf(u8, raw_response, "\r\n\r\n") orelse { - resp.sendError(request, 502, "Invalid response from Chrome"); - return; - }) + 4; + const body_start = (std.mem.indexOf(u8, raw_response, "\r\n\r\n") orelse return error.InvalidChromeResponse) + 4; const body = raw_response[body_start..total]; // Parse targets and register tabs @@ -789,16 +766,15 @@ fn handleDiscover(request: *std.http.Server.Request, arena: std.mem.Allocator, b const ws_val = extractSimpleJsonString(body, id_start, "\"webSocketDebuggerUrl\"") orelse ""; if (std.mem.eql(u8, type_val, "page") and ws_val.len > 0) { - // Dupe strings into arena so they outlive the stack buffer const entry = TabEntry{ - .id = arena.dupe(u8, id_val) catch id_val, - .url = arena.dupe(u8, url_val) catch url_val, - .title = arena.dupe(u8, title_val) catch title_val, - .ws_url = arena.dupe(u8, ws_val) catch ws_val, + .id = id_val, + .url = url_val, + .title = title_val, + .ws_url = ws_val, .created_at = @intCast(std.time.timestamp()), .last_accessed = @intCast(std.time.timestamp()), }; - bridge.putTab(entry) catch {}; + try bridge.putTab(entry); registered += 1; } @@ -806,6 +782,21 @@ fn handleDiscover(request: *std.http.Server.Request, arena: std.mem.Allocator, b pos = next_id; } + return registered; +} + +fn handleDiscover(request: *std.http.Server.Request, arena: std.mem.Allocator, bridge: *Bridge, cfg: Config, cdp_port: u16) void { + const registered = discoverTabs(arena, bridge, cfg, cdp_port) catch |err| { + switch (err) { + error.CannotResolveChromeAddress => resp.sendError(request, 502, "Cannot resolve Chrome address"), 
+ error.CannotConnectToChrome => resp.sendError(request, 502, "Cannot connect to Chrome"), + error.EmptyResponseFromChrome => resp.sendError(request, 502, "Empty response from Chrome"), + error.InvalidChromeResponse => resp.sendError(request, 502, "Invalid response from Chrome"), + else => resp.sendError(request, 500, "Internal Server Error"), + } + return; + }; + const result = std.fmt.allocPrint(arena, "{{\"discovered\":{d},\"total_tabs\":{d}}}", .{ registered, bridge.tabCount() }) catch { resp.sendError(request, 500, "Internal Server Error"); @@ -814,6 +805,16 @@ fn handleDiscover(request: *std.http.Server.Request, arena: std.mem.Allocator, b resp.sendJson(request, result); } +fn freeOwnedSnapshot(allocator: std.mem.Allocator, snapshot: []const @import("../snapshot/a11y.zig").A11yNode) void { + for (snapshot) |node| { + allocator.free(node.ref); + allocator.free(node.role); + allocator.free(node.name); + allocator.free(node.value); + } + allocator.free(snapshot); +} + fn findContentLength(headers: []const u8) ?usize { // Chrome sends "Content-Length:1773" (no space after colon) const patterns = [_][]const u8{ "Content-Length:", "Content-Length: ", "content-length:", "content-length: " }; @@ -828,6 +829,44 @@ fn findContentLength(headers: []const u8) ?usize { return null; } +const CdpAddress = struct { + host: []const u8, + port: u16, +}; + +fn parseCdpAddress(cdp_url: ?[]const u8, fallback_port: u16) CdpAddress { + const raw = cdp_url orelse return .{ .host = "127.0.0.1", .port = fallback_port }; + var remainder = raw; + var default_port = fallback_port; + + if (std.mem.startsWith(u8, raw, "ws://")) { + remainder = raw[5..]; + default_port = 80; + } else if (std.mem.startsWith(u8, raw, "wss://")) { + remainder = raw[6..]; + default_port = 443; + } else if (std.mem.startsWith(u8, raw, "http://")) { + remainder = raw[7..]; + default_port = 80; + } else if (std.mem.startsWith(u8, raw, "https://")) { + remainder = raw[8..]; + default_port = 443; + } + + const 
host_end = std.mem.indexOfScalar(u8, remainder, '/') orelse remainder.len; + const host_port = remainder[0..host_end]; + if (std.mem.indexOfScalar(u8, host_port, ':')) |colon| { + var host = host_port[0..colon]; + if (std.mem.eql(u8, host, "localhost")) host = "127.0.0.1"; + const port = std.fmt.parseInt(u16, host_port[colon + 1 ..], 10) catch default_port; + return .{ .host = host, .port = port }; + } + + var host = host_port; + if (std.mem.eql(u8, host, "localhost")) host = "127.0.0.1"; + return .{ .host = host, .port = default_port }; +} + fn extractSimpleJsonString(json: []const u8, start: usize, field: []const u8) ?[]const u8 { const field_pos = std.mem.indexOfPos(u8, json, start, field) orelse return null; if (field_pos - start > 1000) return null; @@ -1444,9 +1483,9 @@ fn handleDiffSnapshot(request: *std.http.Server.Request, arena: std.mem.Allocato }; // Get previous snapshot from bridge (empty if first call) - bridge.mu.lock(); + bridge.mu.lockShared(); const prev_nodes = if (bridge.prev_snapshots.get(tab_id)) |prev| prev else &[_]a11y.A11yNode{}; - bridge.mu.unlock(); + bridge.mu.unlockShared(); // Compute diff const diff_mod = @import("../snapshot/diff.zig"); @@ -1456,10 +1495,30 @@ fn handleDiffSnapshot(request: *std.http.Server.Request, arena: std.mem.Allocato }; // Store current snapshot as previous for next diff + const owned_current = bridge.cloneSnapshot(current) catch { + resp.sendError(request, 500, "Failed to persist snapshot"); + return; + }; { bridge.mu.lock(); defer bridge.mu.unlock(); - bridge.prev_snapshots.put(tab_id, current) catch {}; + + if (bridge.prev_snapshots.fetchRemove(tab_id)) |kv| { + freeOwnedSnapshot(bridge.allocator, kv.value); + bridge.allocator.free(kv.key); + } + + const owned_key = bridge.allocator.dupe(u8, tab_id) catch { + freeOwnedSnapshot(bridge.allocator, owned_current); + resp.sendError(request, 500, "Failed to persist snapshot"); + return; + }; + bridge.prev_snapshots.put(owned_key, owned_current) catch { + 
bridge.allocator.free(owned_key); + freeOwnedSnapshot(bridge.allocator, owned_current); + resp.sendError(request, 500, "Failed to persist snapshot"); + return; + }; } // Serialize diff as JSON @@ -1473,12 +1532,26 @@ fn handleDiffSnapshot(request: *std.http.Server.Request, arena: std.mem.Allocato .removed => "removed", .changed => "changed", }; - writer.print("{{\"kind\":\"{s}\",\"ref\":\"{s}\",\"role\":\"{s}\",\"name\":\"{s}\"}}", .{ kind_str, entry.node.ref, entry.node.role, entry.node.name }) catch return; + writer.writeAll("{") catch return; + writeJsonField(writer, arena, "kind", kind_str) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "ref", entry.node.ref) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "role", entry.node.role) catch return; + writer.writeAll(",") catch return; + writeJsonField(writer, arena, "name", entry.node.name) catch return; + writer.writeAll("}") catch return; } writer.writeAll("]") catch return; resp.sendJson(request, json_buf.items); } +fn writeJsonField(writer: anytype, allocator: std.mem.Allocator, key: []const u8, value: []const u8) !void { + const escaped = try json_util.jsonEscape(value, allocator); + defer allocator.free(escaped); + try writer.print("\"{s}\":\"{s}\"", .{ key, escaped }); +} + fn handleEmulate(request: *std.http.Server.Request, arena: std.mem.Allocator, bridge: *Bridge) void { const target = request.head.target; const tab_id = getQueryParam(target, "tab_id") orelse { @@ -3611,6 +3684,33 @@ test "jsonEscapeAlloc escapes special chars" { try std.testing.expectEqualStrings("line1\\nline2\\r\\n", nl); } +test "parseCdpAddress falls back to managed chrome port" { + const addr = parseCdpAddress(null, 9224); + try std.testing.expectEqualStrings("127.0.0.1", addr.host); + try std.testing.expectEqual(@as(u16, 9224), addr.port); +} + +test "parseCdpAddress accepts http discovery endpoint" { + const addr = 
parseCdpAddress("http://localhost:9333/json/version", 9224); + try std.testing.expectEqualStrings("127.0.0.1", addr.host); + try std.testing.expectEqual(@as(u16, 9333), addr.port); +} + +test "parseCdpAddress accepts websocket endpoint path" { + const addr = parseCdpAddress("ws://127.0.0.1:9444/devtools/browser/abc", 9224); + try std.testing.expectEqualStrings("127.0.0.1", addr.host); + try std.testing.expectEqual(@as(u16, 9444), addr.port); +} + +test "writeJsonField escapes embedded quotes" { + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(std.testing.allocator); + const writer = buf.writer(std.testing.allocator); + + try writeJsonField(writer, std.testing.allocator, "title", "say \"hello\"\nnext"); + try std.testing.expectEqualStrings("\"title\":\"say \\\"hello\\\"\\nnext\"", buf.items); +} + test "script/inject accepts POST body" { // Route matching test — verify POST method is supported const path = "/script/inject?tab_id=abc"; diff --git a/src/snapshot/a11y.zig b/src/snapshot/a11y.zig index 28a418d..cac53df 100644 --- a/src/snapshot/a11y.zig +++ b/src/snapshot/a11y.zig @@ -126,6 +126,10 @@ pub fn buildSnapshot( allocator: std.mem.Allocator, ) ![]A11yNode { var result: std.ArrayList(A11yNode) = .empty; + errdefer { + for (result.items) |node| allocator.free(node.ref); + result.deinit(allocator); + } for (nodes) |node| { if (opts.max_depth) |max| { @@ -164,6 +168,7 @@ pub fn buildSnapshot( if (opts.compact) { // Collect all non-StaticText names var name_set: std.StringHashMap(void) = .init(allocator); + defer name_set.deinit(); for (result.items) |node| { if (!std.mem.eql(u8, node.role, "StaticText") and node.name.len > 2) { try name_set.put(node.name, {}); @@ -171,6 +176,12 @@ pub fn buildSnapshot( } // Filter: keep StaticText only if its name is NOT in the set var filtered: std.ArrayList(A11yNode) = .empty; + errdefer { + for (filtered.items) |node| { + if (node.ref.len != 0) allocator.free(node.ref); + } + filtered.deinit(allocator); + } var 
ref_idx: usize = 0; for (result.items) |node| { if (std.mem.eql(u8, node.role, "StaticText")) { @@ -199,6 +210,8 @@ pub fn buildSnapshot( .depth = node.depth, }); } + for (result.items) |node| allocator.free(node.ref); + result.deinit(allocator); return filtered.toOwnedSlice(allocator); } diff --git a/src/test/integration.zig b/src/test/integration.zig index b7b7a48..2c73e93 100644 --- a/src/test/integration.zig +++ b/src/test/integration.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const net = std.net; // Import all modules under test const config_mod = @import("../bridge/config.zig"); @@ -14,6 +15,50 @@ const validator = @import("../crawler/validator.zig"); const json_util = @import("../util/json.zig"); const harness_mod = @import("harness.zig"); const launcher_mod = @import("../chrome/launcher.zig"); +const router_mod = @import("../server/router.zig"); + +const FakeChromeServer = struct { + port: u16, + thread: std.Thread, + + fn start(body: []const u8) !FakeChromeServer { + var port: u16 = 19440; + while (port < 19540) : (port += 1) { + const address = try net.Address.parseIp4("127.0.0.1", port); + const server = address.listen(.{ .reuse_address = true }) catch |err| switch (err) { + error.AddressInUse => continue, + else => return err, + }; + const thread = try std.Thread.spawn(.{}, serveOnce, .{ server, body }); + return .{ .port = port, .thread = thread }; + } + return error.NoFreePort; + } + + fn stop(self: *FakeChromeServer) void { + self.thread.join(); + } + + fn serveOnce(server: net.Server, body: []const u8) !void { + var tcp_server = server; + defer tcp_server.deinit(); + + const conn = try tcp_server.accept(); + defer conn.stream.close(); + + var req_buf: [2048]u8 = undefined; + _ = conn.stream.read(&req_buf) catch 0; + + const response = try std.fmt.allocPrint( + std.heap.page_allocator, + "HTTP/1.1 200 OK\r\nContent-Length:{d}\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n{s}", + .{ body.len, body }, + ); + defer 
std.heap.page_allocator.free(response); + + try conn.stream.writeAll(response); + } +}; // ─── Config Tests ─────────────────────────────────────────────────────── @@ -23,12 +68,67 @@ test "config defaults are sensible" { try std.testing.expectEqual(@as(u16, 8080), cfg.port); try std.testing.expectEqual(@as(?[]const u8, null), cfg.cdp_url); try std.testing.expectEqual(@as(?[]const u8, null), cfg.auth_secret); - try std.testing.expectEqualStrings(".browdie", cfg.state_dir); + try std.testing.expectEqualStrings(".kuri", cfg.state_dir); try std.testing.expectEqual(@as(u32, 30), cfg.stale_tab_interval_s); try std.testing.expectEqual(@as(u32, 30_000), cfg.request_timeout_ms); try std.testing.expectEqual(@as(u32, 30_000), cfg.navigate_timeout_ms); } +test "discoverTabs hydrates bridge from Chrome target list" { + const body = + \\[ + \\ { + \\ "id":"page-1", + \\ "type":"page", + \\ "url":"https://example.com", + \\ "title":"Example", + \\ "webSocketDebuggerUrl":"ws://127.0.0.1:9222/devtools/page/page-1" + \\ }, + \\ { + \\ "id":"worker-1", + \\ "type":"service_worker", + \\ "url":"https://example.com/sw.js", + \\ "title":"Worker", + \\ "webSocketDebuggerUrl":"ws://127.0.0.1:9222/devtools/page/worker-1" + \\ } + \\] + ; + + var fake = try FakeChromeServer.start(body); + defer fake.stop(); + + const cdp_url = try std.fmt.allocPrint(std.testing.allocator, "http://127.0.0.1:{d}/json/version", .{fake.port}); + defer std.testing.allocator.free(cdp_url); + + var bridge = Bridge.init(std.testing.allocator); + defer bridge.deinit(); + + const cfg = config_mod.Config{ + .host = "127.0.0.1", + .port = 8080, + .cdp_url = cdp_url, + .auth_secret = null, + .state_dir = ".kuri", + .stale_tab_interval_s = 30, + .request_timeout_ms = 30_000, + .navigate_timeout_ms = 30_000, + .extensions = null, + .headless = true, + }; + + var arena_impl = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_impl.deinit(); + + const discovered = try 
router_mod.discoverTabs(arena_impl.allocator(), &bridge, cfg, fake.port); + try std.testing.expectEqual(@as(usize, 1), discovered); + try std.testing.expectEqual(@as(usize, 1), bridge.tabCount()); + + const tab = bridge.getTab("page-1").?; + try std.testing.expectEqualStrings("https://example.com", tab.url); + try std.testing.expectEqualStrings("Example", tab.title); + try std.testing.expectEqualStrings("ws://127.0.0.1:9222/devtools/page/page-1", tab.ws_url); +} + // ─── Bridge Stress Tests ──────────────────────────────────────────────── test "bridge handles many tabs" {