diff --git a/README.md b/README.md index b79e315..7abf7d8 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,8 @@ > **Alpha software — API is stabilizing but may change** > > codedb works and is used daily in production AI workflows, but: -> - **Language support** — Zig, Python, TypeScript/JavaScript, Rust, Go, PHP, Ruby, HCL, R, Dart/Flutter +> - **Parser support** — Zig, C/C++, Python, TypeScript/JavaScript, Rust, Go, PHP, Ruby, HCL, R, Dart/Flutter +> - **Language detection** — also tags Java, Kotlin, Svelte, Vue, Astro, shell, CSS/SCSS, SQL, protobuf, Fortran, LLVM IR, MLIR, and TableGen files in trees/snapshots > - **No auth** — HTTP server binds to localhost only > - **Snapshot format** may change between versions > - **MCP protocol** is JSON-RPC 2.0 over stdio (stable) diff --git a/src/explore.zig b/src/explore.zig index 810c3ba..931d839 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -100,6 +100,20 @@ pub const Language = enum(u8) { yaml, unknown, dart, + java, + kotlin, + svelte, + vue, + astro, + shell, + css, + scss, + sql, + protobuf, + fortran, + llvm_ir, + mlir, + tablegen, }; pub fn detectLanguage(path: []const u8) Language { @@ -107,7 +121,8 @@ pub fn detectLanguage(path: []const u8) Language { if (std.mem.endsWith(u8, path, ".c") or std.mem.endsWith(u8, path, ".h")) return .c; if (std.mem.endsWith(u8, path, ".cpp") or std.mem.endsWith(u8, path, ".hpp") or std.mem.endsWith(u8, path, ".cc") or std.mem.endsWith(u8, path, ".hh") or - std.mem.endsWith(u8, path, ".cxx") or std.mem.endsWith(u8, path, ".hxx")) + std.mem.endsWith(u8, path, ".cxx") or std.mem.endsWith(u8, path, ".hxx") or + std.mem.endsWith(u8, path, ".mm")) return .cpp; if (std.mem.endsWith(u8, path, ".py")) return .python; if (std.mem.endsWith(u8, path, ".js") or std.mem.endsWith(u8, path, ".jsx")) return .javascript; @@ -122,6 +137,20 @@ pub fn detectLanguage(path: []const u8) Language { if (std.mem.endsWith(u8, path, ".json")) return .json; if (std.mem.endsWith(u8, path, ".yaml") or std.mem.endsWith(u8, path, ".yml")) return .yaml; if (std.mem.endsWith(u8, path, ".dart")) return .dart; + if (std.mem.endsWith(u8, path, ".java")) return .java; + if (std.mem.endsWith(u8, path, ".kt")) return .kotlin; + if (std.mem.endsWith(u8, path, ".svelte")) return .svelte; + if (std.mem.endsWith(u8, path, ".vue")) return .vue; + if (std.mem.endsWith(u8, path, ".astro")) return .astro; + if (std.mem.endsWith(u8, path, ".sh")) return .shell; + if (std.mem.endsWith(u8, path, ".css")) return .css; + if (std.mem.endsWith(u8, path, ".scss")) return .scss; + if (std.mem.endsWith(u8, path, ".sql")) return .sql; + if (std.mem.endsWith(u8, path, ".proto")) return .protobuf; + if (std.mem.endsWith(u8, path, ".f90")) return .fortran; + if (std.mem.endsWith(u8, path, ".ll")) return .llvm_ir; + if (std.mem.endsWith(u8, path, ".mlir")) return .mlir; + if (std.mem.endsWith(u8, path, ".td")) return .tablegen; return .unknown; } @@ -3031,9 +3060,15 @@ pub fn isCommentOrBlank(line: []const u8, language: Language) bool { if (trimmed.len == 0) return true; return switch (language) { .zig, .rust, .go_lang => std.mem.startsWith(u8, trimmed, "//"), - .python, .ruby, .r => std.mem.startsWith(u8, trimmed, "#"), + .python, .ruby, .r, .shell => std.mem.startsWith(u8, trimmed, "#"), .hcl => std.mem.startsWith(u8, trimmed, "#") or std.mem.startsWith(u8, trimmed, "//") or std.mem.startsWith(u8, trimmed, "/*") or std.mem.startsWith(u8, trimmed, "*"), - .javascript, .typescript, .c, .cpp, .dart => std.mem.startsWith(u8, trimmed, "//") or std.mem.startsWith(u8, trimmed, "/*") or std.mem.startsWith(u8, trimmed, "*"), + .javascript, .typescript, .c, .cpp, .dart, .java, .kotlin, .protobuf, .mlir, .tablegen => std.mem.startsWith(u8, trimmed, "//") or std.mem.startsWith(u8, trimmed, "/*") or std.mem.startsWith(u8, trimmed, "*"), + .svelte, .vue, .astro => std.mem.startsWith(u8, trimmed, "//") or std.mem.startsWith(u8, trimmed, "/*") or std.mem.startsWith(u8, trimmed, "*") or std.mem.startsWith(u8, trimmed, "", .svelte)); + try testing.expect(isCommentOrBlank(" ", .vue)); + try testing.expect(isCommentOrBlank(" ", .astro)); + try testing.expect(isCommentOrBlank(" # shell comment", .shell)); + try testing.expect(isCommentOrBlank(" /* css block comment */", .css)); + try testing.expect(isCommentOrBlank(" // scss line comment", .scss)); + try testing.expect(isCommentOrBlank(" -- sql comment", .sql)); + try testing.expect(isCommentOrBlank(" // proto comment", .protobuf)); + try testing.expect(isCommentOrBlank(" ! fortran comment", .fortran)); + try testing.expect(isCommentOrBlank(" ; llvm ir comment", .llvm_ir)); + try testing.expect(isCommentOrBlank(" // mlir comment", .mlir)); + try testing.expect(isCommentOrBlank(" // tablegen comment", .tablegen)); + try testing.expect(!isCommentOrBlank(" SELECT * FROM users;", .sql)); +} + test "isCommentOrBlank: tabs and mixed whitespace" { try testing.expect(isCommentOrBlank("\t\t// tabbed comment", .zig)); try testing.expect(isCommentOrBlank(" \t \t ", .zig)); @@ -2146,6 +2164,7 @@ test "detectLanguage: all supported extensions" { try testing.expect(explore.detectLanguage("app.hh") == .cpp); try testing.expect(explore.detectLanguage("app.cxx") == .cpp); try testing.expect(explore.detectLanguage("app.hxx") == .cpp); + try testing.expect(explore.detectLanguage("bridge.mm") == .cpp); try testing.expect(explore.detectLanguage("script.py") == .python); try testing.expect(explore.detectLanguage("app.js") == .javascript); try testing.expect(explore.detectLanguage("comp.jsx") == .javascript); @@ -2158,6 +2177,20 @@ test "detectLanguage: all supported extensions" { try testing.expect(explore.detectLanguage("pkg.json") == .json); try testing.expect(explore.detectLanguage("config.yaml") == .yaml); try testing.expect(explore.detectLanguage("config.yml") == .yaml); + try testing.expect(explore.detectLanguage("Main.java") == .java); + try testing.expect(explore.detectLanguage("App.kt") == .kotlin); + try testing.expect(explore.detectLanguage("Widget.svelte") == .svelte); + try testing.expect(explore.detectLanguage("Widget.vue") == .vue); + try testing.expect(explore.detectLanguage("Page.astro") == .astro); + try testing.expect(explore.detectLanguage("bootstrap.sh") == .shell); + try testing.expect(explore.detectLanguage("styles.css") == .css); + try testing.expect(explore.detectLanguage("styles.scss") == .scss); + try testing.expect(explore.detectLanguage("schema.sql") == .sql); + try testing.expect(explore.detectLanguage("service.proto") == .protobuf); + try testing.expect(explore.detectLanguage("solver.f90") == .fortran); + try testing.expect(explore.detectLanguage("module.ll") == .llvm_ir); + try testing.expect(explore.detectLanguage("dialect.mlir") == .mlir); + try testing.expect(explore.detectLanguage("records.td") == .tablegen); try testing.expect(explore.detectLanguage("Makefile") == .unknown); try testing.expect(explore.detectLanguage("no_ext") == .unknown); }