Skip to content

Commit 927c610

Browse files
authored
Merge pull request #36301 from vespa-engine/thomasht86/vespa-tmgrammar
Add TextMate grammar for Vespa schema language
2 parents 43b254d + bbcc40b commit 927c610

18 files changed

Lines changed: 4392 additions & 0 deletions

File tree

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
2+
package ai.vespa.schemals;
3+
4+
import java.io.File;
5+
import java.io.FileWriter;
6+
import java.io.IOException;
7+
import java.nio.file.Files;
8+
import java.nio.file.Path;
9+
import java.nio.file.Paths;
10+
import java.util.ArrayList;
11+
import java.util.LinkedHashMap;
12+
import java.util.List;
13+
import java.util.Map;
14+
import java.util.stream.Stream;
15+
16+
import org.eclipse.lsp4j.SemanticTokensLegend;
17+
import org.eclipse.lsp4j.SemanticTokensWithRegistrationOptions;
18+
import org.eclipse.lsp4j.TextDocumentIdentifier;
19+
import org.eclipse.lsp4j.TextDocumentItem;
20+
21+
import com.google.gson.Gson;
22+
import com.google.gson.GsonBuilder;
23+
import com.yahoo.io.IOUtils;
24+
25+
import ai.vespa.schemals.common.ClientLogger;
26+
import ai.vespa.schemals.context.EventDocumentContext;
27+
import ai.vespa.schemals.context.InvalidContextException;
28+
import ai.vespa.schemals.index.SchemaIndex;
29+
import ai.vespa.schemals.lsp.common.semantictokens.CommonSemanticTokens;
30+
import ai.vespa.schemals.lsp.schema.semantictokens.SchemaSemanticTokens;
31+
import ai.vespa.schemals.schemadocument.SchemaDocumentScheduler;
32+
import ai.vespa.schemals.testutils.TestLogger;
33+
import ai.vespa.schemals.testutils.TestSchemaDiagnosticsHandler;
34+
import ai.vespa.schemals.testutils.TestSchemaMessageHandler;
35+
import ai.vespa.schemals.testutils.TestSchemaProgressHandler;
36+
37+
/**
38+
* Dumps the Java LSP's semantic tokens to JSON for comparison with
39+
* the TextMate grammar in integration/tmgrammar/.
40+
*
41+
* This is a utility, not a test. It lives in test sources because it
42+
* depends on test scaffolding (TestLogger, etc.).
43+
*
44+
* Run from the language-server directory:
45+
* mvn test-compile exec:java \
46+
* -Dexec.mainClass=ai.vespa.schemals.SemanticTokenDumper \
47+
* -Dexec.classpathScope=test
48+
*/
49+
public class SemanticTokenDumper {
50+
51+
private static final Path SD_FILES_DIR = Paths.get("src/test/sdfiles");
52+
private static final Path OUTPUT_PATH = Paths.get("../../tmgrammar/tools/java_tokens.json");
53+
54+
public static void main(String[] args) throws IOException, InvalidContextException {
55+
SemanticTokensWithRegistrationOptions options = CommonSemanticTokens.getSemanticTokensRegistrationOptions();
56+
SemanticTokensLegend legend = options.getLegend();
57+
List<String> tokenTypes = legend.getTokenTypes();
58+
List<String> tokenModifiers = legend.getTokenModifiers();
59+
60+
// Find all .sd files
61+
List<Path> sdFiles;
62+
try (Stream<Path> walk = Files.walk(SD_FILES_DIR)) {
63+
sdFiles = walk.filter(p -> p.toString().endsWith(".sd"))
64+
.sorted()
65+
.toList();
66+
}
67+
68+
Map<String, Object> output = new LinkedHashMap<>();
69+
output.put("legend", tokenTypes);
70+
Map<String, Object> filesMap = new LinkedHashMap<>();
71+
72+
for (Path sdFile : sdFiles) {
73+
File file = sdFile.toFile();
74+
String fileURI = file.toURI().toString();
75+
String fileContent = IOUtils.readFile(file);
76+
77+
// Set up test scaffolding - fresh for each file
78+
TestSchemaMessageHandler messageHandler = new TestSchemaMessageHandler();
79+
TestSchemaProgressHandler progressHandler = new TestSchemaProgressHandler();
80+
ClientLogger logger = new TestLogger(messageHandler);
81+
SchemaIndex schemaIndex = new SchemaIndex(logger);
82+
TestSchemaDiagnosticsHandler diagnosticsHandler = new TestSchemaDiagnosticsHandler(new ArrayList<>());
83+
SchemaDocumentScheduler scheduler = new SchemaDocumentScheduler(logger, diagnosticsHandler, schemaIndex, messageHandler, progressHandler);
84+
85+
scheduler.openDocument(new TextDocumentItem(fileURI, "vespaSchema", 0, fileContent));
86+
87+
EventDocumentContext context = new EventDocumentContext(
88+
scheduler, schemaIndex, messageHandler,
89+
new TextDocumentIdentifier(fileURI)
90+
);
91+
92+
List<Integer> data = SchemaSemanticTokens.getSemanticTokens(context).getData();
93+
94+
// Decode delta-encoded tokens
95+
String[] lines = fileContent.split("\n", -1);
96+
List<Map<String, Object>> tokens = new ArrayList<>();
97+
int prevLine = 0, prevCol = 0;
98+
99+
for (int i = 0; i + 4 < data.size(); i += 5) {
100+
int deltaLine = data.get(i);
101+
int deltaCol = data.get(i + 1);
102+
int len = data.get(i + 2);
103+
int typeIndex = data.get(i + 3);
104+
int modBits = data.get(i + 4);
105+
106+
int line = prevLine + deltaLine;
107+
int col = (deltaLine == 0) ? prevCol + deltaCol : deltaCol;
108+
prevLine = line;
109+
prevCol = col;
110+
111+
String typeName = (typeIndex >= 0 && typeIndex < tokenTypes.size())
112+
? tokenTypes.get(typeIndex) : "unknown(" + typeIndex + ")";
113+
114+
List<String> mods = new ArrayList<>();
115+
for (int bit = 0; bit < tokenModifiers.size(); bit++) {
116+
if ((modBits & (1 << bit)) != 0) {
117+
mods.add(tokenModifiers.get(bit));
118+
}
119+
}
120+
121+
String text = "";
122+
if (line >= 0 && line < lines.length) {
123+
int end = Math.min(col + len, lines[line].length());
124+
if (col >= 0 && col <= lines[line].length()) {
125+
text = lines[line].substring(col, end);
126+
}
127+
}
128+
129+
Map<String, Object> token = new LinkedHashMap<>();
130+
token.put("line", line);
131+
token.put("col", col);
132+
token.put("len", len);
133+
token.put("text", text);
134+
token.put("type", typeName);
135+
token.put("modifiers", mods);
136+
137+
// Deduplicate: if previous token has same position and length,
138+
// the LSP emitted overlapping tokens (e.g. dataType + keyword).
139+
// Keep the first one (more specific AST-level token).
140+
if (!tokens.isEmpty()) {
141+
Map<String, Object> prev = tokens.get(tokens.size() - 1);
142+
if (prev.get("line").equals(line)
143+
&& prev.get("col").equals(col)
144+
&& prev.get("len").equals(len)) {
145+
continue; // skip duplicate
146+
}
147+
}
148+
tokens.add(token);
149+
}
150+
151+
String relPath = SD_FILES_DIR.relativize(sdFile).toString();
152+
Map<String, Object> fileEntry = new LinkedHashMap<>();
153+
fileEntry.put("tokens", tokens);
154+
filesMap.put(relPath, fileEntry);
155+
}
156+
157+
output.put("files", filesMap);
158+
159+
Gson gson = new GsonBuilder().setPrettyPrinting().create();
160+
try (FileWriter writer = new FileWriter(OUTPUT_PATH.toFile())) {
161+
gson.toJson(output, writer);
162+
}
163+
164+
System.out.println("Wrote " + filesMap.size() + " files to " + OUTPUT_PATH.toAbsolutePath().normalize());
165+
}
166+
}

0 commit comments

Comments
 (0)