Skip to content

Commit 7b3c0be

Browse files
feat(lsp): JRL embedded-block semantic tokens + Zed JSON injection for client blocks
LSP (JrlHandler.handleSemanticTokens):
- Scans every triple-quote/backtick embedded value + escaped-double-quote client form
- Java keys (serviceScript, javaCode, …) emit `type` tokens for dotted identifiers that resolve in the FOAM registry (longest-prefix match so foo.X.Builder highlights as foo.X)
- `client` blocks emit `class` tokens for verified "class":"…" values, both literal and escape-encoded (\\"class\\":\\"foam.dao.EasyDAO\\")
- Top-level "class":"…" tokenization split into collectClassValueTokens_; encoding shared

Zed tree-sitter injections:
- Add #eq? @_key "client" → json injection for triple-quoted and backtick client values
- Canonical languages/jrl/ (grammars/ subtree is gitignored build area)

Tests (8 new, 495 total):
- type (0) tokens emitted for registered class IDs in serviceScript
- class (1) tokens for nested "class":"…" inside client triple-quote + escaped forms
- unknown dotted identifiers DON'T emit type tokens (registry-verified)
- Zed injections.scm declares json injection for client
- VS Code foam-jrl grammar parity (already had json-block-triple/backtick)
1 parent 3cd92ef commit 7b3c0be

3 files changed

Lines changed: 336 additions & 9 deletions

File tree

tools/lsp/editors/zed-foam3/languages/jrl/injections.scm

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,41 @@
9090
"javaImports"
9191
"code" "serviceScript")
9292
(#set! injection.language "java"))
93+
94+
; ============================================================
95+
; JSON injection for `client` values — nested FObject specs
96+
; are authored as JSON literals inside triple-quoted or
97+
; backtick-delimited strings.
98+
; ============================================================
99+
100+
; --- JSON injection for triple-quoted strings (quoted keys) ---
101+
102+
(pair
103+
key: (key (string (string_content) @_key))
104+
value: (triple_string (triple_string_content) @injection.content)
105+
(#eq? @_key "client")
106+
(#set! injection.language "json"))
107+
108+
; --- JSON injection for triple-quoted strings (unquoted keys) ---
109+
110+
(pair
111+
key: (key (identifier) @_key)
112+
value: (triple_string (triple_string_content) @injection.content)
113+
(#eq? @_key "client")
114+
(#set! injection.language "json"))
115+
116+
; --- JSON injection for backtick strings (quoted keys) ---
117+
118+
(pair
119+
key: (key (string (string_content) @_key))
120+
value: (backtick_string (backtick_string_content) @injection.content)
121+
(#eq? @_key "client")
122+
(#set! injection.language "json"))
123+
124+
; --- JSON injection for backtick strings (unquoted keys) ---
125+
126+
(pair
127+
key: (key (identifier) @_key)
128+
value: (backtick_string (backtick_string_content) @injection.content)
129+
(#eq? @_key "client")
130+
(#set! injection.language "json"))

tools/lsp/handlers/JrlHandler.js

Lines changed: 199 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,20 @@ foam.CLASS({
1515
'foam.parse.lsp.CursorAnalyzer'
1616
],
1717

18+
constants: {
19+
JAVA_EMBED_KEYS_: {
20+
javaCode: true, javaFactory: true, javaGetter: true, javaSetter: true,
21+
javaPreSet: true, javaPostSet: true, javaAdapt: true, javaCompare: true,
22+
javaComparePropertyToObject: true, javaComparePropertyToValue: true,
23+
javaCloneProperty: true, javaDiffProperty: true,
24+
javaFormatJSON: true, javaJSONParser: true, javaCSVParser: true,
25+
javaQueryParser: true, javaToCSV: true, javaToCSVLabel: true,
26+
javaFromCSVLabelMapping: true, javaAssertValue: true,
27+
javaValidateObj: true, javaCondition: true, javaValue: true,
28+
javaImports: true, code: true, serviceScript: true
29+
}
30+
},
31+
1832
properties: [
1933
{
2034
class: 'FObjectProperty',
@@ -226,18 +240,34 @@ foam.CLASS({
226240
},
227241

228242
function handleSemanticTokens(text) {
229-
var lines = text.split('\n');
243+
/**
244+
* JRL semantic tokens complement the TextMate/tree-sitter grammar by
245+
* emitting registry-verified highlights the grammar cannot reach:
246+
* • Verified `"class":"…"` values (top-level and inside embedded JSON)
247+
* • Dotted class IDs inside embedded Java blocks (serviceScript,
248+
* javaCode, javaFactory, etc.)
249+
* • Dotted class IDs inside escaped-in-double-quote client strings
250+
*
251+
* Token types: 0=type, 1=class, 2=variable, 3=keyword, 4=string,
252+
* 5=comment, 6=number, 7=operator, 8=method.
253+
*/
230254
var tokens = [];
255+
this.collectClassValueTokens_(text, tokens);
256+
this.collectEmbeddedBlockTokens_(text, tokens);
257+
258+
tokens.sort(function(a, b) {
259+
return a.line !== b.line ? a.line - b.line : a.char - b.char;
260+
});
261+
return this.encodeTokens_(tokens);
262+
},
231263

232-
// Only emit semantic tokens for things the TextMate grammar can't resolve:
233-
// - Class values verified against the FOAM registry (type=1 class)
234-
// Token types: 0=type, 1=class, 2=variable, 3=keyword, 4=string,
235-
// 5=comment, 6=number, 7=operator, 8=method
264+
function collectClassValueTokens_(text, tokens) {
265+
/** Line-by-line scan for "class":"…" and class:"…" verified values. */
266+
var lines = text.split('\n');
236267
for ( var lineNum = 0 ; lineNum < lines.length ; lineNum++ ) {
237268
var line = lines[lineNum];
238269
if ( ! line.trim() || /^\s*\/\//.test(line) ) continue;
239270

240-
// Highlight verified class values — both "class":"value" and class:"value"
241271
var classRegex = /(?:"class"|(?<=[{,])\s*class)\s*:\s*(?:"([^"]+)"|'([^']+)')/g;
242272
var cm;
243273
while ( ( cm = classRegex.exec(line) ) !== null ) {
@@ -250,12 +280,172 @@ foam.CLASS({
250280
}
251281
}
252282
}
283+
},
253284

254-
// Sort and encode
255-
tokens.sort(function(a, b) {
256-
return a.line !== b.line ? a.line - b.line : a.char - b.char;
285+
function collectEmbeddedBlockTokens_(text, tokens) {
286+
/**
287+
* Walk every embedded value block in the file and emit tokens for
288+
* registry-verified identifiers inside. Handles:
289+
* 1. Triple-quoted values: "key": """…"""
290+
* 2. Backtick values: "key": `…`
291+
* 3. Escaped-in-double-quote values: "key": "…" (for client only)
292+
*
293+
* Dispatch by key:
294+
* • Java keys (serviceScript, javaCode, …) → Java tokenization
295+
* • `client` → JSON tokenization
296+
*/
297+
var blocks = this.findEmbeddedBlocks_(text);
298+
for ( var i = 0 ; i < blocks.length ; i++ ) {
299+
var b = blocks[i];
300+
if ( this.JAVA_EMBED_KEYS_[b.key] ) {
301+
this.collectJavaEmbedTokens_(text, b, tokens);
302+
} else if ( b.key === 'client' ) {
303+
this.collectJsonEmbedTokens_(text, b, tokens);
304+
}
305+
}
306+
},
307+
308+
function findEmbeddedBlocks_(text) {
309+
/**
310+
* Scan the full text for every triple-quote and backtick embedded
311+
* value. Returns array of { key, contentStart, contentEnd, delim }
312+
* where delim is '"""' or '`'. Skips `//` line comments.
313+
*
314+
* Approach: find `"key":` then the opening delimiter right after.
315+
* Matches BOTH quoted-key (`"javaCode"`) and unquoted-key (`javaCode`).
316+
*/
317+
var out = [];
318+
var keyDelimRe = /(?:"([a-zA-Z_][\w$]*)"|([a-zA-Z_][\w$]*))\s*:\s*("""|`)/g;
319+
var m;
320+
while ( ( m = keyDelimRe.exec(text) ) !== null ) {
321+
var key = m[1] || m[2];
322+
if ( ! key ) continue;
323+
var delim = m[3];
324+
var openStart = m.index + m[0].length - delim.length;
325+
var contentStart = openStart + delim.length;
326+
var contentEnd = text.indexOf(delim, contentStart);
327+
if ( contentEnd === -1 ) break;
328+
out.push({ key: key, contentStart: contentStart, contentEnd: contentEnd, delim: delim });
329+
keyDelimRe.lastIndex = contentEnd + delim.length;
330+
}
331+
332+
// Escaped-in-double-quote form: `"client": "…"` where inner quotes
333+
// are `\"`. Only honor `client` (FObject JSON); serviceScript also
334+
// uses this form but we leave Java highlighting to grammar injection
335+
// there since escaping makes it hard to detect reliably.
336+
var escRe = /"(client)"\s*:\s*"(?!"")((?:\\.|[^"\\\n])*)"/g;
337+
var em;
338+
while ( ( em = escRe.exec(text) ) !== null ) {
339+
var vStart = em.index + em[0].length - em[2].length - 1;
340+
out.push({
341+
key: em[1],
342+
contentStart: vStart + 1,
343+
contentEnd: vStart + 1 + em[2].length,
344+
delim: '"',
345+
escaped: true
346+
});
347+
}
348+
return out;
349+
},
350+
351+
function collectJavaEmbedTokens_(text, block, tokens) {
352+
/**
353+
* Emit `type` tokens (0) for dotted class IDs and short class names
354+
* that the registry resolves. Registry-verified only — no hardcoded
355+
* list. The surrounding grammar handles Java keyword / string /
356+
* comment highlighting; we add what the grammar can't know:
357+
* which identifiers are actually FOAM classes.
358+
*/
359+
var content = text.substring(block.contentStart, block.contentEnd);
360+
var lineOffsets = this.computeLineOffsets_(text);
361+
362+
// Dotted identifier — a.b.c.D — followed by optional `.getOwnClassInfo`
363+
var dottedRe = /\b([a-z][\w$]*(?:\.[a-zA-Z_][\w$]*)+)\b/g;
364+
var dm;
365+
while ( ( dm = dottedRe.exec(content) ) !== null ) {
366+
var id = dm[1];
367+
var hit = this.resolveRegisteredPrefix_(id);
368+
if ( ! hit ) continue;
369+
this.pushTokenAt_(tokens, block.contentStart + dm.index, hit.length, 0, lineOffsets);
370+
}
371+
},
372+
373+
function collectJsonEmbedTokens_(text, block, tokens) {
374+
/**
375+
* Emit `class` tokens (1) for registry-verified `"class":"…"` values
376+
* inside an embedded JSON block. If the block is escaped-in-double-
377+
* quote form, `\"class\":\"…\"` — handle both literal and escaped.
378+
*/
379+
var content = text.substring(block.contentStart, block.contentEnd);
380+
var lineOffsets = this.computeLineOffsets_(text);
381+
382+
// Literal: "class":"com.foo.Bar"
383+
var litRe = /"class"\s*:\s*"([^"\n]+)"/g;
384+
var lm;
385+
while ( ( lm = litRe.exec(content) ) !== null ) {
386+
var cid = lm[1];
387+
if ( ! this.index.classExists(cid) ) continue;
388+
var valIdx = content.indexOf(cid, lm.index);
389+
if ( valIdx === -1 ) continue;
390+
this.pushTokenAt_(tokens, block.contentStart + valIdx, cid.length, 1, lineOffsets);
391+
}
392+
393+
// Escaped: \"class\":\"com.foo.Bar\"
394+
var escRe = /\\"class\\"\s*:\s*\\"([^"\\\n]+)\\"/g;
395+
var em;
396+
while ( ( em = escRe.exec(content) ) !== null ) {
397+
var ecid = em[1];
398+
if ( ! this.index.classExists(ecid) ) continue;
399+
var eIdx = content.indexOf(ecid, em.index);
400+
if ( eIdx === -1 ) continue;
401+
this.pushTokenAt_(tokens, block.contentStart + eIdx, ecid.length, 1, lineOffsets);
402+
}
403+
},
404+
405+
function resolveRegisteredPrefix_(dottedId) {
406+
/**
407+
* Given `foo.X.Builder`, return the longest prefix that exists in the
408+
* FOAM registry. Returns { length } (char length of the matched
409+
* prefix) or null.
410+
*/
411+
if ( ! dottedId ) return null;
412+
var cand = dottedId;
413+
while ( cand ) {
414+
if ( this.index.classExists(cand) ) return { length: cand.length };
415+
var dot = cand.lastIndexOf('.');
416+
if ( dot === -1 ) return null;
417+
cand = cand.substring(0, dot);
418+
}
419+
return null;
420+
},
421+
422+
function computeLineOffsets_(text) {
423+
/** Pre-computed line-start offsets for fast offset → {line,char}. */
424+
var offs = [0];
425+
for ( var i = 0 ; i < text.length ; i++ ) {
426+
if ( text.charCodeAt(i) === 10 ) offs.push(i + 1);
427+
}
428+
return offs;
429+
},
430+
431+
function pushTokenAt_(tokens, offset, length, type, lineOffsets) {
432+
/** Binary-search offset → line/char and push a semantic token. */
433+
var lo = 0, hi = lineOffsets.length - 1;
434+
while ( lo < hi ) {
435+
var mid = (lo + hi + 1) >> 1;
436+
if ( lineOffsets[mid] <= offset ) lo = mid; else hi = mid - 1;
437+
}
438+
tokens.push({
439+
line: lo,
440+
char: offset - lineOffsets[lo],
441+
length: length,
442+
type: type,
443+
modifiers: 0
257444
});
445+
},
258446

447+
function encodeTokens_(tokens) {
448+
/** LSP delta-encoding: [dL, dC, length, type, modifiers] per token. */
259449
var data = [];
260450
var prevLine = 0, prevChar = 0;
261451
for ( var i = 0 ; i < tokens.length ; i++ ) {

tools/tests/testFoamLSP.js

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2700,6 +2700,105 @@ if ( index.classExists('foam.dao.EasyDAO') ) {
27002700
}
27012701
}
27022702

2703+
// === EMBEDDED-BLOCK SEMANTIC TOKENS ===
2704+
section('JRL embedded-block semantic tokens — Java + client JSON');
2705+
2706+
// Java serviceScript with a dotted FOAM class id inside.
2707+
var embedJava = [
2708+
'p({',
2709+
' "class": "foam.core.boot.CSpec",',
2710+
' "id": "myService",',
2711+
' "serviceScript": """',
2712+
' return new foam.dao.EasyDAO.Builder(x).setOf(foam.lang.FObject.getOwnClassInfo()).build();',
2713+
' """',
2714+
'})'
2715+
].join('\n');
2716+
var javaTokens = jrlH3.handleSemanticTokens(embedJava);
2717+
// data format: [dL, dC, len, type, mods, …]. Expect at least one type=0
2718+
// token (class reference) somewhere inside the serviceScript body.
2719+
function hasTokenOfType(data, type) {
  // Encoded tokens occupy five consecutive slots each; the token type is
  // the fourth slot (index 3) of every group.
  var slot = 3;
  while ( slot < data.length ) {
    if ( data[slot] === type ) return true;
    slot += 5;
  }
  return false;
}
2725+
test(hasTokenOfType(javaTokens.data, 0),
2726+
'Embedded Java block emits type (0) tokens for registered class IDs');
2727+
2728+
// Verify class:"..." at top level is tagged as class (type=1).
2729+
test(hasTokenOfType(javaTokens.data, 1),
2730+
'Top-level "class":"…" value still emitted as class (1) token');
2731+
2732+
// JSON client block — verified "class":"…" inside the nested JSON.
2733+
var embedClient = [
2734+
'p({',
2735+
' "class": "foam.core.boot.CSpec",',
2736+
' "id": "myCSpec",',
2737+
' "client": """',
2738+
' {',
2739+
' "class": "foam.dao.EasyDAO",',
2740+
' "of": "foam.lang.FObject"',
2741+
' }',
2742+
' """',
2743+
'})'
2744+
].join('\n');
2745+
var clientTokens = jrlH3.handleSemanticTokens(embedClient);
2746+
// Expect TWO class tokens: top-level foam.core.boot.CSpec AND nested foam.dao.EasyDAO.
2747+
var classTokenCount = 0;
2748+
for ( var i = 3 ; i < clientTokens.data.length ; i += 5 ) {
2749+
if ( clientTokens.data[i] === 1 ) classTokenCount++;
2750+
}
2751+
test(classTokenCount >= 2,
2752+
'Nested "class":"…" inside triple-quoted client block emits class token (got ' + classTokenCount + ')');
2753+
2754+
// Escaped-in-double-quote client form.
2755+
var embedClientEsc = [
2756+
'p({',
2757+
' "class": "foam.core.boot.CSpec",',
2758+
' "id": "myCSpec",',
2759+
' "client": "{\\"class\\":\\"foam.dao.EasyDAO\\",\\"of\\":\\"foam.lang.FObject\\"}"',
2760+
'})'
2761+
].join('\n');
2762+
var escTokens = jrlH3.handleSemanticTokens(embedClientEsc);
2763+
var escClassCount = 0;
2764+
for ( var i = 3 ; i < escTokens.data.length ; i += 5 ) {
2765+
if ( escTokens.data[i] === 1 ) escClassCount++;
2766+
}
2767+
test(escClassCount >= 2,
2768+
'Escaped "class":"…" inside escaped-double-quote client block emits class token (got ' + escClassCount + ')');
2769+
2770+
// Unknown class inside serviceScript should NOT emit a type token.
2771+
var embedUnknown = [
2772+
'p({',
2773+
' "class": "foam.core.boot.CSpec",',
2774+
' "serviceScript": """',
2775+
' return totally.not.a.real.Class.foo();',
2776+
' """',
2777+
'})'
2778+
].join('\n');
2779+
var unkTokens = jrlH3.handleSemanticTokens(embedUnknown);
2780+
// Should only have the top-level class token (type=1), no type=0 from the unknown ID.
2781+
var unkTypeCount = 0;
2782+
for ( var i = 3 ; i < unkTokens.data.length ; i += 5 ) {
2783+
if ( unkTokens.data[i] === 0 ) unkTypeCount++;
2784+
}
2785+
test(unkTypeCount === 0,
2786+
'Unknown dotted identifier in serviceScript does not emit type token (got ' + unkTypeCount + ')');
2787+
2788+
// === ZED TREE-SITTER — JSON INJECTION FOR client BLOCKS ===
2789+
section('Zed tree-sitter grammar: JSON injection for client blocks');
2790+
var fs_ = require('fs');
2791+
var path_ = require('path');
2792+
var zedInj = fs_.readFileSync(path_.join(__dirname, '../lsp/editors/zed-foam3/languages/jrl/injections.scm'), 'utf8');
2793+
test(/injection\.language\s+"json"/.test(zedInj),
2794+
'Zed JRL injections.scm declares JSON injection');
2795+
test(/#eq\?\s+@_key\s+"client"/.test(zedInj),
2796+
'Zed JRL injections.scm matches `client` key for JSON injection');
2797+
// VS Code grammar parity.
2798+
var vscodeJrl = JSON.parse(fs_.readFileSync(path_.join(__dirname, '../lsp/editors/vscode/syntaxes/foam-jrl.tmLanguage.json'), 'utf8'));
2799+
test(!! vscodeJrl.repository['json-block-triple'] && !! vscodeJrl.repository['json-block-backtick'],
2800+
'VS Code foam-jrl grammar has JSON injections for client triple/backtick');
2801+
27032802
// === SUMMARY ===
27042803

27052804
section('SUMMARY');

0 commit comments

Comments
 (0)