@@ -15,6 +15,20 @@ foam.CLASS({
1515 'foam.parse.lsp.CursorAnalyzer'
1616 ] ,
1717
18+ constants : {
19+ JAVA_EMBED_KEYS_ : {
20+ javaCode : true , javaFactory : true , javaGetter : true , javaSetter : true ,
21+ javaPreSet : true , javaPostSet : true , javaAdapt : true , javaCompare : true ,
22+ javaComparePropertyToObject : true , javaComparePropertyToValue : true ,
23+ javaCloneProperty : true , javaDiffProperty : true ,
24+ javaFormatJSON : true , javaJSONParser : true , javaCSVParser : true ,
25+ javaQueryParser : true , javaToCSV : true , javaToCSVLabel : true ,
26+ javaFromCSVLabelMapping : true , javaAssertValue : true ,
27+ javaValidateObj : true , javaCondition : true , javaValue : true ,
28+ javaImports : true , code : true , serviceScript : true
29+ }
30+ } ,
31+
1832 properties : [
1933 {
2034 class : 'FObjectProperty' ,
@@ -226,18 +240,34 @@ foam.CLASS({
226240 } ,
227241
228242 function handleSemanticTokens ( text ) {
229- var lines = text . split ( '\n' ) ;
243+ /**
244+ * JRL semantic tokens complement the TextMate/tree-sitter grammar by
245+ * emitting registry-verified highlights the grammar cannot reach:
246+ * • Verified `"class":"…"` values (top-level and inside embedded JSON)
247+ * • Dotted class IDs inside embedded Java blocks (serviceScript,
248+ * javaCode, javaFactory, etc.)
249+ * • Dotted class IDs inside escaped-in-double-quote client strings
250+ *
251+ * Token types: 0=type, 1=class, 2=variable, 3=keyword, 4=string,
252+ * 5=comment, 6=number, 7=operator, 8=method.
253+ */
230254 var tokens = [ ] ;
255+ this . collectClassValueTokens_ ( text , tokens ) ;
256+ this . collectEmbeddedBlockTokens_ ( text , tokens ) ;
257+
258+ tokens . sort ( function ( a , b ) {
259+ return a . line !== b . line ? a . line - b . line : a . char - b . char ;
260+ } ) ;
261+ return this . encodeTokens_ ( tokens ) ;
262+ } ,
231263
232- // Only emit semantic tokens for things the TextMate grammar can't resolve:
233- // - Class values verified against the FOAM registry (type=1 class)
234- // Token types: 0=type, 1=class, 2=variable, 3=keyword, 4=string,
235- // 5=comment, 6=number, 7=operator, 8=method
264+ function collectClassValueTokens_ ( text , tokens ) {
265+ /** Line-by-line scan for "class":"…" and class:"…" verified values. */
266+ var lines = text . split ( '\n' ) ;
236267 for ( var lineNum = 0 ; lineNum < lines . length ; lineNum ++ ) {
237268 var line = lines [ lineNum ] ;
238269 if ( ! line . trim ( ) || / ^ \s * \/ \/ / . test ( line ) ) continue ;
239270
240- // Highlight verified class values — both "class":"value" and class:"value"
241271 var classRegex = / (?: " c l a s s " | (?< = [ { , ] ) \s * c l a s s ) \s * : \s * (?: " ( [ ^ " ] + ) " | ' ( [ ^ ' ] + ) ' ) / g;
242272 var cm ;
243273 while ( ( cm = classRegex . exec ( line ) ) !== null ) {
@@ -250,12 +280,172 @@ foam.CLASS({
250280 }
251281 }
252282 }
283+ } ,
253284
254- // Sort and encode
255- tokens . sort ( function ( a , b ) {
256- return a . line !== b . line ? a . line - b . line : a . char - b . char ;
285+ function collectEmbeddedBlockTokens_ ( text , tokens ) {
286+ /**
287+ * Walk every embedded value block in the file and emit tokens for
288+ * registry-verified identifiers inside. Handles:
289+ * 1. Triple-quoted values: "key": """…"""
290+ * 2. Backtick values: "key": `…`
291+ * 3. Escaped-in-double-quote values: "key": "…" (for client only)
292+ *
293+ * Dispatch by key:
294+ * • Java keys (serviceScript, javaCode, …) → Java tokenization
295+ * • `client` → JSON tokenization
296+ */
297+ var blocks = this . findEmbeddedBlocks_ ( text ) ;
298+ for ( var i = 0 ; i < blocks . length ; i ++ ) {
299+ var b = blocks [ i ] ;
300+ if ( this . JAVA_EMBED_KEYS_ [ b . key ] ) {
301+ this . collectJavaEmbedTokens_ ( text , b , tokens ) ;
302+ } else if ( b . key === 'client' ) {
303+ this . collectJsonEmbedTokens_ ( text , b , tokens ) ;
304+ }
305+ }
306+ } ,
307+
308+ function findEmbeddedBlocks_ ( text ) {
309+ /**
310+ * Scan the full text for every triple-quote and backtick embedded
311+ * value. Returns array of { key, contentStart, contentEnd, delim }
312+ * where delim is '"""' or '`'. Skips `//` line comments.
313+ *
314+ * Approach: find `"key":` then the opening delimiter right after.
315+ * Matches BOTH quoted-key (`"javaCode"`) and unquoted-key (`javaCode`).
316+ */
317+ var out = [ ] ;
318+ var keyDelimRe = / (?: " ( [ a - z A - Z _ ] [ \w $ ] * ) " | ( [ a - z A - Z _ ] [ \w $ ] * ) ) \s * : \s * ( " " " | ` ) / g;
319+ var m ;
320+ while ( ( m = keyDelimRe . exec ( text ) ) !== null ) {
321+ var key = m [ 1 ] || m [ 2 ] ;
322+ if ( ! key ) continue ;
323+ var delim = m [ 3 ] ;
324+ var openStart = m . index + m [ 0 ] . length - delim . length ;
325+ var contentStart = openStart + delim . length ;
326+ var contentEnd = text . indexOf ( delim , contentStart ) ;
327+ if ( contentEnd === - 1 ) break ;
328+ out . push ( { key : key , contentStart : contentStart , contentEnd : contentEnd , delim : delim } ) ;
329+ keyDelimRe . lastIndex = contentEnd + delim . length ;
330+ }
331+
332+ // Escaped-in-double-quote form: `"client": "…"` where inner quotes
333+ // are `\"`. Only honor `client` (FObject JSON); serviceScript also
334+ // uses this form but we leave Java highlighting to grammar injection
335+ // there since escaping makes it hard to detect reliably.
336+ var escRe = / " ( c l i e n t ) " \s * : \s * " (? ! " " ) ( (?: \\ .| [ ^ " \\ \n ] ) * ) " / g;
337+ var em ;
338+ while ( ( em = escRe . exec ( text ) ) !== null ) {
339+ var vStart = em . index + em [ 0 ] . length - em [ 2 ] . length - 1 ;
340+ out . push ( {
341+ key : em [ 1 ] ,
342+ contentStart : vStart + 1 ,
343+ contentEnd : vStart + 1 + em [ 2 ] . length ,
344+ delim : '"' ,
345+ escaped : true
346+ } ) ;
347+ }
348+ return out ;
349+ } ,
350+
351+ function collectJavaEmbedTokens_ ( text , block , tokens ) {
352+ /**
353+ * Emit `type` tokens (0) for dotted class IDs and short class names
354+ * that the registry resolves. Registry-verified only — no hardcoded
355+ * list. The surrounding grammar handles Java keyword / string /
356+ * comment highlighting; we add what the grammar can't know:
357+ * which identifiers are actually FOAM classes.
358+ */
359+ var content = text . substring ( block . contentStart , block . contentEnd ) ;
360+ var lineOffsets = this . computeLineOffsets_ ( text ) ;
361+
362+ // Dotted identifier — a.b.c.D — followed by optional `.getOwnClassInfo`
363+ var dottedRe = / \b ( [ a - z ] [ \w $ ] * (?: \. [ a - z A - Z _ ] [ \w $ ] * ) + ) \b / g;
364+ var dm ;
365+ while ( ( dm = dottedRe . exec ( content ) ) !== null ) {
366+ var id = dm [ 1 ] ;
367+ var hit = this . resolveRegisteredPrefix_ ( id ) ;
368+ if ( ! hit ) continue ;
369+ this . pushTokenAt_ ( tokens , block . contentStart + dm . index , hit . length , 0 , lineOffsets ) ;
370+ }
371+ } ,
372+
373+ function collectJsonEmbedTokens_ ( text , block , tokens ) {
374+ /**
375+ * Emit `class` tokens (1) for registry-verified `"class":"…"` values
376+ * inside an embedded JSON block. If the block is escaped-in-double-
377+ * quote form, `\"class\":\"…\"` — handle both literal and escaped.
378+ */
379+ var content = text . substring ( block . contentStart , block . contentEnd ) ;
380+ var lineOffsets = this . computeLineOffsets_ ( text ) ;
381+
382+ // Literal: "class":"com.foo.Bar"
383+ var litRe = / " c l a s s " \s * : \s * " ( [ ^ " \n ] + ) " / g;
384+ var lm ;
385+ while ( ( lm = litRe . exec ( content ) ) !== null ) {
386+ var cid = lm [ 1 ] ;
387+ if ( ! this . index . classExists ( cid ) ) continue ;
388+ var valIdx = content . indexOf ( cid , lm . index ) ;
389+ if ( valIdx === - 1 ) continue ;
390+ this . pushTokenAt_ ( tokens , block . contentStart + valIdx , cid . length , 1 , lineOffsets ) ;
391+ }
392+
393+ // Escaped: \"class\":\"com.foo.Bar\"
394+ var escRe = / \\ " c l a s s \\ " \s * : \s * \\ " ( [ ^ " \\ \n ] + ) \\ " / g;
395+ var em ;
396+ while ( ( em = escRe . exec ( content ) ) !== null ) {
397+ var ecid = em [ 1 ] ;
398+ if ( ! this . index . classExists ( ecid ) ) continue ;
399+ var eIdx = content . indexOf ( ecid , em . index ) ;
400+ if ( eIdx === - 1 ) continue ;
401+ this . pushTokenAt_ ( tokens , block . contentStart + eIdx , ecid . length , 1 , lineOffsets ) ;
402+ }
403+ } ,
404+
405+ function resolveRegisteredPrefix_ ( dottedId ) {
406+ /**
407+ * Given `foo.X.Builder`, return the longest prefix that exists in the
408+ * FOAM registry. Returns { length } (char length of the matched
409+ * prefix) or null.
410+ */
411+ if ( ! dottedId ) return null ;
412+ var cand = dottedId ;
413+ while ( cand ) {
414+ if ( this . index . classExists ( cand ) ) return { length : cand . length } ;
415+ var dot = cand . lastIndexOf ( '.' ) ;
416+ if ( dot === - 1 ) return null ;
417+ cand = cand . substring ( 0 , dot ) ;
418+ }
419+ return null ;
420+ } ,
421+
422+ function computeLineOffsets_ ( text ) {
423+ /** Pre-computed line-start offsets for fast offset → {line,char}. */
424+ var offs = [ 0 ] ;
425+ for ( var i = 0 ; i < text . length ; i ++ ) {
426+ if ( text . charCodeAt ( i ) === 10 ) offs . push ( i + 1 ) ;
427+ }
428+ return offs ;
429+ } ,
430+
431+ function pushTokenAt_ ( tokens , offset , length , type , lineOffsets ) {
432+ /** Binary-search offset → line/char and push a semantic token. */
433+ var lo = 0 , hi = lineOffsets . length - 1 ;
434+ while ( lo < hi ) {
435+ var mid = ( lo + hi + 1 ) >> 1 ;
436+ if ( lineOffsets [ mid ] <= offset ) lo = mid ; else hi = mid - 1 ;
437+ }
438+ tokens . push ( {
439+ line : lo ,
440+ char : offset - lineOffsets [ lo ] ,
441+ length : length ,
442+ type : type ,
443+ modifiers : 0
257444 } ) ;
445+ } ,
258446
447+ function encodeTokens_ ( tokens ) {
448+ /** LSP delta-encoding: [dL, dC, length, type, modifiers] per token. */
259449 var data = [ ] ;
260450 var prevLine = 0 , prevChar = 0 ;
261451 for ( var i = 0 ; i < tokens . length ; i ++ ) {
0 commit comments