Skip to content

Commit ed1396a

Browse files
committed
no breaking changes
1 parent 50c8ba6 commit ed1396a

3 files changed

Lines changed: 32 additions & 3 deletions

File tree

packages/rules-sparql-1-2/lib/grammar.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -813,8 +813,9 @@ export const iriFull: SparqlGrammarRule<'iriFull', T11.TermIriFull> = {
813813
const raw = iriToken.image.slice(1, -1);
814814
return C.astFactory.termNamed(
815815
C.astFactory.sourceLocation(iriToken),
816-
raw.replaceAll(/\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8})/gu, (_, unicode4, unicode8) =>
817-
decodeUchar((unicode4 ?? unicode8))),
816+
// TODO: in the next major version, replace this call with the implementation of codepointEscape.
817+
// The function no longer serves the intended purpose since it is not reusable for `string`.
818+
C.codepointEscape(raw),
818819
);
819820
});
820821
},

packages/rules-sparql-1-2/lib/parserUtils.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,22 @@ import { AstFactory } from './AstFactory.js';
33
import type { SparqlContext, SparqlGeneratorContext } from './sparql12HelperTypes.js';
44
import type { Sparql12Nodes } from './sparql12Types.js';
55

6+
/**
7+
* Decode UCHAR codepoint escapes (\\uXXXX / \\UXXXXXXXX) within a string according to
8+
* [SPARQL 1.2 §19.2](https://www.w3.org/TR/sparql12-query/#sec-escapes).
9+
*
10+
* Unlike the SPARQL 1.1 variant, this function rejects surrogate code points (U+D800–U+DFFF)
11+
* even when they would form a valid surrogate pair.
12+
* @deprecated Will be removed in the next major version in favor of the less use-case-dependent {@link decodeUchar}.
13+
*/
14+
export function sparql12CodepointEscape(input: string): string {
15+
return input.replaceAll(
16+
/\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8})/gu,
17+
(_, unicode4: string | undefined, unicode8: string | undefined) =>
18+
decodeUchar((unicode4 ?? unicode8)!),
19+
);
20+
}
21+
622
export function decodeUchar(hex: string): string {
723
const codePoint = Number.parseInt(hex, 16);
824
if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
@@ -20,6 +36,10 @@ export function completeParseContext(
2036
prefixes: Object.assign(Object.create(null), context.prefixes),
2137
parseMode: context.parseMode ? new Set(context.parseMode) : new Set([ 'canParseVars', 'canCreateBlankNodes' ]),
2238
skipValidation: context.skipValidation ?? false,
39+
/**
40+
* @deprecated since it cannot be used for string decoding.
41+
*/
42+
codepointEscape: context.codepointEscape ?? sparql12CodepointEscape,
2343
};
2444
}
2545

packages/rules-sparql-1-2/lib/sparql12HelperTypes.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,13 @@ export type SparqlGrammarRule<
5252
ParamType extends any[] = [],
5353
> = ParserRule<SparqlContext, NameType, ReturnType, ParamType>;
5454

55-
export type SparqlContext = T11.SparqlContext & { astFactory: AstFactory };
55+
export type SparqlContext = T11.SparqlContext & {
56+
astFactory: AstFactory;
57+
/**
58+
* Function that decodes UCHAR codepoint escapes (\\uXXXX / \\UXXXXXXXX) within a string.
59+
* In SPARQL 1.2 this is applied per-rule rather than as a query pre-processor.
60+
* @deprecated No longer used since it did not properly implement the decoding of string literals.
61+
*/
62+
codepointEscape: (input: string) => string;
63+
};
5664
export type SparqlGeneratorContext = T11.SparqlGeneratorContext & { astFactory: AstFactory };

0 commit comments

Comments
 (0)