Skip to content

Commit a9c5f11

Browse files
authored
feat: add support for strict mode in JSON/YAML parser adapters (#22)
1 parent dd4af10 commit a9c5f11

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+391
-62
lines changed

package-lock.json

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/apidom-parser-adapter-json/README.md

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,14 @@ Defines list of media types that this parser adapter recognizes.
5252

5353
### detect
5454

55-
[Detection](https://github.com/speclynx/apidom/blob/main/packages/apidom-parser-adapter-json/src/adapter.ts#L3) is based on using [JSON.parse](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse) to indicate whether the provided source string is or isn't JSON string.
55+
Detection indicates whether the provided source string is valid JSON.
56+
57+
Option | Type | Default | Description
58+
--- | --- | --- | ---
59+
<a name="detect-strict"></a>`strict` | `Boolean` | `false` | Use strict detection mode (native `JSON.parse`).
60+
61+
In default mode, detection uses tree-sitter for parsing with error recovery.
62+
In strict mode, detection uses native `JSON.parse`, which is faster but has no error recovery: any source that `JSON.parse` rejects is reported as `false`.
5663

5764
### namespace
5865

@@ -65,9 +72,26 @@ This adapter exposes an instance of [base ApiDOM namespace](https://github.com/s
6572
Option | Type | Default | Description
6673
--- | --- | --- | ---
6774
<a name="sourceMap"></a>`sourceMap` | `Boolean` | `false` | Indicate whether to generate source maps.
75+
<a name="strict"></a>`strict` | `Boolean` | `false` | Use strict parsing mode (native `JSON.parse`). When `true`, parsing is faster but throws `SyntaxError` on invalid JSON and does not support source maps; combining it with `sourceMap: true` throws an error.
6876

6977
All unrecognized arbitrary options will be ignored.
7078

79+
#### Parsing modes
80+
81+
This adapter supports two parsing modes:
82+
83+
**Tree-sitter mode** (default, `strict: false`):
84+
- Uses [web-tree-sitter](https://www.npmjs.com/package/web-tree-sitter) for parsing
85+
- Provides error recovery for malformed JSON
86+
- Supports source map generation
87+
- Slightly slower but more resilient
88+
89+
**Strict mode** (`strict: true`):
90+
- Uses native `JSON.parse` for parsing
91+
- Faster performance
92+
- Throws `SyntaxError` on invalid JSON
93+
- Does not support source maps (throws an `ApiDOMError` if both `strict` and `sourceMap` are `true`)
94+
7195
## Usage
7296

7397
This parser adapter can be used directly or indirectly via [@speclynx/apidom-parser](https://github.com/speclynx/apidom/tree/main/packages/apidom-parser).
@@ -80,12 +104,19 @@ with [supported media types](#mediatypes).
80104
```js
81105
import { parse, detect } from '@speclynx/apidom-parser-adapter-json';
82106

83-
// detecting
107+
// detecting (tree-sitter mode - default)
84108
await detect('{"prop": "value"}'); // => true
85109
await detect('test'); // => false
86110

87-
// parsing
111+
// detecting (strict mode)
112+
await detect('{"prop": "value"}', { strict: true }); // => true
113+
await detect('{invalid}', { strict: true }); // => false
114+
115+
// parsing (tree-sitter mode - default, with source maps)
88116
const parseResult = await parse('{"prop": "value"}', { sourceMap: true });
117+
118+
// parsing (strict mode - faster, no source maps)
119+
const parseResultStrict = await parse('{"prop": "value"}', { strict: true });
89120
```
90121

91122
### Indirect usage
Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import { ParseResultElement, Namespace } from '@speclynx/apidom-datamodel';
2-
import type { Tree } from 'web-tree-sitter';
2+
import { ApiDOMError } from '@speclynx/apidom-error';
33

4-
import lexicalAnalysis from './lexical-analysis/index.ts';
5-
import syntacticAnalysis from './syntactic-analysis/index.ts';
4+
import * as native from './native/index.ts';
5+
import * as treeSitter from './tree-sitter/index.ts';
66

77
export type { JSONMediaTypes } from './media-types.ts';
8-
export type { Tree };
8+
export type { Tree } from './tree-sitter/index.ts';
99

10-
export { lexicalAnalysis, syntacticAnalysis };
10+
export { lexicalAnalysis, syntacticAnalysis } from './tree-sitter/index.ts';
1111

1212
/**
1313
* @public
@@ -22,34 +22,34 @@ export const namespace = new Namespace();
2222
/**
2323
* @public
2424
*/
25-
export const detectionRegExp =
26-
// eslint-disable-next-line no-control-regex
27-
/(?<true>^\s*true\s*$)|(?<false>^\s*false\s*$)|(?<null>^\s*null\s*$)|(?<number>^\s*\d+\s*$)|(?<object>^\s*{\s*)|(?<array>^\s*\[\s*)|(?<string>^\s*"(((?=\\)\\(["\\/bfnrt]|u[0-9a-fA-F]{4}))|[^"\\\x00-\x1F\x7F])*"\s*$)/;
25+
// Alias of `treeSitter.detectionRegExp`; the pattern itself is defined in the tree-sitter module.
export const detectionRegExp = treeSitter.detectionRegExp;
2826

2927
/**
3028
* @public
3129
*/
32-
export const detect = async (source: string): Promise<boolean> => {
33-
if (!detectionRegExp.test(source)) {
34-
return false;
35-
}
30+
export interface DetectOptions {
31+
// When true, `detect` delegates to `native.detect`; when false or omitted, to `treeSitter.detect`.
strict?: boolean;
32+
}
3633

37-
let cst: Tree | null = null;
38-
try {
39-
cst = await lexicalAnalysis(source);
40-
return cst.rootNode.type !== 'ERROR';
41-
} catch {
42-
return false;
43-
} finally {
44-
cst?.delete();
34+
/**
* Reports whether `source` is JSON, choosing the detection backend from the `strict` option.
*
* @param source - The string to test.
* @param options - `strict` defaults to `false`; calling without options is allowed.
* @returns The result of `native.detect(source)` when `strict` is true, otherwise the
* result of `treeSitter.detect(source)`.
*
35+
* @public
36+
*/
37+
export const detect = async (
38+
source: string,
39+
{ strict = false }: DetectOptions = {},
40+
): Promise<boolean> => {
41+
if (strict) {
42+
// Strict mode: delegate to the native (`JSON.parse`-based) detector.
return native.detect(source);
4543
}
44+
// Default mode: delegate to the tree-sitter based detector.
return treeSitter.detect(source);
4645
};
4746

4847
/**
4948
* @public
5049
*/
5150
export interface ParseFunctionOptions {
5251
// Forwarded to `treeSitter.parse`; `parse` throws when this is combined with `strict: true`.
sourceMap?: boolean;
52+
// When true, `parse` delegates to `native.parse` instead of `treeSitter.parse`.
strict?: boolean;
5353
}
5454

5555
/**
@@ -63,11 +63,16 @@ export type ParseFunction = (
6363
/**
6464
* @public
6565
*/
66-
export const parse: ParseFunction = async (source, { sourceMap = false } = {}) => {
67-
const cst = await lexicalAnalysis(source);
68-
try {
69-
return syntacticAnalysis(cst, { sourceMap });
70-
} finally {
71-
cst.delete();
66+
// Parses `source`, choosing the backend from the options. Both `sourceMap` and `strict`
// default to `false`, and the options object itself may be omitted.
// Throws `ApiDOMError` when `strict` and `sourceMap` are both true.
export const parse: ParseFunction = async (source, { sourceMap = false, strict = false } = {}) => {
67+
// Reject the unsupported combination up front, before any parsing work is done.
if (strict && sourceMap) {
68+
throw new ApiDOMError(
69+
'Cannot use sourceMap with strict parsing. Strict parsing does not support source maps.',
70+
);
71+
}
72+
73+
if (strict) {
74+
// Strict mode: `native.parse` takes only the source; no options are forwarded.
return native.parse(source);
7275
}
76+
77+
// Default mode: tree-sitter backend, with `sourceMap` forwarded.
return treeSitter.parse(source, { sourceMap });
7378
};
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import { ParseResultElement, refract } from '@speclynx/apidom-datamodel';
2+
3+
/**
* Strict JSON detection: reports whether `JSON.parse` accepts `source`.
*
* @param source - The string to test.
* @returns `true` when `JSON.parse(source)` completes without throwing, `false` otherwise.
* The parsed value itself is discarded.
*
4+
* @public
5+
*/
6+
export const detect = async (source: string): Promise<boolean> => {
7+
try {
8+
JSON.parse(source);
9+
return true;
10+
} catch {
11+
// Any exception from `JSON.parse` is treated as "not JSON" rather than propagated.
return false;
12+
}
13+
};
14+
15+
/**
* Strict JSON parsing: parses `source` with `JSON.parse` and wraps the refracted value
* in a new `ParseResultElement`.
*
* Because the function is `async`, an exception thrown by `JSON.parse` on invalid input
* surfaces as a rejection of the returned promise.
*
* @param source - The JSON string to parse.
* @returns A `ParseResultElement` containing one element tagged with the `'result'` class.
*
16+
* @public
17+
*/
18+
export const parse = async (source: string): Promise<ParseResultElement> => {
19+
const pojo = JSON.parse(source);
20+
// presumably converts the plain JS value into an ApiDOM element tree — confirm against `refract` docs
const element = refract(pojo);
21+
const parseResult = new ParseResultElement();
22+
// Tag the element with the 'result' class before adding it to the parse result.
// NOTE(review): presumably this is what `parseResult.result` looks up — confirm in apidom-datamodel.
element.classes.push('result');
23+
parseResult.push(element);
24+
return parseResult;
25+
};
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import { ParseResultElement } from '@speclynx/apidom-datamodel';
2+
import type { Tree } from 'web-tree-sitter';
3+
4+
import lexicalAnalysis from './lexical-analysis/index.ts';
5+
import syntacticAnalysis from './syntactic-analysis/index.ts';
6+
7+
export type { Tree };
8+
export { lexicalAnalysis, syntacticAnalysis };
9+
10+
/**
* Pre-filter pattern used by `detect` before invoking the tree-sitter parser.
*
* It is an alternation of named groups, each allowing surrounding whitespace:
* - `true`, `false`, `null`: the whole source is that literal.
* - `number`: the whole source is one or more digits.
* - `object` / `array`: the source starts with `{` or `[` (no end anchor, so the rest
* of the source is not inspected by this pattern).
* - `string`: the whole source is a double-quoted string with JSON escapes and no raw
* control characters.
*
* NOTE(review): the `number` alternative is `\d+` only, so sources such as `-1`, `1.5`
* or `1e3` do not match and are rejected by the pre-filter — confirm this is intended.
*
11+
* @public
12+
*/
13+
export const detectionRegExp =
14+
// eslint-disable-next-line no-control-regex
15+
/(?<true>^\s*true\s*$)|(?<false>^\s*false\s*$)|(?<null>^\s*null\s*$)|(?<number>^\s*\d+\s*$)|(?<object>^\s*{\s*)|(?<array>^\s*\[\s*)|(?<string>^\s*"(((?=\\)\\(["\\/bfnrt]|u[0-9a-fA-F]{4}))|[^"\\\x00-\x1F\x7F])*"\s*$)/;
16+
17+
/**
* Tree-sitter based JSON detection.
*
* @param source - The string to test.
* @returns `false` when `source` does not match `detectionRegExp` or when
* `lexicalAnalysis` throws; otherwise `true` exactly when the root node of the parsed
* tree is not of type `'ERROR'`.
*
18+
* @public
19+
*/
20+
export const detect = async (source: string): Promise<boolean> => {
21+
// Regex pre-filter: sources that do not match are rejected without running the parser.
if (!detectionRegExp.test(source)) {
22+
return false;
23+
}
24+
25+
// Declared outside `try` so the `finally` clause can reach it; stays null if parsing throws.
let cst: Tree | null = null;
26+
try {
27+
cst = await lexicalAnalysis(source);
28+
return cst.rootNode.type !== 'ERROR';
29+
} catch {
30+
// A failure during lexical analysis is reported as "not detected" rather than propagated.
return false;
31+
} finally {
32+
// Always dispose of the tree if one was created (presumably frees parser-owned memory — confirm in web-tree-sitter docs).
cst?.delete();
33+
}
34+
};
35+
36+
/**
* Options accepted by the tree-sitter `parse` function.
*
37+
* @public
38+
*/
39+
export interface ParseOptions {
40+
// Forwarded unchanged to `syntacticAnalysis`; `parse` defaults it to `false` when omitted.
sourceMap?: boolean;
41+
}
42+
43+
/**
* Tree-sitter based parsing: builds a tree with `lexicalAnalysis`, converts it with
* `syntacticAnalysis`, and deletes the tree afterwards.
*
* @param source - The string to parse.
* @param options - `sourceMap` defaults to `false`; calling without options is allowed.
* @returns The value produced by `syntacticAnalysis` for the parsed tree.
*
44+
* @public
45+
*/
46+
export const parse = async (
47+
source: string,
48+
{ sourceMap = false }: ParseOptions = {},
49+
): Promise<ParseResultElement> => {
50+
const cst = await lexicalAnalysis(source);
51+
try {
52+
// NOTE(review): the result is returned without `await`, so `finally` runs as soon as
// `syntacticAnalysis` returns — fine if it is synchronous; confirm.
return syntacticAnalysis(cst, { sourceMap });
53+
} finally {
54+
// Delete the tree whether syntactic analysis succeeded or threw.
cst.delete();
55+
}
56+
};

packages/apidom-parser-adapter-json/src/lexical-analysis/index.ts renamed to packages/apidom-parser-adapter-json/src/tree-sitter/lexical-analysis/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Tree, Parser, Language } from 'web-tree-sitter';
22
import treeSitter from 'web-tree-sitter/tree-sitter.wasm';
3+
// eslint-disable-next-line import/no-extraneous-dependencies
34
import treeSitterJson from 'tree-sitter-json/tree-sitter-json.wasm';
45
import { ApiDOMError } from '@speclynx/apidom-error';
56

packages/apidom-parser-adapter-json/src/syntactic-analysis/index.ts renamed to packages/apidom-parser-adapter-json/src/tree-sitter/syntactic-analysis/index.ts

File renamed without changes.

packages/apidom-parser-adapter-json/test/adapter.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,46 @@ describe('adapter', function () {
8787
}
8888
});
8989
});
90+
91+
context('strict mode', function () {
92+
context('detect', function () {
93+
specify('should detect valid JSON', async function () {
94+
assert.isTrue(await adapter.detect('{"key": "value"}', { strict: true }));
95+
});
96+
97+
specify('should not detect invalid JSON', async function () {
98+
assert.isFalse(await adapter.detect('{key: value}', { strict: true }));
99+
});
100+
});
101+
102+
context('parse', function () {
103+
specify('should parse valid JSON', async function () {
104+
const parseResult = await adapter.parse(spec, { strict: true });
105+
106+
assert.isTrue(isParseResultElement(parseResult));
107+
assert.isTrue(isObjectElement(parseResult.result));
108+
});
109+
110+
specify('should throw on invalid JSON', async function () {
111+
const invalidJson = '{key: value}';
112+
113+
try {
114+
await adapter.parse(invalidJson, { strict: true });
115+
assert.fail('Should have thrown an error');
116+
} catch (error) {
117+
assert.instanceOf(error, SyntaxError);
118+
}
119+
});
120+
121+
specify('should throw when strict and sourceMap are both true', async function () {
122+
try {
123+
await adapter.parse(spec, { strict: true, sourceMap: true });
124+
assert.fail('Should have thrown an error');
125+
} catch (error) {
126+
assert.instanceOf(error, Error);
127+
assert.include((error as Error).message, 'Cannot use sourceMap with strict parsing');
128+
}
129+
});
130+
});
131+
});
90132
});

packages/apidom-parser-adapter-json/test/perf/lexical-analysis.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { fileURLToPath } from 'node:url';
44
import Benchmark from 'benchmark';
55
import type { Deferred } from 'benchmark';
66

7-
import analyze from '../../src/lexical-analysis/index.ts';
7+
import analyze from '../../src/tree-sitter/lexical-analysis/index.ts';
88

99
const __dirname = path.dirname(fileURLToPath(import.meta.url));
1010
const fixturePath = path.join(__dirname, 'fixtures/data.json');

0 commit comments

Comments
 (0)