Skip to content

Commit 7e7aa9a

Browse files
authored
Merge pull request #71 from ryleelyman/split_parser
add basic latex support
2 parents abea13b + 7acd0c5 commit 7e7aa9a

File tree

10 files changed

+30528
-25526
lines changed

10 files changed

+30528
-25526
lines changed

common/grammar.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ exports.EXTENSION_PIPE_TABLE = process.env.EXTENSION_PIPE_TABLE || exports.EXTEN
66
exports.EXTENSION_MINUS_METADATA = process.env.EXTENSION_MINUS_METADATA || exports.EXTENSION_DEFAULT || process.env.ALL_EXTENSIONS;
77
exports.EXTENSION_PLUS_METADATA = process.env.EXTENSION_PLUS_METADATA || exports.EXTENSION_DEFAULT || process.env.ALL_EXTENSIONS;
88
exports.EXTENSION_TAGS = process.env.EXTENSION_TAGS || process.env.ALL_EXTENSIONS;
9+
exports.EXTENSION_LATEX = process.env.EXTENSION_LATEX || exports.EXTENSION_DEFAULT || process.env.ALL_EXTENSIONS;
910

1011
const PUNCTUATION_CHARACTERS_REGEX = '!-/:-@\\[-`\\{-~';
1112
const PUNCTUATION_CHARACTERS_ARRAY = [

package-lock.json

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"license": "MIT",
1616
"dependencies": {
1717
"@mapbox/node-pre-gyp": "^1.0.9",
18-
"nan": "^2.14.0",
18+
"nan": "^2.17.0",
1919
"node-pre-gyp": "^0.17.0"
2020
},
2121
"devDependencies": {
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
================================================================================
2+
Basic LaTeX parsing.
3+
================================================================================
4+
$$This$$ has $$an odd$$ number of instances of $$.
5+
6+
-------------------------------------------------------------------------------
7+
(inline
8+
(latex_block
9+
(latex_span_delimiter)
10+
(latex_span_delimiter))
11+
(latex_block
12+
(latex_span_delimiter)
13+
(latex_span_delimiter)))
14+
15+
================================================================================
16+
LaTeX and markup clashes.
17+
================================================================================
18+
$$This should prevent *this from parsing$$ the bold.*
19+
20+
-------------------------------------------------------------------------------
21+
(inline
22+
(latex_block
23+
(latex_span_delimiter)
24+
(latex_span_delimiter)))
25+
26+
================================================================================
27+
LaTeX and link clashes
28+
================================================================================
29+
$$This should prevent [this from parsing$$ the link](https://google.com)
30+
31+
-------------------------------------------------------------------------------
32+
(inline
33+
(latex_block
34+
(latex_span_delimiter)
35+
(latex_span_delimiter)))
36+
================================================================================
37+
LaTeX inside markup
38+
================================================================================
39+
*This bold $$should still parse $$*.
40+
41+
-------------------------------------------------------------------------------
42+
(inline
43+
(emphasis
44+
(emphasis_delimiter)
45+
(latex_block
46+
(latex_span_delimiter)
47+
(latex_span_delimiter))
48+
(emphasis_delimiter)))
49+
================================================================================
50+
LaTeX within one paragraph
51+
================================================================================
52+
$$This should all be captured
53+
as one instance of LaTeX.$$
54+
55+
$$This presumably
56+
57+
should not, but will because we need the blocks.$$
58+
--------------------------------------------------------------------------------
59+
(inline
60+
(latex_block
61+
(latex_span_delimiter)
62+
(latex_span_delimiter))
63+
(latex_block
64+
(latex_span_delimiter)
65+
(latex_span_delimiter)))

tree-sitter-markdown-inline/grammar.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ const PRECEDENCE_LEVEL_EMPHASIS = 1;
1010
const PRECEDENCE_LEVEL_LINK = 10;
1111
const PRECEDENCE_LEVEL_HTML = 100;
1212
const PRECEDENCE_LEVEL_CODE_SPAN = 100;
13+
const PRECEDENCE_LEVEL_LATEX = 100;
1314

1415
// Punctuation characters as specified in
1516
// https://github.github.com/gfm/#ascii-punctuation-character
@@ -64,6 +65,11 @@ module.exports = grammar(add_inline_rules({
6465

6566
$._strikethrough_open,
6667
$._strikethrough_close,
68+
69+
// Opening and closing delimiters for LaTeX spans. These are sequences of one or more dollar signs.
70+
// An opening token does not by itself make the text that follows LaTeX: a matching closing token is also required.
71+
$._latex_span_start,
72+
$._latex_span_close,
6773
],
6874
precedences: $ => [
6975
// [$._strong_emphasis_star, $._inline_element_no_star],
@@ -76,6 +82,7 @@ module.exports = grammar(add_inline_rules({
7682
// More conflicts are defined in `add_inline_rules`
7783
conflicts: $ => [
7884
[$.code_span, $._inline_base],
85+
[$.latex_block, $._inline_base],
7986

8087
[$._closing_tag, $._text_base],
8188
[$._open_tag, $._text_base],
@@ -115,6 +122,7 @@ module.exports = grammar(add_inline_rules({
115122
//
116123
// * collections of inlines
117124
// * code spans
125+
// * latex spans
118126
// * emphasis
119127
// * textual content
120128
//
@@ -127,6 +135,12 @@ module.exports = grammar(add_inline_rules({
127135
alias($._code_span_close, $.code_span_delimiter)
128136
)),
129137

138+
latex_block: $ => prec.dynamic(PRECEDENCE_LEVEL_LATEX, seq(
139+
alias($._latex_span_start, $.latex_span_delimiter),
140+
repeat(choice($._text_base, '[', ']', $._soft_line_break, $._html_tag)),
141+
alias($._latex_span_close, $.latex_span_delimiter),
142+
)),
143+
130144
// Different kinds of links:
131145
// * inline links (https://github.github.com/gfm/#inline-link)
132146
// * full reference links (https://github.github.com/gfm/#full-reference-link)
@@ -328,11 +342,13 @@ module.exports = grammar(add_inline_rules({
328342
$.email_autolink,
329343
$.entity_reference,
330344
$.numeric_character_reference,
345+
(common.EXTENSION_LATEX ? $.latex_block : choice()),
331346
$.code_span,
332347
alias($._html_tag, $.html_tag),
333348
$._text_base,
334349
$._code_span_start,
335350
common.EXTENSION_TAGS ? $.tag : choice(),
351+
(common.EXTENSION_LATEX ? $._latex_span_start : choice()),
336352
))),
337353
_text_base: $ => choice(
338354
$._word,
@@ -346,6 +362,7 @@ module.exports = grammar(add_inline_rules({
346362
_text_inline_no_link: $ => choice(
347363
$._text_base,
348364
$._code_span_start,
365+
$._latex_span_start,
349366
$._emphasis_open_star,
350367
$._emphasis_open_underscore,
351368
),
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
((html_tag) @injection.content (#set! injection.language "html"))
2+
((latex_block) @injection.content (#set! injection.language "latex"))

tree-sitter-markdown-inline/src/grammar.json

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,6 +1512,61 @@
15121512
]
15131513
}
15141514
},
1515+
"latex_block": {
1516+
"type": "PREC_DYNAMIC",
1517+
"value": 100,
1518+
"content": {
1519+
"type": "SEQ",
1520+
"members": [
1521+
{
1522+
"type": "ALIAS",
1523+
"content": {
1524+
"type": "SYMBOL",
1525+
"name": "_latex_span_start"
1526+
},
1527+
"named": true,
1528+
"value": "latex_span_delimiter"
1529+
},
1530+
{
1531+
"type": "REPEAT",
1532+
"content": {
1533+
"type": "CHOICE",
1534+
"members": [
1535+
{
1536+
"type": "SYMBOL",
1537+
"name": "_text_base"
1538+
},
1539+
{
1540+
"type": "STRING",
1541+
"value": "["
1542+
},
1543+
{
1544+
"type": "STRING",
1545+
"value": "]"
1546+
},
1547+
{
1548+
"type": "SYMBOL",
1549+
"name": "_soft_line_break"
1550+
},
1551+
{
1552+
"type": "SYMBOL",
1553+
"name": "_html_tag"
1554+
}
1555+
]
1556+
}
1557+
},
1558+
{
1559+
"type": "ALIAS",
1560+
"content": {
1561+
"type": "SYMBOL",
1562+
"name": "_latex_span_close"
1563+
},
1564+
"named": true,
1565+
"value": "latex_span_delimiter"
1566+
}
1567+
]
1568+
}
1569+
},
15151570
"_link_text": {
15161571
"type": "PREC_DYNAMIC",
15171572
"value": 10,
@@ -4139,6 +4194,10 @@
41394194
"type": "SYMBOL",
41404195
"name": "numeric_character_reference"
41414196
},
4197+
{
4198+
"type": "SYMBOL",
4199+
"name": "latex_block"
4200+
},
41424201
{
41434202
"type": "SYMBOL",
41444203
"name": "code_span"
@@ -4163,6 +4222,10 @@
41634222
{
41644223
"type": "CHOICE",
41654224
"members": []
4225+
},
4226+
{
4227+
"type": "SYMBOL",
4228+
"name": "_latex_span_start"
41664229
}
41674230
]
41684231
}
@@ -4350,6 +4413,10 @@
43504413
"type": "SYMBOL",
43514414
"name": "_code_span_start"
43524415
},
4416+
{
4417+
"type": "SYMBOL",
4418+
"name": "_latex_span_start"
4419+
},
43534420
{
43544421
"type": "SYMBOL",
43554422
"name": "_emphasis_open_star"
@@ -5493,6 +5560,10 @@
54935560
"code_span",
54945561
"_inline_base"
54955562
],
5563+
[
5564+
"latex_block",
5565+
"_inline_base"
5566+
],
54965567
[
54975568
"_closing_tag",
54985569
"_text_base"
@@ -5809,6 +5880,14 @@
58095880
{
58105881
"type": "SYMBOL",
58115882
"name": "_strikethrough_close"
5883+
},
5884+
{
5885+
"type": "SYMBOL",
5886+
"name": "_latex_span_start"
5887+
},
5888+
{
5889+
"type": "SYMBOL",
5890+
"name": "_latex_span_close"
58125891
}
58135892
],
58145893
"inline": [],

0 commit comments

Comments
 (0)