Skip to content

Commit 31a7ac2

Browse files
2 parents 5362975 + 35d83f2 commit 31a7ac2

File tree

1 file changed

+104
-101
lines changed

1 file changed

+104
-101
lines changed

lib/model/content.dart

+104-101
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,7 @@ class MathInlineNode extends InlineContentNode {
786786
class GlobalTimeNode extends InlineContentNode {
787787
const GlobalTimeNode({super.debugHtmlNode, required this.datetime});
788788

789-
/// Always in UTC, enforced in [_ZulipContentParser.parseInlineContent].
789+
/// Always in UTC, enforced in [_ZulipInlineContentParser.parseInlineContent].
790790
final DateTime datetime;
791791

792792
@override
@@ -806,72 +806,68 @@ class GlobalTimeNode extends InlineContentNode {
806806

807807
////////////////////////////////////////////////////////////////
808808
809-
/// What sort of nodes a [_ZulipContentParser] is currently expecting to find.
810-
enum _ParserContext {
811-
/// The parser is currently looking for block nodes.
812-
block,
809+
String? _parseMath(dom.Element element, {required bool block}) {
810+
final dom.Element katexElement;
811+
if (!block) {
812+
assert(element.localName == 'span' && element.className == 'katex');
813813

814-
/// The parser is currently looking for inline nodes.
815-
inline,
816-
}
817-
818-
class _ZulipContentParser {
819-
/// The current state of what sort of nodes the parser is looking for.
820-
///
821-
/// This exists for the sake of debug-mode checks,
822-
/// and should be read or updated only inside an assertion.
823-
_ParserContext _debugParserContext = _ParserContext.block;
824-
825-
String? parseMath(dom.Element element, {required bool block}) {
826-
assert(block == (_debugParserContext == _ParserContext.block));
827-
828-
final dom.Element katexElement;
829-
if (!block) {
830-
assert(element.localName == 'span' && element.className == 'katex');
814+
katexElement = element;
815+
} else {
816+
assert(element.localName == 'span' && element.className == 'katex-display');
831817

832-
katexElement = element;
833-
} else {
834-
assert(element.localName == 'span' && element.className == 'katex-display');
835-
836-
if (element.nodes.length != 1) return null;
837-
final child = element.nodes.single;
838-
if (child is! dom.Element) return null;
839-
if (child.localName != 'span') return null;
840-
if (child.className != 'katex') return null;
841-
katexElement = child;
842-
}
843-
844-
// Expect two children span.katex-mathml, span.katex-html .
845-
// For now we only care about the .katex-mathml .
846-
if (katexElement.nodes.isEmpty) return null;
847-
final child = katexElement.nodes.first;
818+
if (element.nodes.length != 1) return null;
819+
final child = element.nodes.single;
848820
if (child is! dom.Element) return null;
849821
if (child.localName != 'span') return null;
850-
if (child.className != 'katex-mathml') return null;
851-
852-
if (child.nodes.length != 1) return null;
853-
final grandchild = child.nodes.single;
854-
if (grandchild is! dom.Element) return null;
855-
if (grandchild.localName != 'math') return null;
856-
if (grandchild.attributes['display'] != (block ? 'block' : null)) return null;
857-
if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML') return null;
858-
859-
if (grandchild.nodes.length != 1) return null;
860-
final greatgrand = grandchild.nodes.single;
861-
if (greatgrand is! dom.Element) return null;
862-
if (greatgrand.localName != 'semantics') return null;
863-
864-
if (greatgrand.nodes.isEmpty) return null;
865-
final descendant4 = greatgrand.nodes.last;
866-
if (descendant4 is! dom.Element) return null;
867-
if (descendant4.localName != 'annotation') return null;
868-
if (descendant4.attributes['encoding'] != 'application/x-tex') return null;
869-
870-
return descendant4.text.trim();
822+
if (child.className != 'katex') return null;
823+
katexElement = child;
824+
}
825+
826+
// Expect two children span.katex-mathml, span.katex-html .
827+
// For now we only care about the .katex-mathml .
828+
if (katexElement.nodes.isEmpty) return null;
829+
final child = katexElement.nodes.first;
830+
if (child is! dom.Element) return null;
831+
if (child.localName != 'span') return null;
832+
if (child.className != 'katex-mathml') return null;
833+
834+
if (child.nodes.length != 1) return null;
835+
final grandchild = child.nodes.single;
836+
if (grandchild is! dom.Element) return null;
837+
if (grandchild.localName != 'math') return null;
838+
if (grandchild.attributes['display'] != (block ? 'block' : null)) return null;
839+
if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML') return null;
840+
841+
if (grandchild.nodes.length != 1) return null;
842+
final greatgrand = grandchild.nodes.single;
843+
if (greatgrand is! dom.Element) return null;
844+
if (greatgrand.localName != 'semantics') return null;
845+
846+
if (greatgrand.nodes.isEmpty) return null;
847+
final descendant4 = greatgrand.nodes.last;
848+
if (descendant4 is! dom.Element) return null;
849+
if (descendant4.localName != 'annotation') return null;
850+
if (descendant4.attributes['encoding'] != 'application/x-tex') return null;
851+
852+
return descendant4.text.trim();
853+
}
854+
855+
/// Parser for the inline-content subtrees within Zulip content HTML.
856+
///
857+
/// The only entry point to this class is [parseBlockInline].
858+
///
859+
/// After a call to [parseBlockInline] returns, the [_ZulipInlineContentParser]
860+
/// instance has been reset to its starting state, and can be re-used for
861+
/// parsing other subtrees.
862+
class _ZulipInlineContentParser {
863+
InlineContentNode? parseInlineMath(dom.Element element) {
864+
final debugHtmlNode = kDebugMode ? element : null;
865+
final texSource = _parseMath(element, block: false);
866+
if (texSource == null) return null;
867+
return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
871868
}
872869

873870
UserMentionNode? parseUserMention(dom.Element element) {
874-
assert(_debugParserContext == _ParserContext.inline);
875871
assert(element.localName == 'span');
876872
final debugHtmlNode = kDebugMode ? element : null;
877873

@@ -945,7 +941,6 @@ class _ZulipContentParser {
945941
static final _emojiCodeFromClassNameRegexp = RegExp(r"emoji-([^ ]+)");
946942

947943
InlineContentNode parseInlineContent(dom.Node node) {
948-
assert(_debugParserContext == _ParserContext.inline);
949944
final debugHtmlNode = kDebugMode ? node : null;
950945
InlineContentNode unimplemented() => UnimplementedInlineContentNode(htmlNode: node);
951946

@@ -1025,36 +1020,49 @@ class _ZulipContentParser {
10251020
}
10261021

10271022
if (localName == 'span' && className == 'katex') {
1028-
final texSource = parseMath(element, block: false);
1029-
if (texSource == null) return unimplemented();
1030-
return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
1023+
return parseInlineMath(element) ?? unimplemented();
10311024
}
10321025

10331026
// TODO more types of node
10341027
return unimplemented();
10351028
}
10361029

10371030
List<InlineContentNode> parseInlineContentList(List<dom.Node> nodes) {
1038-
assert(_debugParserContext == _ParserContext.inline);
10391031
return nodes.map(parseInlineContent).toList(growable: false);
10401032
}
10411033

1034+
/// Parse the children of a [BlockInlineContainerNode], making up a
1035+
/// complete subtree of inline content with no further inline ancestors.
10421036
({List<InlineContentNode> nodes, List<LinkNode>? links}) parseBlockInline(List<dom.Node> nodes) {
1043-
assert(_debugParserContext == _ParserContext.block);
1044-
assert(() {
1045-
_debugParserContext = _ParserContext.inline;
1046-
return true;
1047-
}());
10481037
final resultNodes = parseInlineContentList(nodes);
1049-
assert(() {
1050-
_debugParserContext = _ParserContext.block;
1051-
return true;
1052-
}());
10531038
return (nodes: resultNodes, links: _takeLinkNodes());
10541039
}
1040+
}
1041+
1042+
/// Parser for a complete piece of Zulip HTML content, a [ZulipContent].
1043+
///
1044+
/// The only entry point to this class is [parse].
1045+
class _ZulipContentParser {
1046+
/// The single inline-content parser used and re-used throughout parsing of
1047+
/// a complete piece of Zulip HTML content.
1048+
///
1049+
/// Because block content can never appear nested inside inline content,
1050+
/// there's never a need for more than one of these at a time,
1051+
/// so we can allocate just one up front.
1052+
final inlineParser = _ZulipInlineContentParser();
1053+
1054+
({List<InlineContentNode> nodes, List<LinkNode>? links}) parseBlockInline(List<dom.Node> nodes) {
1055+
return inlineParser.parseBlockInline(nodes);
1056+
}
1057+
1058+
BlockContentNode parseMathBlock(dom.Element element) {
1059+
final debugHtmlNode = kDebugMode ? element : null;
1060+
final texSource = _parseMath(element, block: true);
1061+
if (texSource == null) return UnimplementedBlockContentNode(htmlNode: element);
1062+
return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
1063+
}
10551064

10561065
BlockContentNode parseListNode(dom.Element element) {
1057-
assert(_debugParserContext == _ParserContext.block);
10581066
ListStyle? listStyle;
10591067
switch (element.localName) {
10601068
case 'ol': listStyle = ListStyle.ordered; break;
@@ -1077,7 +1085,6 @@ class _ZulipContentParser {
10771085
}
10781086

10791087
BlockContentNode parseSpoilerNode(dom.Element divElement) {
1080-
assert(_debugParserContext == _ParserContext.block);
10811088
assert(divElement.localName == 'div'
10821089
&& divElement.className == 'spoiler-block');
10831090

@@ -1097,7 +1104,6 @@ class _ZulipContentParser {
10971104
}
10981105

10991106
BlockContentNode parseCodeBlock(dom.Element divElement) {
1100-
assert(_debugParserContext == _ParserContext.block);
11011107
final mainElement = () {
11021108
assert(divElement.localName == 'div'
11031109
&& divElement.className == "codehilite");
@@ -1180,7 +1186,6 @@ class _ZulipContentParser {
11801186
static final _imageDimensionsRegExp = RegExp(r'^(\d+)x(\d+)$');
11811187

11821188
BlockContentNode parseImageNode(dom.Element divElement) {
1183-
assert(_debugParserContext == _ParserContext.block);
11841189
final elements = () {
11851190
assert(divElement.localName == 'div'
11861191
&& divElement.className == 'message_inline_image');
@@ -1272,7 +1277,6 @@ class _ZulipContentParser {
12721277
}();
12731278

12741279
BlockContentNode parseInlineVideoNode(dom.Element divElement) {
1275-
assert(_debugParserContext == _ParserContext.block);
12761280
assert(divElement.localName == 'div'
12771281
&& _videoClassNameRegexp.hasMatch(divElement.className));
12781282

@@ -1305,7 +1309,6 @@ class _ZulipContentParser {
13051309
}
13061310

13071311
BlockContentNode parseEmbedVideoNode(dom.Element divElement) {
1308-
assert(_debugParserContext == _ParserContext.block);
13091312
assert(divElement.localName == 'div'
13101313
&& _videoClassNameRegexp.hasMatch(divElement.className));
13111314

@@ -1344,7 +1347,6 @@ class _ZulipContentParser {
13441347
}
13451348

13461349
BlockContentNode parseTableContent(dom.Element tableElement) {
1347-
assert(_debugParserContext == _ParserContext.block);
13481350
assert(tableElement.localName == 'table'
13491351
&& tableElement.className.isEmpty);
13501352

@@ -1452,7 +1454,6 @@ class _ZulipContentParser {
14521454
}
14531455

14541456
BlockContentNode parseBlockContent(dom.Node node) {
1455-
assert(_debugParserContext == _ParserContext.block);
14561457
final debugHtmlNode = kDebugMode ? node : null;
14571458
if (node is! dom.Element) {
14581459
return UnimplementedBlockContentNode(htmlNode: node);
@@ -1480,9 +1481,7 @@ class _ZulipContentParser {
14801481
// The case with the `<br>\n` can happen when at the end of a quote;
14811482
// it seems like a glitch in the server's Markdown processing,
14821483
// so hopefully there just aren't any further such glitches.
1483-
final texSource = parseMath(child, block: true);
1484-
if (texSource == null) return UnimplementedBlockContentNode(htmlNode: node);
1485-
return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
1484+
return parseMathBlock(child);
14861485
}
14871486
}
14881487
}
@@ -1579,10 +1578,15 @@ class _ZulipContentParser {
15791578
///
15801579
/// See [ParagraphNode].
15811580
List<BlockContentNode> parseImplicitParagraphBlockContentList(dom.NodeList nodes) {
1582-
assert(_debugParserContext == _ParserContext.block);
15831581
final List<BlockContentNode> result = [];
1584-
final List<dom.Node> currentParagraph = [];
1582+
15851583
List<ImageNode> imageNodes = [];
1584+
void consumeImageNodes() {
1585+
result.add(ImageNodeList(imageNodes));
1586+
imageNodes = [];
1587+
}
1588+
1589+
final List<dom.Node> currentParagraph = [];
15861590
void consumeParagraph() {
15871591
final parsed = parseBlockInline(currentParagraph);
15881592
result.add(ParagraphNode(
@@ -1597,8 +1601,7 @@ class _ZulipContentParser {
15971601

15981602
if (_isPossibleInlineNode(node)) {
15991603
if (imageNodes.isNotEmpty) {
1600-
result.add(ImageNodeList(imageNodes));
1601-
imageNodes = [];
1604+
consumeImageNodes();
16021605
// In a context where paragraphs are implicit it should be impossible
16031606
// to have more paragraph content after image previews.
16041607
result.add(UnimplementedBlockContentNode(htmlNode: node));
@@ -1613,24 +1616,25 @@ class _ZulipContentParser {
16131616
imageNodes.add(block);
16141617
continue;
16151618
}
1616-
if (imageNodes.isNotEmpty) {
1617-
result.add(ImageNodeList(imageNodes));
1618-
imageNodes = [];
1619-
}
1619+
if (imageNodes.isNotEmpty) consumeImageNodes();
16201620
result.add(block);
16211621
}
16221622
if (currentParagraph.isNotEmpty) consumeParagraph();
1623-
if (imageNodes.isNotEmpty) result.add(ImageNodeList(imageNodes));
1624-
1623+
if (imageNodes.isNotEmpty) consumeImageNodes();
16251624
return result;
16261625
}
16271626

16281627
static final _redundantLineBreaksRegexp = RegExp(r'^\n+$');
16291628

16301629
List<BlockContentNode> parseBlockContentList(dom.NodeList nodes) {
1631-
assert(_debugParserContext == _ParserContext.block);
16321630
final List<BlockContentNode> result = [];
1631+
16331632
List<ImageNode> imageNodes = [];
1633+
void consumeImageNodes() {
1634+
result.add(ImageNodeList(imageNodes));
1635+
imageNodes = [];
1636+
}
1637+
16341638
for (final node in nodes) {
16351639
// We get a bunch of newline Text nodes between paragraphs.
16361640
// A browser seems to ignore these; let's do the same.
@@ -1643,13 +1647,10 @@ class _ZulipContentParser {
16431647
imageNodes.add(block);
16441648
continue;
16451649
}
1646-
if (imageNodes.isNotEmpty) {
1647-
result.add(ImageNodeList(imageNodes));
1648-
imageNodes = [];
1649-
}
1650+
if (imageNodes.isNotEmpty) consumeImageNodes();
16501651
result.add(block);
16511652
}
1652-
if (imageNodes.isNotEmpty) result.add(ImageNodeList(imageNodes));
1653+
if (imageNodes.isNotEmpty) consumeImageNodes();
16531654
return result;
16541655
}
16551656

@@ -1660,6 +1661,8 @@ class _ZulipContentParser {
16601661
}
16611662
}
16621663

1664+
/// Parse a complete piece of Zulip HTML content,
1665+
/// such as an entire value of [Message.content].
16631666
ZulipContent parseContent(String html) {
16641667
return _ZulipContentParser().parse(html);
16651668
}

0 commit comments

Comments
 (0)