Skip to content

Commit 3c7d205

Browse files
committed
parse comments
ensure fragments work
1 parent 039879f commit 3c7d205

File tree

13 files changed

+115
-14
lines changed

13 files changed

+115
-14
lines changed

src/Ast/NodeType.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ enum NodeType
88
case ELEMENT;
99
case TEXT;
1010
case RAW;
11+
case COMMENT;
1112
}

src/Ast/Parser.php

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,32 +44,34 @@ protected function parseNode(): ?Node
4444
}
4545
if ($token->type === TokenType::DOCTYPE) {
4646
$this->advance();
47-
4847
return new Node(NodeType::DOCTYPE, '', [], [], $token->value);
4948
}
5049
if ($token->type === TokenType::TEXT) {
5150
$this->advance();
52-
5351
return new Node(NodeType::TEXT, '', [], [], $token->value);
5452
}
53+
// Parse a comment token into a COMMENT node that carries the comment text as its content.
54+
if ($token->type === TokenType::COMMENT) {
55+
$this->advance();
56+
return new Node(NodeType::COMMENT, '', [], [], $token->value);
57+
}
5558
if ($token->type === TokenType::RAW) {
5659
$this->advance();
57-
5860
return new Node(NodeType::RAW, '', [], [], $token->value);
5961
}
6062
if ($token->type === TokenType::TAG_OPEN) {
6163
return $this->parseElement();
6264
}
6365
if ($token->type === TokenType::TAG_CLOSE) {
6466
$this->consumeClosingTag();
65-
6667
return null;
6768
}
6869
$this->advance();
6970

7071
return null;
7172
}
7273

74+
7375
protected function parseElement(): Node
7476
{
7577
// Consume the open tag token (e.g. from '<script').

src/Lexer/Lexer.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,14 +102,22 @@ protected function consumeDoctype(): void
102102
protected function consumeComment(): void
103103
{
104104
$this->consume(4); // consumes '<!--'
105+
$start = $this->position;
105106
$end = strpos($this->input, '-->', $this->position);
106107
if ($end === false) {
108+
// If there is no closing '-->' delimiter, capture the rest of the input as the comment.
109+
$commentText = substr($this->input, $start);
107110
$this->position = $this->length;
108111
} else {
109-
$this->position = $end + 3;
112+
// Extract the comment text (everything up to but not including '-->')
113+
$commentText = substr($this->input, $start, $end - $start);
114+
$this->position = $end + 3; // consume the closing '-->'
110115
}
116+
// Add a token for the comment.
117+
$this->addToken(TokenType::COMMENT, $commentText);
111118
}
112119

120+
113121
/**
114122
* Consume text until a '<' is encountered.
115123
* Only produces a token if the text contains at least one non-whitespace character.

src/Lexer/TokenType.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ enum TokenType
1313
case RAW;
1414
case ATTR_NAME;
1515
case ATTR_VALUE;
16+
case COMMENT;
1617
}

src/Printer.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public function render(array $nodes, int $level = 0): string
3939
$html .= $this->renderNode($node, $level);
4040
}
4141

42-
return $html;
42+
return rtrim($html); // strip all trailing whitespace so the output does not end with a line break
4343
}
4444

4545
protected function renderNode(Node $node, int $level): string
@@ -56,6 +56,10 @@ protected function renderNode(Node $node, int $level): string
5656
return $this->renderRaw($node->content, $level);
5757
}
5858

59+
if ($node->type === NodeType::COMMENT) {
60+
return $this->renderComment($node->content, $indent);
61+
}
62+
5963
// For plain text nodes.
6064
if ($node->type === NodeType::TEXT) {
6165
return $indent . $node->content . $this->newline;
@@ -133,4 +137,9 @@ protected function renderRaw(string $raw, int $level): string
133137

134138
return implode($this->newline, $newLines) . $this->newline;
135139
}
140+
141+
private function renderComment(string $content, string $indent)
142+
{
143+
return $indent . '<!--' . $content . '-->' . $this->newline;
144+
}
136145
}

tests/Feature/LexerTest.php

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,24 @@
55

66
it('can read an html file and output a tokens array', function () {
77
$html = getFixture('basic.html');
8-
$lexer = new Lexer($html);
8+
$lexer = new Lexer($html);
99
expect($lexer->lex())->toHaveCount(69);
1010
});
1111

1212
it('has the same output no matter format of the input', function () {
1313
$html = getFixture('basic.html');
14-
$lexer = Lexer::fromString($html);
14+
$lexer = Lexer::fromString($html);
1515
$tokens1 = $lexer->lex();
1616

1717
$html = getFixture('basic-scrambled.html');
18-
$lexer = Lexer::fromString($html);
18+
$lexer = Lexer::fromString($html);
1919
$tokens2 = $lexer->lex();
2020
expect($tokens1)->toEqual($tokens2);
2121
});
2222

2323
it('can read get open and closing div tags', function () {
2424
$html = getFixture('basic.html');
25-
$lexer = new Lexer($html);
25+
$lexer = new Lexer($html);
2626
$tokens = $lexer->lex();
2727

2828
//Opening div tag
@@ -42,7 +42,7 @@
4242

4343
it('can read get open and closing script tags', function () {
4444
$html = getFixture('complex.html');
45-
$lexer = new Lexer($html);
45+
$lexer = new Lexer($html);
4646
$tokens = $lexer->lex();
4747

4848
//Opening script tag
@@ -58,4 +58,18 @@
5858
expect($tokens[62])
5959
->toHaveKey('value', 'script')
6060
->toHaveKey('type', TokenType::TAG_CLOSE);
61+
});
62+
63+
it('can handle comments', function () {
64+
$lexer = Lexer::fromString(getFixture('comments.html'));
65+
66+
$tokens = $lexer->lex();
67+
68+
expect($tokens[7])
69+
->toHaveKey('type', TokenType::COMMENT)
70+
->toHaveKey('value', ' title tag ');
71+
72+
expect($tokens[13])
73+
->toHaveKey('type', TokenType::COMMENT)
74+
->toHaveKey('value', ' Body ');
6175
});

tests/Feature/PrintTest.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,30 @@
3737

3838
$nodes = $ast->parse();
3939

40+
$printer = new Printer();
41+
expect($printer->render($nodes))->toEqual($html);
42+
});
43+
44+
it('can print fragments', function () {
45+
$html = getFixture('fragment.html');
46+
$lexer = new Lexer($html);
47+
48+
$ast = new Parser($lexer->lex());
49+
50+
$nodes = $ast->parse();
51+
52+
$printer = new Printer();
53+
expect($printer->render($nodes))->toEqual($html);
54+
});
55+
56+
it('can print comments', function () {
57+
$html = getFixture('comments.html');
58+
$lexer = new Lexer($html);
59+
60+
$ast = new Parser($lexer->lex());
61+
62+
$nodes = $ast->parse();
63+
4064
$printer = new Printer();
4165
expect($printer->render($nodes))->toEqual($html);
4266
});

tests/fixtures/basic-scrambled.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@
2222
Smaller text
2323

2424
</div>
25-
</body></html>
25+
</body></html>

tests/fixtures/basic.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@
1919
Smaller text
2020
</div>
2121
</body>
22-
</html>
22+
</html>

tests/fixtures/broken.html

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
5+
<title>
6+
Test page
7+
</title>
8+
</head>
9+
<body style="width:1024px;" class="h-full">
10+
<div class="wrapper" style="font-size: 10px">
11+
Smaller text
12+
</body>
13+
</html>

0 commit comments

Comments
 (0)