Skip to content

Commit 158a0ff

Browse files
authored
Merge pull request #29 from wmde/mex
Support single-file component inputs
2 parents 3bfe90f + a27aaad commit 158a0ff

File tree

7 files changed

+281
-73
lines changed

7 files changed

+281
-73
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
vendor
44
node_modules/
55
composer.lock
6+
.phpunit.result.cache

composer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
],
1010
"license": "LGPL-2.1-only",
1111
"require": {
12+
"ext-dom": "*",
13+
"ext-libxml": "*",
1214
"php": ">=7.2"
1315
},
1416
"autoload": {

src/Component.php

Lines changed: 13 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -4,104 +4,47 @@
44

55
use DOMAttr;
66
use DOMCharacterData;
7-
use DOMDocument;
87
use DOMElement;
98
use DOMNode;
109
use DOMNodeList;
1110
use DOMText;
12-
use Exception;
13-
use LibXMLError;
14-
1511
use WMDE\VueJsTemplating\JsParsing\BasicJsExpressionParser;
1612
use WMDE\VueJsTemplating\JsParsing\CachingExpressionParser;
1713
use WMDE\VueJsTemplating\JsParsing\JsExpressionParser;
1814

1915
class Component {
2016

2117
/**
22-
* @var string HTML
18+
* @var DOMElement
2319
*/
24-
private $template;
20+
private $rootNode;
2521

2622
/**
2723
* @var JsExpressionParser
2824
*/
2925
private $expressionParser;
3026

3127
/**
32-
* @param string $template HTML
28+
* @param DOMElement $rootNode
3329
* @param callable[] $methods
3430
*/
35-
public function __construct( $template, array $methods ) {
36-
$this->template = $template;
31+
public function __construct( DOMElement $rootNode, array $methods ) {
32+
$this->rootNode = $rootNode;
3733
$this->expressionParser = new CachingExpressionParser( new BasicJsExpressionParser( $methods ) );
3834
}
3935

4036
/**
37+
* Note: this method is not currently safe to call repeatedly
38+
* (the internal root node is modified in-place).
39+
*
4140
* @param array $data
4241
*
4342
* @return string HTML
4443
*/
4544
public function render( array $data ) {
46-
$document = $this->parseHtml( $this->template );
47-
48-
$rootNode = $this->getRootNode( $document );
49-
$this->handleNode( $rootNode, $data );
50-
51-
return $document->saveHTML( $rootNode );
52-
}
53-
54-
/**
55-
* @param string $html HTML
56-
*
57-
* @return DOMDocument
58-
*/
59-
private function parseHtml( $html ) {
60-
if ( LIBXML_VERSION < 20900 ) {
61-
$entityLoaderDisabled = libxml_disable_entity_loader( true );
62-
}
63-
$internalErrors = libxml_use_internal_errors( true );
64-
$document = new DOMDocument( '1.0', 'UTF-8' );
65-
66-
// Ensure $html is treated as UTF-8, see https://stackoverflow.com/a/8218649
67-
// LIBXML_NOBLANKS Constant excludes "ghost nodes" to avoid violating
68-
// vue's single root node constraint
69-
if ( !$document->loadHTML( '<?xml encoding="utf-8" ?>' . $html, LIBXML_NOBLANKS ) ) {
70-
//TODO Test failure
71-
}
72-
73-
/** @var LibXMLError[] $errors */
74-
$errors = libxml_get_errors();
75-
libxml_clear_errors();
76-
77-
// Restore previous state
78-
libxml_use_internal_errors( $internalErrors );
79-
if ( LIBXML_VERSION < 20900 ) {
80-
libxml_disable_entity_loader( $entityLoaderDisabled );
81-
}
82-
83-
foreach ( $errors as $error ) {
84-
//TODO html5 tags can fail parsing
85-
//TODO Throw an exception
86-
}
87-
88-
return $document;
89-
}
90-
91-
/**
92-
* @param DOMDocument $document
93-
*
94-
* @return DOMElement
95-
* @throws Exception
96-
*/
97-
private function getRootNode( DOMDocument $document ) {
98-
$rootNodes = $document->documentElement->childNodes->item( 0 )->childNodes;
99-
100-
if ( $rootNodes->length > 1 ) {
101-
throw new Exception( 'Template should have only one root node' );
102-
}
45+
$this->handleNode( $this->rootNode, $data );
10346

104-
return $rootNodes->item( 0 );
47+
return $this->rootNode->ownerDocument->saveHTML( $this->rootNode );
10548
}
10649

10750
/**
@@ -245,8 +188,9 @@ private function handleFor( DOMNode $node, array $data ) {
245188
}
246189

247190
private function appendHTML( DOMNode $parent, $source ) {
248-
$tmpDoc = $this->parseHtml( $source );
249-
foreach ( $tmpDoc->getElementsByTagName( 'body' )->item( 0 )->childNodes as $node ) {
191+
$htmlParser = new HtmlParser();
192+
$tmpDoc = $htmlParser->parseHtml( $source );
193+
foreach ( $htmlParser->getBodyElement( $tmpDoc )->childNodes as $node ) {
250194
$node = $parent->ownerDocument->importNode( $node, true );
251195
$parent->appendChild( $node );
252196
}

src/HtmlParser.php

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
<?php
2+
3+
declare( strict_types = 1 );
4+
5+
namespace WMDE\VueJsTemplating;
6+
7+
use DOMDocument;
8+
use DOMElement;
9+
use Exception;
10+
use LibXMLError;
11+
12+
/**
13+
* Methods for parsing HTML strings and extracting elements from them.
14+
*/
15+
class HtmlParser {
16+
17+
/**
18+
* Parse the given HTML string into a DOM document.
19+
*/
20+
public function parseHtml( string $html ): DOMDocument {
21+
if ( LIBXML_VERSION < 20900 ) {
22+
$entityLoaderDisabled = libxml_disable_entity_loader( true );
23+
}
24+
$internalErrors = libxml_use_internal_errors( true );
25+
$document = new DOMDocument( '1.0', 'UTF-8' );
26+
27+
// Ensure $html is treated as UTF-8, see https://stackoverflow.com/a/8218649
28+
// LIBXML_NOBLANKS Constant excludes "ghost nodes" to avoid violating
29+
// vue's single root node constraint
30+
if ( !$document->loadHTML( '<?xml encoding="utf-8" ?>' . $html, LIBXML_NOBLANKS ) ) {
31+
//TODO Test failure
32+
}
33+
34+
/** @var LibXMLError[] $errors */
35+
$errors = libxml_get_errors();
36+
libxml_clear_errors();
37+
38+
// Restore previous state
39+
libxml_use_internal_errors( $internalErrors );
40+
if ( LIBXML_VERSION < 20900 ) {
41+
libxml_disable_entity_loader( $entityLoaderDisabled );
42+
}
43+
44+
foreach ( $errors as $error ) {
45+
//TODO html5 tags can fail parsing
46+
//TODO Throw an exception
47+
}
48+
49+
return $document;
50+
}
51+
52+
/**
53+
* Get the root node of the template represented by the given document.
54+
*/
55+
public function getRootNode( DOMDocument $document ): DOMElement {
56+
$htmlElement = $this->getHtmlElement( $document );
57+
$headOrBody = $this->getSoleHeadOrBody( $htmlElement );
58+
$rootNodeParent = $this->getTemplateElement( $headOrBody ) ?? $headOrBody;
59+
$rootNode = $this->getOnlySubstantialChild( $rootNodeParent );
60+
return $rootNode;
61+
}
62+
63+
/**
64+
* Get the `<html>` element of the given document.
65+
*/
66+
public function getHtmlElement( DOMDocument $document ): DOMElement {
67+
$documentElement = $document->documentElement;
68+
if ( $documentElement === null ) {
69+
throw new Exception( 'Empty document' );
70+
}
71+
if ( $documentElement->tagName !== 'html' ) {
72+
throw new Exception( "Expected <html>, got <{$documentElement->tagName}>" );
73+
}
74+
return $documentElement;
75+
}
76+
77+
/**
78+
* Get the `<body>` element of the given document.
79+
*/
80+
public function getBodyElement( DOMDocument $document ): DOMElement {
81+
$htmlElement = $this->getHtmlElement( $document );
82+
$bodyElement = $htmlElement->childNodes[0];
83+
if ( $bodyElement->tagName !== 'body' ) {
84+
throw new Exception( "Expected <body>, got <{$bodyElement->tagName}>" );
85+
}
86+
return $bodyElement;
87+
}
88+
89+
/**
90+
* Get the `<head>` or `<body>` element of the given document,
91+
* asserting that it is the only child (cannot have both nor any other children).
92+
*/
93+
private function getSoleHeadOrBody( DOMElement $htmlElement ): DOMElement {
94+
$length = $htmlElement->childNodes->length;
95+
if ( $length !== 1 ) {
96+
throw new Exception( "Expected exactly 1 <html> child, got $length" );
97+
}
98+
99+
$child = $htmlElement->childNodes[0];
100+
$tagName = $child->tagName;
101+
if ( $tagName !== 'head' && $tagName !== 'body' ) {
102+
throw new Exception( "Expected <head> or <body>, got <$tagName>" );
103+
}
104+
105+
return $child;
106+
}
107+
108+
/**
109+
* Get the `<template>` element of the given `<head>` or `<body>` element,
110+
* discarding any adjacent `<script>` or `<style>` elements
111+
* if the input is in Single-File Component (SFC) syntax.
112+
*/
113+
private function getTemplateElement( DOMElement $rootElement ): ?DOMElement {
114+
$onlyTemplateElement = null;
115+
foreach ( $rootElement->childNodes as $node ) {
116+
if ( $node->nodeType === XML_COMMENT_NODE ) {
117+
// comment node, ignore
118+
continue;
119+
} elseif ( $node->nodeType === XML_TEXT_NODE ) {
120+
if ( trim( $node->textContent ) === '' ) {
121+
// whitespace-only text node, ignore
122+
continue;
123+
} else {
124+
// not SFC
125+
$onlyTemplateElement = null;
126+
break;
127+
}
128+
}
129+
if ( $node->tagName === 'template' ) {
130+
if ( $onlyTemplateElement === null ) {
131+
$onlyTemplateElement = $node;
132+
} else {
133+
// more than one <template>, handle as non-SFC and throw error below
134+
$onlyTemplateElement = null;
135+
break;
136+
}
137+
} elseif ( $node->tagName !== 'script' && $node->tagName !== 'style' ) {
138+
// top-level tag other than <template>, <script> or <style> => not SFC
139+
$onlyTemplateElement = null;
140+
break;
141+
}
142+
}
143+
return $onlyTemplateElement;
144+
}
145+
146+
/**
147+
* Get the only “substantial” child of the given element.
148+
* Ignore any adjacent comments or whitespace-only text nodes
149+
* (such as line breaks or indentation).
150+
*/
151+
private function getOnlySubstantialChild( DOMElement $element ): DOMElement {
152+
$onlySubstantialChild = null;
153+
foreach ( $element->childNodes as $node ) {
154+
if ( $node->nodeType === XML_COMMENT_NODE ) {
155+
// comment node, ignore
156+
continue;
157+
} elseif ( $node->nodeType === XML_TEXT_NODE && trim( $node->textContent ) === '' ) {
158+
// whitespace-only text node, ignore
159+
continue;
160+
}
161+
if ( $onlySubstantialChild === null ) {
162+
$onlySubstantialChild = $node;
163+
} else {
164+
throw new Exception( 'Template should only have one root node' );
165+
}
166+
}
167+
168+
if ( $onlySubstantialChild !== null ) {
169+
return $onlySubstantialChild;
170+
} else {
171+
throw new Exception( 'Template contained no root node' );
172+
}
173+
}
174+
175+
}

src/Templating.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ class Templating {
1212
* @return string
1313
*/
1414
public function render( $template, array $data, array $methods = [] ) {
15-
$component = new Component( $template, $methods );
15+
$htmlParser = new HtmlParser();
16+
$document = $htmlParser->parseHtml( $template );
17+
$rootNode = $htmlParser->getRootNode( $document );
18+
$component = new Component( $rootNode, $methods );
1619
return $component->render( $data );
1720
}
1821

tests/php/HtmlParserTest.php

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
<?php
2+
3+
namespace WMDE\VueJsTemplating\Test;
4+
5+
use DOMElement;
6+
use Exception;
7+
use PHPUnit\Framework\TestCase;
8+
use WMDE\VueJsTemplating\HtmlParser;
9+
10+
/**
11+
* @covers \WMDE\VueJsTemplating\HtmlParser
12+
*/
13+
class HtmlParserTest extends TestCase {
14+
15+
private function parseAndGetRootNode( string $html ): DOMElement {
16+
$htmlParser = new HtmlParser();
17+
$document = $htmlParser->parseHtml( $html );
18+
return $htmlParser->getRootNode( $document );
19+
}
20+
21+
private function assertIsDivTest( DOMElement $element ): void {
22+
$this->assertSame( 'div', $element->tagName );
23+
$this->assertSame( 'test', $element->getAttribute( 'class' ) );
24+
}
25+
26+
public function testSingleRootNode(): void {
27+
$rootNode = $this->parseAndGetRootNode( '<div class="test"></div>' );
28+
$this->assertIsDivTest( $rootNode );
29+
}
30+
31+
public function testSingleFileComponent_OnlyTemplate(): void {
32+
$rootNode = $this->parseAndGetRootNode( '<template><div class="test"></div></template>' );
33+
$this->assertIsDivTest( $rootNode );
34+
}
35+
36+
public function testSingleFileComponent_TemplateAndScriptAndStyle(): void {
37+
$template = '<template><div class="test"></div></template><script></script><style></style>';
38+
$rootNode = $this->parseAndGetRootNode( $template );
39+
$this->assertIsDivTest( $rootNode );
40+
}
41+
42+
public function testSingleFileComponent_ScriptAndTemplateAndStyle(): void {
43+
$template = '<script></script><template><div class="test"></div></template><style></style>';
44+
$rootNode = $this->parseAndGetRootNode( $template );
45+
$this->assertIsDivTest( $rootNode );
46+
}
47+
48+
public function testEmptyDocument(): void {
49+
$this->expectException( Exception::class );
50+
$this->expectExceptionMessage( 'Empty document' );
51+
$this->parseAndGetRootNode( '' );
52+
}
53+
54+
public function testHeadElement(): void {
55+
$html = '<html><head><title>Title</title></head><body>ABC</body></html>';
56+
$this->expectException( Exception::class );
57+
$this->expectExceptionMessage( 'Expected exactly 1 <html> child, got 2' );
58+
$this->parseAndGetRootNode( $html );
59+
}
60+
61+
public function testTwoRootNodes() {
62+
$this->expectException( Exception::class );
63+
$this->parseAndGetRootNode( '<p></p><p></p>' );
64+
}
65+
66+
}

0 commit comments

Comments
 (0)