From efd89d0168a5367a8fff5e13d6f50623c08f8b17 Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Wed, 4 Jun 2025 18:01:10 +0200 Subject: [PATCH 1/7] Add PHPUnit result cache to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c56a231..0500c3c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ vendor node_modules/ composer.lock +.phpunit.result.cache From ec9da4096f1d08df838f18df651a565077b123bf Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Mon, 2 Jun 2025 18:11:34 +0200 Subject: [PATCH 2/7] Declare ext-dom, ext-libxml dependencies in composer.json Otherwise, IntelliJ complains that this is missing. --- composer.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/composer.json b/composer.json index cf2a9b7..f7aaadb 100644 --- a/composer.json +++ b/composer.json @@ -9,6 +9,8 @@ ], "license": "LGPL-2.1-only", "require": { + "ext-dom": "*", + "ext-libxml": "*", "php": ">=7.2" }, "autoload": { From 814df81e32dbc497c2f35f8cbdcb6ba1b154c44c Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Wed, 4 Jun 2025 12:54:43 +0200 Subject: [PATCH 3/7] Extract HtmlParser class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Component’s responsibilities now start at the DOMElement; HtmlParser contains the code to parse the HTML and extract the root node from the DOMDocument. (I want to extend the latter logic in a follow-up commit, and for this it’s nicer if it’s a separate class, IMHO.) In principle, Component::render() should probably operate on a deep clone of the root node; then a Component could be reused without having to parse it several times, which could come in handy later. However, at the moment, the Component is always freshly created, so the extra clone seems wasteful; so for now just document that render() shouldn’t be called repeatedly. The only “public” entry point to this library so far is Templating, which obeys this rule. Also add covers tags to TemplatingTest, since it’s kind of an integration test for much of the library (including JsParsing, but I can’t be bothered to add all those classes to the covers as well). Bug: T395802 --- src/Component.php | 79 +++++------------------------------- src/HtmlParser.php | 65 +++++++++++++++++++++++++++++ src/Templating.php | 5 ++- tests/php/HtmlParserTest.php | 36 ++++++++++++++++ tests/php/TemplatingTest.php | 7 +--- 5 files changed, 118 insertions(+), 74 deletions(-) create mode 100644 src/HtmlParser.php create mode 100644 tests/php/HtmlParserTest.php diff --git a/src/Component.php b/src/Component.php index 2389357..0e881fe 100644 --- a/src/Component.php +++ b/src/Component.php @@ -4,14 +4,10 @@ use DOMAttr; use DOMCharacterData; -use DOMDocument; use DOMElement; use DOMNode; use DOMNodeList; use DOMText; -use Exception; -use LibXMLError; - use WMDE\VueJsTemplating\JsParsing\BasicJsExpressionParser; use WMDE\VueJsTemplating\JsParsing\CachingExpressionParser; use WMDE\VueJsTemplating\JsParsing\JsExpressionParser; @@ -19,9 +15,9 @@ class Component { /** - * @var string HTML + * @var DOMElement */ - private $template; + private $rootNode; /** * @var JsExpressionParser @@ -29,79 +25,26 @@ class Component { private $expressionParser; /** - * @param string $template HTML + * @param DOMElement $rootNode * @param callable[] $methods */ - public function __construct( $template, array $methods ) { - $this->template = $template; + public function __construct( DOMElement $rootNode, array $methods ) { + $this->rootNode = $rootNode; $this->expressionParser = new CachingExpressionParser( new BasicJsExpressionParser( $methods ) ); } /** + * Note: this method is not currently safe to call repeatedly + * (the internal root node is modified in-place). + * * @param array $data * * @return string HTML */ public function render( array $data ) { - $document = $this->parseHtml( $this->template ); - - $rootNode = $this->getRootNode( $document ); - $this->handleNode( $rootNode, $data ); - - return $document->saveHTML( $rootNode ); - } - - /** - * @param string $html HTML - * - * @return DOMDocument - */ - private function parseHtml( $html ) { - if ( LIBXML_VERSION < 20900 ) { - $entityLoaderDisabled = libxml_disable_entity_loader( true ); - } - $internalErrors = libxml_use_internal_errors( true ); - $document = new DOMDocument( '1.0', 'UTF-8' ); - - // Ensure $html is treated as UTF-8, see https://stackoverflow.com/a/8218649 - // LIBXML_NOBLANKS Constant excludes "ghost nodes" to avoid violating - // vue's single root node constraint - if ( !$document->loadHTML( '' . $html, LIBXML_NOBLANKS ) ) { - //TODO Test failure - } - - /** @var LibXMLError[] $errors */ - $errors = libxml_get_errors(); - libxml_clear_errors(); - - // Restore previous state - libxml_use_internal_errors( $internalErrors ); - if ( LIBXML_VERSION < 20900 ) { - libxml_disable_entity_loader( $entityLoaderDisabled ); - } - - foreach ( $errors as $error ) { - //TODO html5 tags can fail parsing - //TODO Throw an exception - } - - return $document; - } - - /** - * @param DOMDocument $document - * - * @return DOMElement - * @throws Exception - */ - private function getRootNode( DOMDocument $document ) { - $rootNodes = $document->documentElement->childNodes->item( 0 )->childNodes; - - if ( $rootNodes->length > 1 ) { - throw new Exception( 'Template should have only one root node' ); - } + $this->handleNode( $this->rootNode, $data ); - return $rootNodes->item( 0 ); + return $this->rootNode->ownerDocument->saveHTML( $this->rootNode ); } /** @@ -245,7 +188,7 @@ private function handleFor( DOMNode $node, array $data ) { } private function appendHTML( DOMNode $parent, $source ) { - $tmpDoc = $this->parseHtml( $source ); + $tmpDoc = ( new HtmlParser() )->parseHtml( $source ); foreach ( $tmpDoc->getElementsByTagName( 'body' )->item( 0 )->childNodes as $node ) { $node = $parent->ownerDocument->importNode( $node, true ); $parent->appendChild( $node ); diff --git a/src/HtmlParser.php b/src/HtmlParser.php new file mode 100644 index 0000000..4674337 --- /dev/null +++ b/src/HtmlParser.php @@ -0,0 +1,65 @@ +loadHTML( '' . $html, LIBXML_NOBLANKS ) ) { + //TODO Test failure + } + + /** @var LibXMLError[] $errors */ + $errors = libxml_get_errors(); + libxml_clear_errors(); + + // Restore previous state + libxml_use_internal_errors( $internalErrors ); + if ( LIBXML_VERSION < 20900 ) { + libxml_disable_entity_loader( $entityLoaderDisabled ); + } + + foreach ( $errors as $error ) { + //TODO html5 tags can fail parsing + //TODO Throw an exception + } + + return $document; + } + + /** + * Get the root node of the template represented by the given document. + */ + public function getRootNode( DOMDocument $document ): DOMElement { + $rootNodes = $document->documentElement->childNodes->item( 0 )->childNodes; + + if ( $rootNodes->length > 1 ) { + throw new Exception( 'Template should have only one root node' ); + } + + return $rootNodes->item( 0 ); + } + +} diff --git a/src/Templating.php b/src/Templating.php index 77258e0..a7d3deb 100644 --- a/src/Templating.php +++ b/src/Templating.php @@ -12,7 +12,10 @@ class Templating { * @return string */ public function render( $template, array $data, array $methods = [] ) { - $component = new Component( $template, $methods ); + $htmlParser = new HtmlParser(); + $document = $htmlParser->parseHtml( $template ); + $rootNode = $htmlParser->getRootNode( $document ); + $component = new Component( $rootNode, $methods ); return $component->render( $data ); } diff --git a/tests/php/HtmlParserTest.php b/tests/php/HtmlParserTest.php new file mode 100644 index 0000000..81a89b6 --- /dev/null +++ b/tests/php/HtmlParserTest.php @@ -0,0 +1,36 @@ +parseHtml( $html ); + return $htmlParser->getRootNode( $document ); + } + + private function assertIsDivTest( DOMElement $element ): void { + $this->assertSame( 'div', $element->tagName ); + $this->assertSame( 'test', $element->getAttribute( 'class' ) ); + } + + public function testSingleRootNode(): void { + $rootNode = $this->parseAndGetRootNode( '
' ); + $this->assertIsDivTest( $rootNode ); + } + + public function testTwoRootNodes() { + $this->expectException( Exception::class ); + $this->parseAndGetRootNode( '

' ); + } + +} diff --git a/tests/php/TemplatingTest.php b/tests/php/TemplatingTest.php index a6dc50a..6b74e0d 100644 --- a/tests/php/TemplatingTest.php +++ b/tests/php/TemplatingTest.php @@ -7,6 +7,8 @@ use WMDE\VueJsTemplating\Templating; /** + * @covers \WMDE\VueJsTemplating\Component + * @covers \WMDE\VueJsTemplating\HtmlParser * @covers \WMDE\VueJsTemplating\Templating */ class TemplatingTest extends TestCase { @@ -17,11 +19,6 @@ public function testJustASingleEmptyHtmlElement() { $this->assertSame( '
', $result ); } - public function testTemplateHasTwoRootNodes_ThrowsAnException() { - $this->expectException( Exception::class ); - $this->createAndRender( '

', [] ); - } - public function testTemplateHasOnClickHandler_RemoveHandlerFormOutput() { $result = $this->createAndRender( '
', [] ); From da2e5a27e256d5d497d7333b4cccaf06516e131b Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Mon, 2 Jun 2025 18:02:06 +0200 Subject: [PATCH 4/7] Handle empty documents slightly better MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we’d crash because the documentElement is null in this case. --- src/HtmlParser.php | 4 ++++ tests/php/HtmlParserTest.php | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/src/HtmlParser.php b/src/HtmlParser.php index 4674337..6b54d14 100644 --- a/src/HtmlParser.php +++ b/src/HtmlParser.php @@ -53,6 +53,10 @@ public function parseHtml( string $html ): DOMDocument { * Get the root node of the template represented by the given document. */ public function getRootNode( DOMDocument $document ): DOMElement { + if ( $document->documentElement === null ) { + throw new Exception( 'Empty document' ); + } + $rootNodes = $document->documentElement->childNodes->item( 0 )->childNodes; if ( $rootNodes->length > 1 ) { diff --git a/tests/php/HtmlParserTest.php b/tests/php/HtmlParserTest.php index 81a89b6..a572d6f 100644 --- a/tests/php/HtmlParserTest.php +++ b/tests/php/HtmlParserTest.php @@ -28,6 +28,12 @@ public function testSingleRootNode(): void { $this->assertIsDivTest( $rootNode ); } + public function testEmptyDocument(): void { + $this->expectException( Exception::class ); + $this->expectExceptionMessage( 'Empty document' ); + $this->parseAndGetRootNode( '' ); + } + public function testTwoRootNodes() { $this->expectException( Exception::class ); $this->parseAndGetRootNode( '

' ); From e9a784aaa5dc69236ede4a5c14b1fa9a71f3665b Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Wed, 4 Jun 2025 16:46:39 +0200 Subject: [PATCH 5/7] Extract HtmlParser::getBodyElement() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we can also use it in Component::appendHTML() (v-html="" handling). Part of this will also be useful when getRootNode() gains support for SFC inputs later (which is why appendHTML() doesn’t use getRootNode() directly). In theory, we could skip over a element in the , but as I don’t think it makes sense to have a in any Vue component / template, let’s just throw an error in that case and include a test for that. (I don’t think we can test the “expected ” condition, that part is basically dead code – the parser should always synthesize and elements surrounding “normal” markup if it’s not already present in the source.) --- src/Component.php | 5 +++-- src/HtmlParser.php | 32 +++++++++++++++++++++++++++----- tests/php/HtmlParserTest.php | 7 +++++++ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/Component.php b/src/Component.php index 0e881fe..50e3856 100644 --- a/src/Component.php +++ b/src/Component.php @@ -188,8 +188,9 @@ private function handleFor( DOMNode $node, array $data ) { } private function appendHTML( DOMNode $parent, $source ) { - $tmpDoc = ( new HtmlParser() )->parseHtml( $source ); - foreach ( $tmpDoc->getElementsByTagName( 'body' )->item( 0 )->childNodes as $node ) { + $htmlParser = new HtmlParser(); + $tmpDoc = $htmlParser->parseHtml( $source ); + foreach ( $htmlParser->getBodyElement( $tmpDoc )->childNodes as $node ) { $node = $parent->ownerDocument->importNode( $node, true ); $parent->appendChild( $node ); } diff --git a/src/HtmlParser.php b/src/HtmlParser.php index 6b54d14..e3335a5 100644 --- a/src/HtmlParser.php +++ b/src/HtmlParser.php @@ -53,11 +53,7 @@ public function parseHtml( string $html ): DOMDocument { * Get the root node of the template represented by the given document. */ public function getRootNode( DOMDocument $document ): DOMElement { - if ( $document->documentElement === null ) { - throw new Exception( 'Empty document' ); - } - - $rootNodes = $document->documentElement->childNodes->item( 0 )->childNodes; + $rootNodes = $this->getBodyElement( $document )->childNodes; if ( $rootNodes->length > 1 ) { throw new Exception( 'Template should have only one root node' ); @@ -66,4 +62,30 @@ public function getRootNode( DOMDocument $document ): DOMElement { return $rootNodes->item( 0 ); } + /** + * Get the `` element of the given document. + */ + public function getHtmlElement( DOMDocument $document ): DOMElement { + $documentElement = $document->documentElement; + if ( $documentElement === null ) { + throw new Exception( 'Empty document' ); + } + if ( $documentElement->tagName !== 'html' ) { + throw new Exception( "Expected , got <{$documentElement->tagName}>" ); + } + return $documentElement; + } + + /** + * Get the `` element of the given document. + */ + public function getBodyElement( DOMDocument $document ): DOMElement { + $htmlElement = $this->getHtmlElement( $document ); + $bodyElement = $htmlElement->childNodes->item( 0 ); + if ( $bodyElement->tagName !== 'body' ) { + throw new Exception( "Expected , got <{$bodyElement->tagName}>" ); + } + return $bodyElement; + } + } diff --git a/tests/php/HtmlParserTest.php b/tests/php/HtmlParserTest.php index a572d6f..c3ac18a 100644 --- a/tests/php/HtmlParserTest.php +++ b/tests/php/HtmlParserTest.php @@ -34,6 +34,13 @@ public function testEmptyDocument(): void { $this->parseAndGetRootNode( '' ); } + public function testHeadElement(): void { + $html = 'TitleABC'; + $this->expectException( Exception::class ); + $this->expectExceptionMessage( 'Expected , got ' ); + $this->parseAndGetRootNode( $html ); + } + public function testTwoRootNodes() { $this->expectException( Exception::class ); $this->parseAndGetRootNode( '

' ); From 5bbd75bc7e7ed17bcb57c3b983128541773e844f Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Tue, 3 Jun 2025 16:46:20 +0200 Subject: [PATCH 6/7] Use DOMNodeList with array syntax This is not only more readable, but also a bit faster [1] (though I doubt this code is a bottleneck anywhere either way). [1]: https://3v4l.org/NGEKe --- src/HtmlParser.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/HtmlParser.php b/src/HtmlParser.php index e3335a5..109ca76 100644 --- a/src/HtmlParser.php +++ b/src/HtmlParser.php @@ -59,7 +59,7 @@ public function getRootNode( DOMDocument $document ): DOMElement { throw new Exception( 'Template should have only one root node' ); } - return $rootNodes->item( 0 ); + return $rootNodes[0]; } /** @@ -81,7 +81,7 @@ public function getHtmlElement( DOMDocument $document ): DOMElement { */ public function getBodyElement( DOMDocument $document ): DOMElement { $htmlElement = $this->getHtmlElement( $document ); - $bodyElement = $htmlElement->childNodes->item( 0 ); + $bodyElement = $htmlElement->childNodes[0]; if ( $bodyElement->tagName !== 'body' ) { throw new Exception( "Expected , got <{$bodyElement->tagName}>" ); } From a27aaada4e54ead7861bd5e1f136823361c5e4fe Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Mon, 2 Jun 2025 18:13:53 +0200 Subject: [PATCH 7/7] Support single-file component inputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the input contains a single root