diff --git a/.gitignore b/.gitignore index c56a231..0500c3c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ vendor node_modules/ composer.lock +.phpunit.result.cache diff --git a/composer.json b/composer.json index cf2a9b7..f7aaadb 100644 --- a/composer.json +++ b/composer.json @@ -9,6 +9,8 @@ ], "license": "LGPL-2.1-only", "require": { + "ext-dom": "*", + "ext-libxml": "*", "php": ">=7.2" }, "autoload": { diff --git a/src/Component.php b/src/Component.php index 2389357..50e3856 100644 --- a/src/Component.php +++ b/src/Component.php @@ -4,14 +4,10 @@ use DOMAttr; use DOMCharacterData; -use DOMDocument; use DOMElement; use DOMNode; use DOMNodeList; use DOMText; -use Exception; -use LibXMLError; - use WMDE\VueJsTemplating\JsParsing\BasicJsExpressionParser; use WMDE\VueJsTemplating\JsParsing\CachingExpressionParser; use WMDE\VueJsTemplating\JsParsing\JsExpressionParser; @@ -19,9 +15,9 @@ class Component { /** - * @var string HTML + * @var DOMElement */ - private $template; + private $rootNode; /** * @var JsExpressionParser @@ -29,79 +25,26 @@ class Component { private $expressionParser; /** - * @param string $template HTML + * @param DOMElement $rootNode * @param callable[] $methods */ - public function __construct( $template, array $methods ) { - $this->template = $template; + public function __construct( DOMElement $rootNode, array $methods ) { + $this->rootNode = $rootNode; $this->expressionParser = new CachingExpressionParser( new BasicJsExpressionParser( $methods ) ); } /** + * Note: this method is not currently safe to call repeatedly + * (the internal root node is modified in-place). + * * @param array $data * * @return string HTML */ public function render( array $data ) { - $document = $this->parseHtml( $this->template ); - - $rootNode = $this->getRootNode( $document ); - $this->handleNode( $rootNode, $data ); - - return $document->saveHTML( $rootNode ); - } - - /** - * @param string $html HTML - * - * @return DOMDocument - */ - private function parseHtml( $html ) { - if ( LIBXML_VERSION < 20900 ) { - $entityLoaderDisabled = libxml_disable_entity_loader( true ); - } - $internalErrors = libxml_use_internal_errors( true ); - $document = new DOMDocument( '1.0', 'UTF-8' ); - - // Ensure $html is treated as UTF-8, see https://stackoverflow.com/a/8218649 - // LIBXML_NOBLANKS Constant excludes "ghost nodes" to avoid violating - // vue's single root node constraint - if ( !$document->loadHTML( '' . $html, LIBXML_NOBLANKS ) ) { - //TODO Test failure - } - - /** @var LibXMLError[] $errors */ - $errors = libxml_get_errors(); - libxml_clear_errors(); - - // Restore previous state - libxml_use_internal_errors( $internalErrors ); - if ( LIBXML_VERSION < 20900 ) { - libxml_disable_entity_loader( $entityLoaderDisabled ); - } - - foreach ( $errors as $error ) { - //TODO html5 tags can fail parsing - //TODO Throw an exception - } - - return $document; - } - - /** - * @param DOMDocument $document - * - * @return DOMElement - * @throws Exception - */ - private function getRootNode( DOMDocument $document ) { - $rootNodes = $document->documentElement->childNodes->item( 0 )->childNodes; - - if ( $rootNodes->length > 1 ) { - throw new Exception( 'Template should have only one root node' ); - } + $this->handleNode( $this->rootNode, $data ); - return $rootNodes->item( 0 ); + return $this->rootNode->ownerDocument->saveHTML( $this->rootNode ); } /** @@ -245,8 +188,9 @@ private function handleFor( DOMNode $node, array $data ) { } private function appendHTML( DOMNode $parent, $source ) { - $tmpDoc = $this->parseHtml( $source ); - foreach ( $tmpDoc->getElementsByTagName( 'body' )->item( 0 )->childNodes as $node ) { + $htmlParser = new HtmlParser(); + $tmpDoc = $htmlParser->parseHtml( $source ); + foreach ( $htmlParser->getBodyElement( $tmpDoc )->childNodes as $node ) { $node = $parent->ownerDocument->importNode( $node, true ); $parent->appendChild( $node ); } diff --git a/src/HtmlParser.php b/src/HtmlParser.php new file mode 100644 index 0000000..acd9e89 --- /dev/null +++ b/src/HtmlParser.php @@ -0,0 +1,175 @@ +loadHTML( '' . $html, LIBXML_NOBLANKS ) ) { + //TODO Test failure + } + + /** @var LibXMLError[] $errors */ + $errors = libxml_get_errors(); + libxml_clear_errors(); + + // Restore previous state + libxml_use_internal_errors( $internalErrors ); + if ( LIBXML_VERSION < 20900 ) { + libxml_disable_entity_loader( $entityLoaderDisabled ); + } + + foreach ( $errors as $error ) { + //TODO html5 tags can fail parsing + //TODO Throw an exception + } + + return $document; + } + + /** + * Get the root node of the template represented by the given document. + */ + public function getRootNode( DOMDocument $document ): DOMElement { + $htmlElement = $this->getHtmlElement( $document ); + $headOrBody = $this->getSoleHeadOrBody( $htmlElement ); + $rootNodeParent = $this->getTemplateElement( $headOrBody ) ?? $headOrBody; + $rootNode = $this->getOnlySubstantialChild( $rootNodeParent ); + return $rootNode; + } + + /** + * Get the `` element of the given document. + */ + public function getHtmlElement( DOMDocument $document ): DOMElement { + $documentElement = $document->documentElement; + if ( $documentElement === null ) { + throw new Exception( 'Empty document' ); + } + if ( $documentElement->tagName !== 'html' ) { + throw new Exception( "Expected , got <{$documentElement->tagName}>" ); + } + return $documentElement; + } + + /** + * Get the `` element of the given document. + */ + public function getBodyElement( DOMDocument $document ): DOMElement { + $htmlElement = $this->getHtmlElement( $document ); + $bodyElement = $htmlElement->childNodes[0]; + if ( $bodyElement->tagName !== 'body' ) { + throw new Exception( "Expected , got <{$bodyElement->tagName}>" ); + } + return $bodyElement; + } + + /** + * Get the `` or `` element of the given document, + * asserting that it is the only child (cannot have both nor any other children). + */ + private function getSoleHeadOrBody( DOMElement $htmlElement ): DOMElement { + $length = $htmlElement->childNodes->length; + if ( $length !== 1 ) { + throw new Exception( "Expected exactly 1 child, got $length" ); + } + + $child = $htmlElement->childNodes[0]; + $tagName = $child->tagName; + if ( $tagName !== 'head' && $tagName !== 'body' ) { + throw new Exception( "Expected or , got <$tagName>" ); + } + + return $child; + } + + /** + * Get the `