Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport commits from 62269 #8227

Open
wants to merge 1 commit into
base: 6.7
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,22 @@ public function next_tag( $query = null ): bool {
return false;
}

/**
 * Advances the processor to the next token in the HTML document.
 *
 * This is a thin public entry point: the actual traversal lives in
 * next_visitable_token(), which may call itself recursively. Keeping the
 * recursion out of this method means a subclass overriding next_token()
 * is not re-entered by those internal recursive steps.
 *
 * This doesn't currently have a way to represent non-tags and doesn't process
 * semantic rules for text nodes. For access to the raw tokens consider using
 * WP_HTML_Tag_Processor instead.
 *
 * @since 6.5.0 Added for internal support; do not use.
 * @since 6.7.2 Refactored so subclasses may extend.
 *
 * @return bool Whether a token was parsed.
 */
public function next_token(): bool {
	$was_token_parsed = $this->next_visitable_token();

	return $was_token_parsed;
}

/**
* Ensures internal accounting is maintained for HTML semantic rules while
* the underlying Tag Processor class is seeking to a bookmark.
Expand All @@ -615,13 +631,18 @@ public function next_tag( $query = null ): bool {
* semantic rules for text nodes. For access to the raw tokens consider using
* WP_HTML_Tag_Processor instead.
*
* @since 6.5.0 Added for internal support; do not use.
* Note that this method may call itself recursively. This is why it is not
* implemented as {@see WP_HTML_Processor::next_token()}, which instead calls
* this method similarly to how {@see WP_HTML_Tag_Processor::next_token()}
* calls the {@see WP_HTML_Tag_Processor::base_class_next_token()} method.
*
* @since 6.7.2 Added for internal support.
*
* @access private
*
* @return bool
*/
public function next_token(): bool {
private function next_visitable_token(): bool {
$this->current_element = null;

if ( isset( $this->last_error ) ) {
Expand All @@ -639,7 +660,7 @@ public function next_token(): bool {
* tokens works in the meantime and isn't obviously wrong.
*/
if ( empty( $this->element_queue ) && $this->step() ) {
return $this->next_token();
return $this->next_visitable_token();
}

// Process the next event on the queue.
Expand All @@ -650,7 +671,7 @@ public function next_token(): bool {
continue;
}

return empty( $this->element_queue ) ? false : $this->next_token();
return empty( $this->element_queue ) ? false : $this->next_visitable_token();
}

$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
Expand All @@ -661,7 +682,7 @@ public function next_token(): bool {
* the breadcrumbs.
*/
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
return $this->next_token();
return $this->next_visitable_token();
}

// Adjust the breadcrumbs for this event.
Expand All @@ -673,7 +694,7 @@ public function next_token(): bool {

// Avoid sending close events for elements which don't expect a closing.
if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) {
return $this->next_token();
return $this->next_visitable_token();
}

return true;
Expand Down
35 changes: 35 additions & 0 deletions tests/phpunit/data/html-api/token-counting-html-processor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

class Token_Counting_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Number of times each token has been seen, keyed by token name.
	 *
	 * Tag tokens are keyed as "+TAG" for openers and "-TAG" for closers;
	 * every other token is keyed by the value of get_token_name().
	 *
	 * @var array<string, int>
	 */
	public $token_seen_count = array();

	/**
	 * Advances to the next token and records that it was visited.
	 *
	 * The counter is updated on every call, including calls for which the
	 * parent reports that no token was matched.
	 *
	 * @return bool Whether next token was matched.
	 */
	public function next_token(): bool {
		$result = parent::next_token();

		if ( '#tag' === $this->get_token_type() ) {
			$token_name = ( $this->is_tag_closer() ? '-' : '+' ) . $this->get_tag();
		} else {
			/*
			 * Cast explicitly so that a missing name is recorded under the ''
			 * key instead of relying on implicit null-to-'' array key coercion.
			 * NOTE(review): presumably get_token_name() is null once no token
			 * is matched (e.g. after the document ends) - confirm.
			 */
			$token_name = (string) $this->get_token_name();
		}

		$this->token_seen_count[ $token_name ] = ( $this->token_seen_count[ $token_name ] ?? 0 ) + 1;

		return $result;
	}
}
135 changes: 135 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,141 @@ public function test_ensure_form_tag_closer_token_is_reachable() {
$this->assertTrue( $processor->is_tag_closer() );
}

/**
 * Data provider supplying HTML documents together with the number of times
 * each token is expected to be counted while the document is processed.
 *
 * Keys of 'expected_token_counts' follow the naming used by
 * Token_Counting_HTML_Processor: "+TAG" for tag openers, "-TAG" for tag
 * closers, and the token name (e.g. "#text", "#comment") for anything else.
 * NOTE(review): the '' key presumably corresponds to the final call that
 * finds no further token - confirm.
 *
 * @return array[] Named datasets, each holding 'html' and 'expected_token_counts'.
 */
public function data_html_processor_with_extended_next_token() {
return array(
'single_instance_per_tag' => array(
'html' => '
<html>
<head>
<meta charset="utf-8">
<title>Hello World</title>
</head>
<body>
<h1>Hello World!</h1>
<img src="example.png">
<p>Each tag should occur only once in this document.<!--Closing P tag omitted intentionally.-->
<footer>The end.</footer>
</body>
</html>
',
'expected_token_counts' => array(
'+HTML' => 1,
'+HEAD' => 1,
'#text' => 14,
'+META' => 1,
'+TITLE' => 1,
'-HEAD' => 1,
'+BODY' => 1,
'+H1' => 1,
'-H1' => 1,
'+IMG' => 1,
'+P' => 1,
'#comment' => 1,
'-P' => 1,
'+FOOTER' => 1,
'-FOOTER' => 1,
'-BODY' => 1,
'-HTML' => 1,
'' => 1,
),
),

'multiple_tag_instances' => array(
'html' => '
<html>
<body>
<h1>Hello World!</h1>
<p>First
<p>Second
<p>Third
<ul>
<li>1
<li>2
<li>3
</ul>
</body>
</html>
',
'expected_token_counts' => array(
'+HTML' => 1,
'+HEAD' => 1,
'-HEAD' => 1,
'+BODY' => 1,
'#text' => 13,
'+H1' => 1,
'-H1' => 1,
'+P' => 3,
'-P' => 3,
'+UL' => 1,
'+LI' => 3,
'-LI' => 3,
'-UL' => 1,
'-BODY' => 1,
'-HTML' => 1,
'' => 1,
),
),

'extreme_nested_formatting' => array(
'html' => '
<html>
<body>
<p>
<strong><em><strike><i><b><u>FORMAT</u></b></i></strike></em></strong>
</p>
</body>
</html>
',
'expected_token_counts' => array(
'+HTML' => 1,
'+HEAD' => 1,
'-HEAD' => 1,
'+BODY' => 1,
'#text' => 7,
'+P' => 1,
'+STRONG' => 1,
'+EM' => 1,
'+STRIKE' => 1,
'+I' => 1,
'+B' => 1,
'+U' => 1,
'-U' => 1,
'-B' => 1,
'-I' => 1,
'-STRIKE' => 1,
'-EM' => 1,
'-STRONG' => 1,
'-P' => 1,
'-BODY' => 1,
'-HTML' => 1,
'' => 1,
),
),
);
}

/**
 * Ensures that subclasses of WP_HTML_Processor can do bookkeeping by overriding the next_token() method.
 *
 * The whole document is traversed with next_tag(), after which the token
 * counts recorded by the subclass are compared against the expected counts.
 *
 * @ticket 62269
 * @dataProvider data_html_processor_with_extended_next_token
 *
 * @param string $html                  HTML document to process.
 * @param array  $expected_token_counts Expected number of times each token name is seen.
 */
public function test_ensure_next_token_method_extensibility( $html, $expected_token_counts ) {
	require_once DIR_TESTDATA . '/html-api/token-counting-html-processor.php';

	$processor = Token_Counting_HTML_Processor::create_full_parser( $html );

	// Traverse every tag; only the counts collected along the way matter.
	do {
		$found_tag = $processor->next_tag();
	} while ( $found_tag );

	$actual_token_counts = $processor->token_seen_count;
	$failure_message     = 'Snapshot: ' . var_export( $actual_token_counts, true );

	$this->assertEquals( $expected_token_counts, $actual_token_counts, $failure_message );
}

/**
* Ensure that lowercased tag_name query matches tags case-insensitively.
*
Expand Down
Loading