diff --git a/components/DataLiberation/CSS/class-cssprocessor.php b/components/DataLiberation/CSS/class-cssprocessor.php index cacb48b4..f59f6c63 100644 --- a/components/DataLiberation/CSS/class-cssprocessor.php +++ b/components/DataLiberation/CSS/class-cssprocessor.php @@ -1583,15 +1583,45 @@ private function decode_string_or_url( int $start, int $length ): string { // Handle escapes (if enabled). if ( '\\' === $char ) { - if ( $this->is_valid_escape( $at ) ) { + ++$at; // Consume the backslash. + + /* + * Backslash at end of range (e.g. backslash-EOF in a string token). + * + * Per the CSS spec, in a string token, a backslash at EOF is ignored. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token + */ + if ( $at >= $end ) { + continue; + } + + $next_char = $this->css[ $at ]; + + /* + * Backslash before a newline (backslash-newline) in a string token. + * + * Per the CSS spec, in a string token, a backslash followed by a + * newline is consumed but produces nothing in the token value. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token + */ + if ( "\n" === $next_char || "\f" === $next_char ) { ++$at; - $decoded .= $this->decode_escape_at( $at, $bytes_consumed ); - $at += $bytes_consumed; continue; } - // Invalid escape - consume the backslash and keep going. - $decoded .= '\\'; - ++$at; + if ( "\r" === $next_char ) { + ++$at; + // Handle \r\n as a single newline. + if ( $at < $end && "\n" === $this->css[ $at ] ) { + ++$at; + } + continue; + } + + // Otherwise, this is a valid escape: decode and append the escaped code point. 
+ $decoded .= $this->decode_escape_at( $at, $bytes_consumed ); + $at += $bytes_consumed; continue; } diff --git a/components/DataLiberation/Tests/CSSProcessorTest.php b/components/DataLiberation/Tests/CSSProcessorTest.php index ed164ac2..106d6808 100644 --- a/components/DataLiberation/Tests/CSSProcessorTest.php +++ b/components/DataLiberation/Tests/CSSProcessorTest.php @@ -1541,4 +1541,83 @@ public function test_ident_start_codepoint_bounds_check(): void { ); $this->assertSame( $expected_tokens, $actual_tokens ); } + + /** + * Tests that backslash-newline in a CSS string token value is ignored. + * + * Per the CSS spec, in a string token, a U+005C REVERSE SOLIDUS (\) + * followed by a newline is consumed but contributes nothing to the token + * value — i.e. the logical string contains no backslash and no newline at + * that position. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token + * @see https://github.com/WordPress/php-toolkit/issues/222 + */ + public function test_string_token_backslash_newline_is_ignored(): void { + // LF: the canonical case from the spec. + $processor = CSSProcessor::create( "'str\\\ning'" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'string', $processor->get_token_value() ); + + // CR: carriage return should also be consumed. + $processor = CSSProcessor::create( "'str\\\ring'" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'string', $processor->get_token_value() ); + + // CRLF: the pair should be consumed as a single newline. + $processor = CSSProcessor::create( "'str\\\r\ning'" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'string', $processor->get_token_value() ); + + // FF (form feed): treated as a newline per the spec. 
+ $processor = CSSProcessor::create( "'str\\\fing'" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'string', $processor->get_token_value() ); + + // Multiple backslash-newlines in one string. + $processor = CSSProcessor::create( "'A\\\nB\\\nC'" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'ABC', $processor->get_token_value() ); + + // Backslash-newline at the very start of the string value. + $processor = CSSProcessor::create( "'\\\nvalue'" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'value', $processor->get_token_value() ); + } + + /** + * Tests that a backslash at EOF inside a CSS string token contributes + * nothing to the token value. + * + * Per the CSS spec, in a string token, a U+005C REVERSE SOLIDUS (\) + * followed by EOF is consumed but produces nothing in the token value. + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token + * @see https://github.com/WordPress/php-toolkit/issues/223 + */ + public function test_string_token_backslash_eof_is_ignored(): void { + // Backslash at the very end of input (no closing quote). + $processor = CSSProcessor::create( "'string\\" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'string', $processor->get_token_value() ); + + // Backslash immediately after opening quote, then EOF. + $processor = CSSProcessor::create( "'\\" ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( '', $processor->get_token_value() ); + + // Double-quoted variant. 
+ $processor = CSSProcessor::create( '"string\\' ); + $processor->next_token(); + $this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() ); + $this->assertSame( 'string', $processor->get_token_value() ); + } }