Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Default to UTF-8 for empty blog_charset values #8531

Open
wants to merge 4 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/wp-includes/default-filters.php
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@
// Misc filters.
add_filter( 'wp_default_autoload_value', 'wp_filter_default_autoload_value_via_option_size', 5, 4 ); // Allow the value to be overridden at the default priority.
add_filter( 'option_ping_sites', 'privacy_ping_filter' );

// Hooked at priority 9, ahead of the two blog_charset filters below (which pass no explicit priority),
// so an empty stored value is swapped for a fallback before it is escaped and canonicalized.
add_filter( 'option_blog_charset', '_wp_ensure_blog_charset', 9 );
add_filter( 'option_blog_charset', '_wp_specialchars' ); // IMPORTANT: This must not be wp_specialchars() or esc_html() or it'll cause an infinite loop.
add_filter( 'option_blog_charset', '_canonical_charset' );
add_filter( 'option_home', '_config_wp_home' );
Expand Down
12 changes: 11 additions & 1 deletion src/wp-includes/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -7566,7 +7566,13 @@ function get_tag_regex( $tag ) {
* @return bool Whether the slug represents the UTF-8 encoding.
*/
function is_utf8_charset( $blog_charset = null ) {
	if ( null === $blog_charset ) {
		// No charset supplied: read the site option, falling back to UTF-8 when the option is missing.
		$blog_charset = get_option( 'blog_charset', 'UTF-8' );
	} elseif ( empty( $blog_charset ) ) {
		// An explicitly passed empty value ('', false, 0, '0') names no encoding at all.
		return false;
	}

	return _is_utf8_charset( $blog_charset );
}

/**
Expand All @@ -7582,6 +7588,10 @@ function is_utf8_charset( $blog_charset = null ) {
* @return string The canonical form of the charset.
*/
function _canonical_charset( $charset ) {
if ( empty( $charset ) ) {
return '';
}

if ( is_utf8_charset( $charset ) ) {
return 'UTF-8';
}
Expand Down
12 changes: 12 additions & 0 deletions src/wp-includes/option.php
Original file line number Diff line number Diff line change
Expand Up @@ -3238,3 +3238,15 @@ function wp_autoload_values_to_autoload() {

return array_intersect( $filtered_values, $autoload_values );
}

/**
 * Supplies a fallback character set for the blog_charset option.
 *
 * Any value that PHP's empty() considers empty (null, '', false, 0, '0')
 * is replaced with 'UTF-8'; every other value is handed back untouched.
 *
 * @since 6.8.0
 *
 * @param string|null $charset The character set from the option.
 * @return string The character set, or 'UTF-8' when the given value is empty.
 */
function _wp_ensure_blog_charset( $charset ) {
	if ( ! empty( $charset ) ) {
		return $charset;
	}

	return 'UTF-8';
}
91 changes: 91 additions & 0 deletions tests/phpunit/tests/functions/ensureBlogCharset.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<?php
/**
* Tests for the _wp_ensure_blog_charset() function.
*
* @group functions
* @group charset
*
* @covers ::_wp_ensure_blog_charset
*/
class Tests_Functions_EnsureBlogCharset extends WP_UnitTestCase {
	/**
	 * Verifies that every empty-like input is replaced with UTF-8.
	 *
	 * @ticket 25693
	 *
	 * @dataProvider data_empty_charset_values
	 *
	 * @param mixed $empty_charset Empty or null charset value.
	 */
	public function test_returns_utf8_for_empty_values( $empty_charset ) {
		$actual = _wp_ensure_blog_charset( $empty_charset );

		$this->assertSame( 'UTF-8', $actual, 'Should return UTF-8 when the blog_charset value is empty' );
	}

	/**
	 * Verifies that a non-empty charset passes through unchanged.
	 *
	 * @ticket 25693
	 *
	 * @dataProvider data_valid_charset_values
	 *
	 * @param string $valid_charset A valid charset.
	 */
	public function test_preserves_valid_charset_values( $valid_charset ) {
		$actual = _wp_ensure_blog_charset( $valid_charset );

		$this->assertSame( $valid_charset, $actual, 'Should preserve the original charset when it is not empty' );
	}

	/**
	 * Data provider: values that empty() treats as empty.
	 *
	 * @return array[] One single-argument dataset per value.
	 */
	public static function data_empty_charset_values() {
		$empty_values = array( null, '', false, 0, '0' );

		// Wrap each value in its own argument list.
		return array_map(
			static function ( $value ) {
				return array( $value );
			},
			$empty_values
		);
	}

	/**
	 * Data provider: non-empty charset names.
	 *
	 * @return array[] One single-argument dataset per charset.
	 */
	public static function data_valid_charset_values() {
		$charsets = array( 'UTF-8', 'ISO-8859-1', 'ASCII', 'Windows-1252', 'EUC-JP' );

		// Wrap each charset in its own argument list.
		return array_map(
			static function ( $charset ) {
				return array( $charset );
			},
			$charsets
		);
	}

	/**
	 * Exercises _wp_ensure_blog_charset through get_option() on the blog_charset option.
	 *
	 * @ticket 25693
	 */
	public function test_option_filter_integration() {
		// Remember the current value so it can be written back at the end.
		$saved_charset = get_option( 'blog_charset' );

		update_option( 'blog_charset', '' );
		$charset_when_empty = get_option( 'blog_charset' );
		$this->assertSame( 'UTF-8', $charset_when_empty, 'Filter should ensure UTF-8 when blog_charset is empty' );

		update_option( 'blog_charset', 'ISO-8859-1' );
		$charset_when_set = get_option( 'blog_charset' );
		$this->assertSame( 'ISO-8859-1', $charset_when_set, 'Filter should preserve valid blog_charset' );

		update_option( 'blog_charset', $saved_charset );
	}
}
123 changes: 123 additions & 0 deletions tests/phpunit/tests/functions/isUtf8Charset.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<?php
/**
* Tests for the is_utf8_charset() function.
*
* @group functions
* @group charset
*
* @covers ::is_utf8_charset
*/
class Tests_Functions_IsUtf8Charset extends WP_UnitTestCase {
	/**
	 * Verifies that passing null makes is_utf8_charset consult the blog_charset option.
	 *
	 * @ticket 25693
	 */
	public function test_handles_null_by_getting_option() {
		// Remember the current value so it can be written back at the end.
		$saved_charset = get_option( 'blog_charset' );

		update_option( 'blog_charset', 'UTF-8' );
		$result_for_utf8 = is_utf8_charset( null );
		$this->assertTrue( $result_for_utf8, 'Should return true when null is passed and blog_charset is UTF-8' );

		update_option( 'blog_charset', 'ISO-8859-1' );
		$result_for_latin1 = is_utf8_charset( null );
		$this->assertFalse( $result_for_latin1, 'Should return false when null is passed and blog_charset is not UTF-8' );

		update_option( 'blog_charset', $saved_charset );
	}

	/**
	 * Verifies that explicitly passed empty values are not reported as UTF-8.
	 *
	 * @ticket 25693
	 *
	 * @dataProvider data_empty_charset_values
	 *
	 * @param mixed $empty_charset Empty or null charset value.
	 */
	public function test_handles_empty_values( $empty_charset ) {
		$result = is_utf8_charset( $empty_charset );

		$this->assertFalse( $result, 'Should return false when empty values are explicitly passed' );
	}

	/**
	 * Verifies that the common spellings of UTF-8 are all recognized.
	 *
	 * @ticket 25693
	 *
	 * @dataProvider data_utf8_charset_variants
	 *
	 * @param string $utf8_charset A UTF-8 charset variant.
	 */
	public function test_identifies_utf8_variants( $utf8_charset ) {
		$result = is_utf8_charset( $utf8_charset );

		$this->assertTrue( $result, 'Should identify valid UTF-8 charset variants' );
	}

	/**
	 * Verifies that charsets other than UTF-8 are rejected.
	 *
	 * @ticket 25693
	 *
	 * @dataProvider data_non_utf8_charset_values
	 *
	 * @param string $non_utf8_charset A non-UTF-8 charset.
	 */
	public function test_rejects_non_utf8_charsets( $non_utf8_charset ) {
		$result = is_utf8_charset( $non_utf8_charset );

		$this->assertFalse( $result, 'Should reject non-UTF-8 charsets' );
	}

	/**
	 * Data provider: non-null values that empty() treats as empty.
	 *
	 * @return array[] One single-argument dataset per value.
	 */
	public static function data_empty_charset_values() {
		$empty_values = array( '', false, 0, '0' );

		// Wrap each value in its own argument list.
		return array_map(
			static function ( $value ) {
				return array( $value );
			},
			$empty_values
		);
	}

	/**
	 * Data provider: spellings of UTF-8.
	 *
	 * @return array[] One single-argument dataset per spelling.
	 */
	public static function data_utf8_charset_variants() {
		$variants = array( 'UTF-8', 'utf-8', 'utf8', 'UTF8' );

		// Wrap each spelling in its own argument list.
		return array_map(
			static function ( $variant ) {
				return array( $variant );
			},
			$variants
		);
	}

	/**
	 * Data provider: charsets other than UTF-8.
	 *
	 * @return array[] One single-argument dataset per charset.
	 */
	public static function data_non_utf8_charset_values() {
		$charsets = array( 'ISO-8859-1', 'Windows-1252', 'ASCII', 'EUC-JP' );

		// Wrap each charset in its own argument list.
		return array_map(
			static function ( $charset ) {
				return array( $charset );
			},
			$charsets
		);
	}
}
Loading