Skip to content

Commit 007a08c

Browse files
committed
Start extracting tags from code prior to highlighting
1 parent b9511a3 commit 007a08c

1 file changed

Lines changed: 41 additions & 1 deletion

File tree

syntax-highlighting-code-block.php

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -622,9 +622,47 @@ function render_block( $attributes, $content ) {
622622

623623
$language = $attributes['language'];
624624

625+
$content = $matches['content'];
626+
// @todo We need to remove all tags but remember the byte positions, and also we need to do the same for entities.
627+
628+
$token_offets = [];
629+
630+
$token_regexps = [
631+
'(?P<start_tag><\w[^<]*?>)',
632+
'(?P<end_tag><\\/\w[^<]*?>)',
633+
'(?P<entity>&(?:\w+|#(?:\d+|x[0-9a-fA-F]+));)',
634+
//'(?P<named_entity>&\w+;)',
635+
//'(?P<decimal_entity>&#\d+;)',
636+
//'(?P<hex_entity>&#x[0-9a-fA-F]+;)',
637+
];
638+
639+
$offset_diff = 0;
640+
641+
$pattern = '/' . implode( '|', $token_regexps ) . '/s';
642+
643+
$content = preg_replace_callback(
644+
$pattern,
645+
static function ( $matches ) use ( $token_offets, $offset_diff ) {
646+
$original = $matches[0][0];
647+
648+
if ( $matches['start_tag'][1] !== -1 ) {
649+
$replacement = '';
650+
} elseif ( $matches['end_tag'][1] !== -1 ) {
651+
$replacement = '';
652+
} elseif ( $matches['entity'][1] !== -1 ) {
653+
$replacement = html_entity_decode( $matches['entity'][0], ENT_QUOTES | ENT_HTML5, 'utf-8' );
654+
}
655+
656+
return $replacement;
657+
},
658+
$content,
659+
-1,
660+
$count,
661+
PREG_OFFSET_CAPTURE
662+
);
663+
625664
// Note that the decoding here is reversed later in the escape() function.
626665
// @todo Now that Code blocks may have markup (e.g. bolding, italics, and hyperlinks), these need to be removed and then restored after highlighting is completed.
627-
$content = html_entity_decode( $matches['content'], ENT_QUOTES );
628666

629667
// Convert from Prism.js languages names.
630668
if ( 'clike' === $language ) {
@@ -662,6 +700,8 @@ function render_block( $attributes, $content ) {
662700
set_transient( $transient_key, compact( 'content', 'attributes' ), MONTH_IN_SECONDS );
663701
}
664702

703+
// @todo The tags and entities extracted from $content need to be restored now.
704+
665705
return inject_markup( $matches['pre_start_tag'], $matches['code_start_tag'], $attributes, $content );
666706
} catch ( Exception $e ) {
667707
return sprintf(

0 commit comments

Comments
 (0)