Skip to content

Commit 487bf7e

Browse files
committed
Performance improvements
1 parent 51b146d commit 487bf7e

File tree

1 file changed

+143
-111
lines changed

1 file changed

+143
-111
lines changed

src/ParsedownExtended.php

Lines changed: 143 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ class ParsedownExtended extends \ParsedownExtendedParentAlias
5151
/** @var array|null $emojiMap Cached emoji map for emoji replacements */
5252
private ?array $emojiMap = null;
5353

54+
/** @var bool $predefinedAbbreviationsAdded Tracks whether predefined abbreviations have been merged */
55+
private bool $predefinedAbbreviationsAdded = false;
56+
5457
/** @var array CONFIG_SCHEMA_DEFAULT Default configuration schema */
5558
private const CONFIG_SCHEMA_DEFAULT = [
5659
'abbreviations' => [
@@ -526,34 +529,54 @@ protected function processLinkElement($Excerpt)
526529
*/
527530
private function isExternalLink(string $href): bool
{
    // Only absolute http(s) URLs and protocol-relative URLs ("//host/...") can point
    // to another site. Anything else (relative paths, anchors, other schemes) is
    // reported as not external.
    $isProtocolRelative = strncmp($href, '//', 2) === 0;
    if (
        !$isProtocolRelative &&
        stripos($href, 'http://') !== 0 &&
        stripos($href, 'https://') !== 0
    ) {
        return false;
    }

    // Give protocol-relative URLs a scheme so parse_url() extracts the host reliably.
    $host = parse_url($isProtocolRelative ? 'http:' . $href : $href, PHP_URL_HOST);
    if (!$host) {
        return false;
    }
    $host = $this->normalizeLinkHost($host);

    // HTTP_HOST may carry a ":port" suffix, whereas the host component returned by
    // parse_url() never does; drop the port so same-site links compare equal.
    $currentHost = (string) preg_replace('/:\d+$/', '', (string) ($_SERVER['HTTP_HOST'] ?? ''));
    if ($host === $this->normalizeLinkHost($currentHost)) {
        return false;
    }

    // Read the configured internal hosts on every call. A method-level static cache
    // would be shared by all instances and would go stale as soon as the
    // configuration changes, so the (typically short) list is scanned directly.
    $internalHosts = $this->config()->get('links.external_links.internal_hosts');
    foreach ((array) $internalHosts as $internalHost) {
        if (is_string($internalHost) && $host === $this->normalizeLinkHost($internalHost)) {
            return false;
        }
    }

    return true;
}

/**
 * Normalise a host name for comparison: lower-case it and drop a leading "www.".
 *
 * @param string $host Host name as found in a URL, HTTP_HOST or the configuration
 * @return string Normalised host name
 */
private function normalizeLinkHost(string $host): string
{
    $host = strtolower($host);

    return strncmp($host, 'www.', 4) === 0 ? substr($host, 4) : $host;
}
558581

559582
/**
@@ -2295,7 +2318,8 @@ protected function blockHeader($Line)
22952318
$Block['element']['attributes'] = ['id' => $id];
22962319

22972320
// Check if the heading level should be included in the Table of Contents (TOC)
2298-
if (!in_array($level, $config->get('toc.levels'))) {
2321+
// Also ensure we skip adding it to TOC if it is disabled in the config
2322+
if (!$config->get('toc') || !in_array($level, $config->get('toc.levels'))) {
22992323
return $Block; // Return the block if it should not be part of the TOC
23002324
}
23012325

@@ -2351,7 +2375,8 @@ protected function blockSetextHeader($Line, $Block = null)
23512375
$Block['element']['attributes'] = ['id' => $id];
23522376

23532377
// Check if the heading level should be included in the Table of Contents (TOC)
2354-
if (!in_array($level, $config->get('toc.levels'))) {
2378+
// Also ensure we skip adding it to TOC if it is disabled in the config
2379+
if (!$config->get('toc') || !in_array($level, $config->get('toc.levels'))) {
23552380
return $Block; // Return the block if it should not be part of the TOC
23562381
}
23572382

@@ -2622,6 +2647,7 @@ public function body(string $text): string
26222647
$this->contentsListArray = [];
26232648
$this->contentsListString = '';
26242649
$this->firstHeadLevel = 0;
2650+
$this->predefinedAbbreviationsAdded = false;
26252651

26262652
$text = $this->encodeTag($text); // Escapes ToC tag temporarily
26272653
$html = parent::text($text); // Parses the markdown text
@@ -2642,6 +2668,7 @@ public function body(string $text): string
26422668
*/
26432669
public function contentsList(string $type_return = 'string'): string
26442670
{
2671+
26452672
switch (strtolower($type_return)) {
26462673
case 'string':
26472674
return $this->contentsListString ? $this->body($this->contentsListString) : '';
@@ -3144,9 +3171,12 @@ protected function unmarkedText($text)
31443171
{
31453172
$config = $this->config();
31463173

3147-
// Add predefined abbreviations to the definition data
3148-
foreach ($config->get('abbreviations.predefined') as $abbreviation => $description) {
3149-
$this->DefinitionData['Abbreviation'][$abbreviation] = $description;
3174+
if (!$this->predefinedAbbreviationsAdded) {
3175+
// Add predefined abbreviations to the definition data once per parse
3176+
foreach ($config->get('abbreviations.predefined') as $abbreviation => $description) {
3177+
$this->DefinitionData['Abbreviation'][$abbreviation] = $description;
3178+
}
3179+
$this->predefinedAbbreviationsAdded = true;
31503180
}
31513181

31523182
// Call the parent method to handle the rest of the text processing
@@ -3500,72 +3530,76 @@ protected function element(array $Element)
35003530
public function line($text, $nonNestables = [])
{
    // Cast once up front so every string function below (strpbrk, strlen, substr,
    // index access) operates on a real string, not just the strpbrk() call.
    $text = (string) $text;
    $markup = '';

    // Local copies avoid repeated property lookups inside the hot loop.
    $inlineMarkerList = $this->inlineMarkerList;
    $InlineTypes = $this->InlineTypes;

    // Flip to a set so the "is this inline type excluded?" check is an isset().
    $nonNestablesSet = $nonNestables ? array_flip($nonNestables) : [];

    while (true) {
        // Remainder of $text starting at the first inline marker character, if any.
        $excerptText = strpbrk($text, $inlineMarkerList);
        if ($excerptText === false) {
            // No more markers: emit whatever is left as plain text and stop.
            $markup .= $this->unmarkedText($text);
            break;
        }

        $marker = $excerptText[0];
        // strpbrk() returns the tail of $text beginning at the marker, so the marker
        // offset is the length difference; no second scan of $text is needed.
        $markerPosition = strlen($text) - strlen($excerptText);
        $before = $markerPosition > 0 ? $text[$markerPosition - 1] : '';

        $Excerpt = [
            'text' => $excerptText,
            'context' => $text,
            'before' => $before,
            'parent' => $this,
        ];

        // Try each inline type registered for this marker until one produces an element.
        foreach ($InlineTypes[$marker] as $inlineType) {
            if (isset($nonNestablesSet[$inlineType])) {
                continue; // This inline type may not be nested in the current context
            }

            $handler = 'inline' . $inlineType;
            $Inline = $this->$handler($Excerpt);

            if (!isset($Inline)) {
                continue; // Handler did not recognise the excerpt
            }

            if (isset($Inline['position']) && $Inline['position'] > $markerPosition) {
                continue; // The inline starts after this marker, so it belongs to a later one
            }

            // Handlers that do not report a position start at the marker itself.
            $Inline['position'] = $Inline['position'] ?? $markerPosition;

            // Propagate the non-nestable types to the produced element.
            if ($nonNestables) {
                foreach ($nonNestables as $nonNestable) {
                    $Inline['element']['nonNestables'][] = $nonNestable;
                }
            }

            // Emit the plain text that precedes the inline element, if there is any.
            if ($Inline['position'] > 0) {
                $markup .= $this->unmarkedText(substr($text, 0, $Inline['position']));
            }

            // Emit the inline element, using pre-rendered markup when the handler supplies it.
            $markup .= $Inline['markup'] ?? $this->element($Inline['element']);

            // Drop the consumed portion and resume scanning the rest of the text.
            $text = (string) substr($text, $Inline['position'] + $Inline['extent']);

            continue 2;
        }

        // No handler accepted the marker: emit the text up to and including it as plain text.
        $markup .= $this->unmarkedText(substr($text, 0, $markerPosition + 1));
        $text = (string) substr($text, $markerPosition + 1);
    }

    return $markup;
}
35713605

@@ -3589,86 +3623,84 @@ public function line($text, $nonNestables = [])
35893623
protected function lineElements($text, $nonNestables = []): array
35903624
{
35913625
$Elements = [];
3626+
$inlineMarkerList = $this->inlineMarkerList;
3627+
$InlineTypes = $this->InlineTypes;
3628+
$nonNestablesSet = $nonNestables ? array_flip($nonNestables) : [];
3629+
3630+
$textLen = strlen($text);
3631+
$offset = 0;
3632+
3633+
while ($offset < $textLen) {
3634+
$ExcerptStr = strpbrk(substr($text, $offset), $inlineMarkerList);
3635+
if ($ExcerptStr === false) {
3636+
// No more markers, process the rest and break
3637+
if ($offset < $textLen) {
3638+
$InlineText = $this->inlineText(substr($text, $offset));
3639+
$Elements[] = $InlineText['element'];
3640+
}
3641+
break;
3642+
}
35923643

3593-
// If non-nestable elements are provided, convert them to associative array for fast lookup
3594-
$nonNestables = (
3595-
empty($nonNestables)
3596-
? []
3597-
: array_combine($nonNestables, $nonNestables)
3598-
);
3599-
3600-
// $Excerpt represents the first occurrence of an inline marker in the text
3601-
while ($Excerpt = strpbrk($text, $this->inlineMarkerList)) {
3602-
$marker = $Excerpt[0]; // The detected marker
3603-
$markerPosition = strlen($text) - strlen($Excerpt); // Calculate the marker position in the text
3644+
$marker = $ExcerptStr[0];
3645+
$markerPosition = strpos($text, $marker, $offset);
36043646

3605-
// Get the character before the marker (if any)
36063647
$before = $markerPosition > 0 ? $text[$markerPosition - 1] : '';
3648+
$Excerpt = [
3649+
'text' => substr($text, $markerPosition),
3650+
'context' => $text,
3651+
'before' => $before,
3652+
];
36073653

3608-
// Prepare an excerpt for further processing
3609-
$Excerpt = ['text' => $Excerpt, 'context' => $text, 'before' => $before];
3610-
3611-
// Process all inline types associated with this marker
3612-
foreach ($this->InlineTypes[$marker] as $inlineType) {
3613-
// Skip inline types that are non-nestable within this context
3614-
if (isset($nonNestables[$inlineType])) {
3654+
foreach ($InlineTypes[$marker] as $inlineType) {
3655+
if (isset($nonNestablesSet[$inlineType])) {
36153656
continue;
36163657
}
36173658

3618-
// Call the corresponding inline processing function
36193659
$Inline = $this->{"inline$inlineType"}($Excerpt);
36203660

3621-
// If no valid inline element was found, continue to the next inline type
36223661
if (!isset($Inline)) {
36233662
continue;
36243663
}
36253664

3626-
// Ensure the inline element belongs to the current marker
3627-
if (isset($Inline['position']) && $Inline['position'] > $markerPosition) {
3665+
if (isset($Inline['position']) && $Inline['position'] > ($markerPosition - $offset)) {
36283666
continue;
36293667
}
36303668

3631-
// Set default inline position if not specified
3632-
if (!isset($Inline['position'])) {
3633-
$Inline['position'] = $markerPosition;
3634-
}
3635-
3636-
// Inherit non-nestable elements from the current context
3637-
$Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables'])
3638-
? array_merge($Inline['element']['nonNestables'], $nonNestables)
3639-
: $nonNestables;
3669+
$Inline['position'] = $Inline['position'] ?? 0;
36403670

3641-
// Get the text before the inline marker
3642-
$unmarkedText = substr($text, 0, $Inline['position']);
3671+
// Only add nonNestables if present
3672+
if ($nonNestablesSet) {
3673+
$Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables'])
3674+
? array_merge($Inline['element']['nonNestables'], array_keys($nonNestablesSet))
3675+
: array_keys($nonNestablesSet);
3676+
}
36433677

3644-
// Process and add the unmarked text as an element
3645-
$InlineText = $this->inlineText($unmarkedText);
3646-
$Elements[] = $InlineText['element'];
3678+
// Add unmarked text before the inline element
3679+
if ($Inline['position'] > 0) {
3680+
$unmarkedText = substr($text, $offset, $Inline['position']);
3681+
if ($unmarkedText !== '') {
3682+
$InlineText = $this->inlineText($unmarkedText);
3683+
$Elements[] = $InlineText['element'];
3684+
}
3685+
}
36473686

3648-
// Process and add the inline element
3687+
// Add the inline element
36493688
$Elements[] = $this->extractElement($Inline);
36503689

3651-
// Remove the processed portion from the text and continue parsing
3652-
$text = substr($text, $Inline['position'] + $Inline['extent']);
3653-
3690+
// Move offset past the processed inline element
3691+
$offset = $markerPosition + $Inline['position'] + $Inline['extent'];
36543692
continue 2;
36553693
}
36563694

3657-
// If no valid inline element was found for the marker, treat it as plain text
3658-
$unmarkedText = substr($text, 0, $markerPosition + 1);
3659-
3660-
// Process and add the unmarked text as an element
3661-
$InlineText = $this->inlineText($unmarkedText);
3662-
$Elements[] = $InlineText['element'];
3663-
3664-
// Remove the processed portion from the text
3665-
$text = substr($text, $markerPosition + 1);
3695+
// No inline found, treat marker as plain text
3696+
$plainText = substr($text, $offset, $markerPosition - $offset + 1);
3697+
if ($plainText !== '') {
3698+
$InlineText = $this->inlineText($plainText);
3699+
$Elements[] = $InlineText['element'];
3700+
}
3701+
$offset = $markerPosition + 1;
36663702
}
36673703

3668-
// Process any remaining text after all markers
3669-
$InlineText = $this->inlineText($text);
3670-
$Elements[] = $InlineText['element'];
3671-
36723704
// Set the `autobreak` property for each element, defaulting to false if not already set
36733705
foreach ($Elements as &$Element) {
36743706
if (!isset($Element['autobreak'])) {

0 commit comments

Comments
 (0)