1616use PHPStan \Type \Accessory \AccessoryDecimalIntegerStringType ;
1717use PHPStan \Type \Accessory \AccessoryNonEmptyStringType ;
1818use PHPStan \Type \Accessory \AccessoryNonFalsyStringType ;
19+ use PHPStan \Type \Accessory \AccessoryNumericStringType ;
1920use PHPStan \Type \Constant \ConstantStringType ;
2021use PHPStan \Type \IntersectionType ;
2122use PHPStan \Type \StringType ;
2526use function array_key_exists ;
2627use function array_values ;
2728use function count ;
29+ use function ctype_digit ;
2830use function in_array ;
2931use function is_int ;
3032use function preg_replace ;
@@ -129,8 +131,11 @@ public function parseGroups(string $regex): ?RegexAstWalkResult
129131 $ subjectAsGroupResult ->isDecimalInteger ()->yes ()
130132 && $ this ->regexExpressionHelper ->isAnchoredPattern ($ regex )
131133 ) {
134+ $ accessory = $ subjectAsGroupResult ->isDecimalIntegerLeadingZeroSafe ()
135+ ? new AccessoryDecimalIntegerStringType ()
136+ : new AccessoryNumericStringType ();
132137 $ astWalkResult = $ astWalkResult ->withSubjectBaseType (
133- new IntersectionType ([new StringType (), new AccessoryDecimalIntegerStringType () ]),
138+ new IntersectionType ([new StringType (), $ accessory ]),
134139 );
135140 } elseif ($ subjectAsGroupResult ->isNonFalsy ()->yes ()) {
136141 $ astWalkResult = $ astWalkResult ->withSubjectBaseType (
@@ -427,15 +432,21 @@ private function createGroupType(TreeNode $group, bool $maybeConstant, string $p
427432 }
428433
429434 if ($ walkResult ->isDecimalInteger ()->yes ()) {
435+ // a series of digits beginning with "0" (e.g. "007") or a "-0" is not a canonical
436+ // decimal integer string, but it is still a numeric string
437+ $ accessory = $ walkResult ->isDecimalIntegerLeadingZeroSafe ()
438+ ? new AccessoryDecimalIntegerStringType ()
439+ : new AccessoryNumericStringType ();
440+
430441 if ($ walkResult ->isNonFalsy ()->yes ()) {
431442 return new IntersectionType ([
432443 new StringType (),
433- new AccessoryDecimalIntegerStringType () ,
444+ $ accessory ,
434445 new AccessoryNonFalsyStringType (),
435446 ]);
436447 }
437448
438- $ result = new IntersectionType ([new StringType (), new AccessoryDecimalIntegerStringType () ]);
449+ $ result = new IntersectionType ([new StringType (), $ accessory ]);
439450 if (!$ walkResult ->isNonEmpty ()->yes ()) {
440451 return new UnionType ([new ConstantStringType ('' ), $ result ]);
441452 }
@@ -517,7 +528,7 @@ private function walkGroupAst(
517528 }
518529 }
519530 } elseif ($ ast ->getId () === '#quantification ' ) {
520- [$ min ] = $ this ->getQuantificationRange ($ ast );
531+ [$ min, $ max ] = $ this ->getQuantificationRange ($ ast );
521532
522533 if ($ min === 0 ) {
523534 $ walkResult = $ walkResult ->inOptionalQuantification (true );
@@ -532,39 +543,77 @@ private function walkGroupAst(
532543 }
533544 }
534545
535- $ walkResult = $ walkResult ->onlyLiterals (null );
536- } elseif ($ ast ->getId () === '#class ' && $ walkResult ->getOnlyLiterals () !== null ) {
546+ // signal the quantified atom whether it may appear more than once
547+ // (so a leading zero may be followed by more digits) and whether it
548+ // is optional, which is consumed when the atom is processed.
549+ $ walkResult = $ walkResult
550+ ->decimalAtomRepeats ($ max === null || $ max >= 2 )
551+ ->decimalAtomOptional ($ min === 0 )
552+ ->onlyLiterals (null );
553+ } elseif (in_array ($ ast ->getId (), ['#class ' , '#negativeclass ' ], true )) {
537554 $ inClass = true ;
538555
539- $ newLiterals = [];
540- foreach ($ children as $ child ) {
541- $ oldLiterals = $ walkResult ->getOnlyLiterals ();
556+ [$ atomRepeats , $ atomOptional , $ walkResult ] = $ this ->consumeDecimalAtomQuantification ($ walkResult );
542557
543- $ this ->getLiteralValue ($ child , $ oldLiterals , true , $ patternModifiers , true );
544- foreach ($ oldLiterals ?? [] as $ oldLiteral ) {
545- $ newLiterals [] = $ oldLiteral ;
558+ [$ classAllDigit , $ classCanBeZero ] = $ ast ->getId () === '#class '
559+ ? $ this ->getClassDecimalInfo ($ ast )
560+ : [false , false ];
561+
562+ if ($ classAllDigit ) {
563+ if ($ walkResult ->isDecimalInteger ()->maybe ()) {
564+ $ walkResult = $ walkResult ->decimalInteger (TrinaryLogic::createYes ());
565+ }
566+ $ walkResult = $ this ->applyDecimalDigitPosition ($ walkResult , $ classCanBeZero , !$ atomOptional , $ atomRepeats );
567+ } else {
568+ // [^0-9] should not parse as decimal-int-string, and [^list-everything-but-numbers] is
569+ // technically doable but really silly compared to just \d so we can safely assume the string
570+ // is not a decimal integer for negative classes (and classes containing non-digits).
571+ $ walkResult = $ walkResult ->decimalInteger (TrinaryLogic::createNo ());
572+ }
573+
574+ if ($ ast ->getId () === '#class ' && $ walkResult ->getOnlyLiterals () !== null ) {
575+ $ newLiterals = [];
576+ foreach ($ children as $ child ) {
577+ $ oldLiterals = $ walkResult ->getOnlyLiterals ();
578+
579+ $ this ->getLiteralValue ($ child , $ oldLiterals , true , $ patternModifiers , true );
580+ foreach ($ oldLiterals ?? [] as $ oldLiteral ) {
581+ $ newLiterals [] = $ oldLiteral ;
582+ }
546583 }
584+ $ walkResult = $ walkResult ->onlyLiterals ($ newLiterals );
585+ } else {
586+ $ walkResult = $ walkResult ->onlyLiterals (null );
547587 }
548- $ walkResult = $ walkResult ->onlyLiterals ($ newLiterals );
549588 } elseif ($ ast ->getId () === 'token ' ) {
550589 $ onlyLiterals = $ walkResult ->getOnlyLiterals ();
551590 $ literalValue = $ this ->getLiteralValue ($ ast , $ onlyLiterals , !$ inClass , $ patternModifiers , false );
552591 $ walkResult = $ walkResult ->onlyLiterals ($ onlyLiterals );
553592
554593 if ($ literalValue !== null ) {
555- if (Strings::match ($ literalValue , '/^\d+$/ ' ) !== null ) {
556- if ($ walkResult ->isDecimalInteger ()->maybe ()) {
557- $ walkResult = $ walkResult ->decimalInteger (TrinaryLogic::createYes ());
594+ if (!$ inClass && $ literalValue !== '' ) {
595+ [$ atomRepeats , $ atomOptional , $ walkResult ] = $ this ->consumeDecimalAtomQuantification ($ walkResult );
596+
597+ if (Strings::match ($ literalValue , '/^\d+$/ ' ) !== null ) {
598+ if ($ walkResult ->isDecimalInteger ()->maybe ()) {
599+ $ walkResult = $ walkResult ->decimalInteger (TrinaryLogic::createYes ());
600+ }
601+ $ walkResult = $ this ->applyDecimalDigitPosition (
602+ $ walkResult ,
603+ $ literalValue [0 ] === '0 ' ,
604+ !$ atomOptional ,
605+ $ atomRepeats || strlen ($ literalValue ) > 1 ,
606+ );
607+ } elseif (
608+ $ literalValue === '- '
609+ && $ walkResult ->isDecimalInteger ()->maybe ()
610+ && !$ walkResult ->hasSeenDecimalIntegerSign ()
611+ ) {
612+ // a single leading minus sign keeps the string a decimal integer (e.g. "-1")
613+ $ walkResult = $ walkResult ->seenDecimalIntegerSign (true );
614+ } else {
615+ $ walkResult = $ walkResult ->decimalInteger (TrinaryLogic::createNo ());
558616 }
559- } elseif (
560- $ literalValue === '- '
561- && $ walkResult ->isDecimalInteger ()->maybe ()
562- && !$ walkResult ->hasSeenDecimalIntegerSign ()
563- ) {
564- // a single leading minus sign keeps the string a decimal integer (e.g. "-1")
565- $ walkResult = $ walkResult ->seenDecimalIntegerSign (true );
566- } elseif ($ literalValue !== '' ) {
567- $ walkResult = $ walkResult ->decimalInteger (TrinaryLogic::createNo ());
568617 }
569618
570619 if (!$ walkResult ->isInOptionalQuantification () && $ literalValue !== '' ) {
@@ -584,6 +633,10 @@ private function walkGroupAst(
584633 $ nonEmpty = TrinaryLogic::createYes ();
585634 $ nonFalsy = TrinaryLogic::createYes ();
586635 $ decimalInteger = TrinaryLogic::createYes ();
636+ $ branchBad = false ;
637+ $ branchLeadCanBeZero = false ;
638+ $ branchResolved = true ;
639+ $ branchSeenDigit = false ;
587640 foreach ($ children as $ child ) {
588641 $ childResult = $ this ->walkGroupAst (
589642 $ child ,
@@ -593,12 +646,20 @@ private function walkGroupAst(
593646 ->nonEmpty (TrinaryLogic::createMaybe ())
594647 ->nonFalsy (TrinaryLogic::createMaybe ())
595648 ->decimalInteger (TrinaryLogic::createMaybe ())
596- ->seenDecimalIntegerSign (false ),
649+ ->seenDecimalIntegerSign (false )
650+ ->decimalLeadingResolved (false )
651+ ->decimalSeenDigit (false )
652+ ->decimalLeadCanBeZero (false )
653+ ->decimalBad (false ),
597654 );
598655
599656 $ nonEmpty = $ nonEmpty ->and ($ childResult ->isNonEmpty ());
600657 $ nonFalsy = $ nonFalsy ->and ($ childResult ->isNonFalsy ());
601658 $ decimalInteger = $ decimalInteger ->and ($ childResult ->isDecimalInteger ());
659+ $ branchBad = $ branchBad || !$ childResult ->isDecimalIntegerLeadingZeroSafe ();
660+ $ branchLeadCanBeZero = $ branchLeadCanBeZero || $ childResult ->isDecimalLeadCanBeZero ();
661+ $ branchResolved = $ branchResolved && $ childResult ->isDecimalLeadingResolved ();
662+ $ branchSeenDigit = $ branchSeenDigit || $ childResult ->hasDecimalSeenDigit ();
602663
603664 if ($ newLiterals === null ) {
604665 continue ;
@@ -613,18 +674,26 @@ private function walkGroupAst(
613674 }
614675 }
615676
677+ // the alternation is a single conceptual digit position: it is unsafe if any
678+ // branch is internally unsafe, or if a preceding zero-able lead now gets more digits
679+ $ mergedBad = $ walkResult ->isDecimalBad ()
680+ || $ branchBad
681+ || ($ walkResult ->hasDecimalSeenDigit () && $ walkResult ->isDecimalLeadCanBeZero () && $ branchSeenDigit );
682+ $ mergedLeadCanBeZero = $ walkResult ->isDecimalLeadingResolved ()
683+ ? $ walkResult ->isDecimalLeadCanBeZero ()
684+ : ($ walkResult ->isDecimalLeadCanBeZero () || $ branchLeadCanBeZero );
685+
616686 return $ walkResult
617687 ->onlyLiterals ($ newLiterals )
618688 ->nonEmpty ($ walkResult ->isNonEmpty ()->or ($ nonEmpty ))
619689 ->nonFalsy ($ walkResult ->isNonFalsy ()->or ($ nonFalsy ))
620- ->decimalInteger (TrinaryLogic::maxMin ($ walkResult ->isDecimalInteger (), $ decimalInteger ));
621- }
622-
623- // [^0-9] should not parse as decimal-int-string, and [^list-everything-but-numbers] is technically
624- // doable but really silly compared to just \d so we can safely assume the string is not a decimal
625- // integer for negative classes
626- if ($ ast ->getId () === '#negativeclass ' ) {
627- $ walkResult = $ walkResult ->decimalInteger (TrinaryLogic::createNo ());
690+ ->decimalInteger (TrinaryLogic::maxMin ($ walkResult ->isDecimalInteger (), $ decimalInteger ))
691+ ->decimalLeadingResolved ($ walkResult ->isDecimalLeadingResolved () || $ branchResolved )
692+ ->decimalSeenDigit ($ walkResult ->hasDecimalSeenDigit () || $ branchSeenDigit )
693+ ->decimalLeadCanBeZero ($ mergedLeadCanBeZero )
694+ ->decimalBad ($ mergedBad )
695+ ->decimalAtomRepeats (false )
696+ ->decimalAtomOptional (false );
628697 }
629698
630699 foreach ($ children as $ child ) {
@@ -639,6 +708,132 @@ private function walkGroupAst(
639708 return $ walkResult ;
640709 }
641710
711+ /**
712+ * Reads and clears the transient quantification flags set on the walk result
713+ * for the next digit-producing atom.
714+ *
715+ * @return array{bool, bool, RegexGroupWalkResult} [repeats, optional, walkResult]
716+ */
717+ private function consumeDecimalAtomQuantification (RegexGroupWalkResult $ walkResult ): array
718+ {
719+ return [
720+ $ walkResult ->isDecimalAtomRepeats (),
721+ $ walkResult ->isDecimalAtomOptional (),
722+ $ walkResult ->decimalAtomRepeats (false )->decimalAtomOptional (false ),
723+ ];
724+ }
725+
726+ /**
727+ * Tracks one digit character position to detect whether a leading zero can be
728+ * followed by more digits (which would not be a canonical decimal integer).
729+ *
730+ * @param bool $canBeZero whether this digit can be "0"
731+ * @param bool $mandatory whether this digit is always present (not optional)
732+ * @param bool $repeats whether this digit may appear more than once in a row
733+ */
734+ private function applyDecimalDigitPosition (RegexGroupWalkResult $ walkResult , bool $ canBeZero , bool $ mandatory , bool $ repeats ): RegexGroupWalkResult
735+ {
736+ $ leadingResolved = $ walkResult ->isDecimalLeadingResolved ();
737+ $ leadCanBeZero = $ walkResult ->isDecimalLeadCanBeZero ();
738+ $ bad = $ walkResult ->isDecimalBad ();
739+
740+ // a digit appears after another digit position: if the lead can be a zero,
741+ // the value is a leading-zero string like "00"
742+ if ($ walkResult ->hasDecimalSeenDigit () && $ leadCanBeZero ) {
743+ $ bad = true ;
744+ }
745+
746+ // while the leading digit is not pinned down yet (only optional digits seen
747+ // so far), this digit may be the leading one
748+ if (!$ leadingResolved && $ canBeZero ) {
749+ $ leadCanBeZero = true ;
750+ }
751+
752+ // a single quantified digit repeated produces a leading-zero string like "00"
753+ if ($ repeats && $ leadCanBeZero ) {
754+ $ bad = true ;
755+ }
756+
757+ if ($ mandatory ) {
758+ $ leadingResolved = true ;
759+ }
760+
761+ return $ walkResult
762+ ->decimalSeenDigit (true )
763+ ->decimalLeadingResolved ($ leadingResolved )
764+ ->decimalLeadCanBeZero ($ leadCanBeZero )
765+ ->decimalBad ($ bad );
766+ }
767+
768+ /**
769+ * @return array{bool, bool} [allDigit, canBeZero]
770+ */
771+ private function getClassDecimalInfo (TreeNode $ classNode ): array
772+ {
773+ $ allDigit = true ;
774+ $ canBeZero = false ;
775+
776+ foreach ($ classNode ->getChildren () as $ child ) {
777+ if ($ child ->getId () === '#range ' ) {
778+ $ bounds = $ child ->getChildren ();
779+ $ from = $ this ->getClassBoundChar ($ bounds [0 ] ?? null );
780+ $ to = $ this ->getClassBoundChar ($ bounds [1 ] ?? null );
781+
782+ if ($ from === null || $ to === null || $ from > $ to || ctype_digit ($ from ) === false || ctype_digit ($ to ) === false ) {
783+ $ allDigit = false ;
784+ } elseif ($ from <= '0 ' && '0 ' <= $ to ) {
785+ $ canBeZero = true ;
786+ }
787+
788+ continue ;
789+ }
790+
791+ if ($ child ->getId () === 'token ' ) {
792+ $ token = $ child ->getValueToken ();
793+ $ value = $ child ->getValueValue ();
794+
795+ if ($ token === 'character_type ' && $ value === '\d ' ) {
796+ $ canBeZero = true ;
797+ continue ;
798+ }
799+
800+ if ($ token === 'posix_class ' && $ value === '[:digit:] ' ) {
801+ $ canBeZero = true ;
802+ continue ;
803+ }
804+
805+ if (
806+ in_array ($ token , ['literal ' , 'range ' , 'class_ ' , '_class ' ], true )
807+ && strlen ($ value ) === 1
808+ && ctype_digit ($ value )
809+ ) {
810+ if ($ value === '0 ' ) {
811+ $ canBeZero = true ;
812+ }
813+ continue ;
814+ }
815+ }
816+
817+ $ allDigit = false ;
818+ }
819+
820+ return [$ allDigit , $ canBeZero ];
821+ }
822+
823+ private function getClassBoundChar (?TreeNode $ node ): ?string
824+ {
825+ if ($ node === null || $ node ->getId () !== 'token ' ) {
826+ return null ;
827+ }
828+
829+ $ value = $ node ->getValueValue ();
830+ if (strlen ($ value ) > 1 && $ value [0 ] === '\\' ) {
831+ $ value = substr ($ value , 1 ) ?: '' ;
832+ }
833+
834+ return strlen ($ value ) === 1 ? $ value : null ;
835+ }
836+
642837 private function isMaybeEmptyNode (TreeNode $ node , string $ patternModifiers , bool &$ isNonFalsy , bool &$ isNonDecimal ): bool
643838 {
644839 if ($ node ->getId () === '#quantification ' ) {
0 commit comments