1+ package jakarta .xml .bind ;
2+
3+ import org .junit .Test ;
4+
5+ import java .math .BigInteger ;
6+
7+ import static org .junit .Assert .assertEquals ;
8+ import static org .junit .Assert .assertFalse ;
9+ import static org .junit .Assert .assertThrows ;
10+ import static org .junit .Assert .assertTrue ;
11+
12+ public class DatatypeConverterImplTest {
13+
14+ @ Test
15+ public void parseIntegerAndLongKeepBehaviorWithoutRemoveOptionalPlus () {
16+ // the following code can be used to explore all 370 Unicode characters that are considered digits
17+ // all of these are accepted by BigInteger and Long.parseLong as valid digits
18+ // while only ASCII '0' to '9' are valid from an XML Schema perspective
19+ //
20+ // note that the old implementation only accepted ASCII digits after a leading plus sign but didn't care
21+ // for the rest of the string, so there is a mixture of correct and incorrect input validation
22+
23+ // long numbersInUnicode = 0;
24+ // for (int ch = 0; ch <= Character.MAX_VALUE; ch++) {
25+ // int number = Character.digit(ch, 10);
26+ // if (number >= 0) {
27+ // numbersInUnicode++;
28+ // System.out.println("Code point " + ch + " is number " + number);
29+ // }
30+ // }
31+ // System.out.println("Total number of Unicode characters that are digits: " + numbersInUnicode);
32+
33+ assertFalse (DataTypeConverterImplOld .codePath1Visited &&
34+ DataTypeConverterImplOld .codePath2Visited &&
35+ DataTypeConverterImplOld .codePath3Visited &&
36+ DataTypeConverterImplOld .codePath4Visited );
37+
38+ // code path 1: empty string or just plus or minus
39+ String [] zeroLengthInputs = {"" , "+" , "-" };
40+ for (String input : zeroLengthInputs ) {
41+ assertThrows (NumberFormatException .class , () -> DatatypeConverterImpl ._parseInteger (input ));
42+ assertThrows (NumberFormatException .class , () -> DataTypeConverterImplOld ._parseInteger (input ));
43+ assertThrows (NumberFormatException .class , () -> DatatypeConverterImpl ._parseLong (input ));
44+ assertThrows (NumberFormatException .class , () -> DataTypeConverterImplOld ._parseLong (input ));
45+ }
46+
47+ // code path 1: single char string or no leading plus
48+ String [] testInputs = {"5" , "0" , "-3" , "1234567890" ,"-1234567890" };
49+ for (String input : testInputs ) {
50+ assertEquals (DatatypeConverterImpl ._parseInteger (input ), DataTypeConverterImplOld ._parseInteger (input ));
51+ assertEquals (DatatypeConverterImpl ._parseLong (input ), DataTypeConverterImplOld ._parseLong (input ));
52+ }
53+
54+ // code paths 2, 3, 4: leading plus followed by digit, dot, or invalid char
55+ // I would love to test all Unicode codepoints here, but unfortunately 1632 (Arabic-Indic Digit Zero) is the
56+ // first codepoint that is a valid digit but not an ASCII digit, so we limit the test to 1631 for now
57+ // for (int ch = 0; ch <= Character.MAX_VALUE; ch++) {
58+ for (int ch = 0 ; ch <= 1631 ; ch ++) {
59+ String input = "+" + (char ) ch + "123" ;
60+ System .out .println ("Testing input: " + ch );
61+ System .out .println ("Testing input: \" " + input + "\" " );
62+ if (ch >= '0' && ch <= '9' ) {
63+ // code path 2
64+ assertEquals (DatatypeConverterImpl ._parseInteger (input ), DataTypeConverterImplOld ._parseInteger (input ));
65+ assertEquals (DatatypeConverterImpl ._parseLong (input ), DataTypeConverterImplOld ._parseLong (input ));
66+ } else {
67+ // code paths 3 and 4
68+ assertThrows (NumberFormatException .class , () -> DatatypeConverterImpl ._parseInteger (input ));
69+ assertThrows (NumberFormatException .class , () -> DataTypeConverterImplOld ._parseInteger (input ));
70+ assertThrows (NumberFormatException .class , () -> DatatypeConverterImpl ._parseLong (input ));
71+ assertThrows (NumberFormatException .class , () -> DataTypeConverterImplOld ._parseLong (input ));
72+ }
73+ }
74+
75+ assertTrue (DataTypeConverterImplOld .codePath1Visited &&
76+ DataTypeConverterImplOld .codePath2Visited &&
77+ DataTypeConverterImplOld .codePath3Visited &&
78+ DataTypeConverterImplOld .codePath4Visited );
79+ }
80+
81+ // copy of old implementation plus boolean flags to track code paths
82+ private static final class DataTypeConverterImplOld {
83+ static boolean codePath1Visited = false ;
84+ static boolean codePath2Visited = false ;
85+ static boolean codePath3Visited = false ;
86+ static boolean codePath4Visited = false ;
87+
88+ public static BigInteger _parseInteger (CharSequence s ) {
89+ return new BigInteger (removeOptionalPlus (WhiteSpaceProcessor .trim (s )).toString ());
90+ }
91+
92+ public static long _parseLong (CharSequence s ) {
93+ return Long .parseLong (removeOptionalPlus (WhiteSpaceProcessor .trim (s )).toString ());
94+ }
95+
96+ private static CharSequence removeOptionalPlus (CharSequence s ) {
97+ int len = s .length ();
98+
99+ if (len <= 1 || s .charAt (0 ) != '+' ) {
100+ codePath1Visited = true ;
101+ return s ;
102+ }
103+
104+ s = s .subSequence (1 , len );
105+ char ch = s .charAt (0 );
106+ if ('0' <= ch && ch <= '9' ) {
107+ codePath2Visited = true ;
108+ return s ;
109+ }
110+ if ('.' == ch ) {
111+ codePath3Visited = true ;
112+ return s ;
113+ }
114+
115+ codePath4Visited = true ;
116+ throw new NumberFormatException ();
117+ }
118+ }
119+
120+
121+ }
0 commit comments