Skip to content

Commit c131097

Browse files
add test to compare with old implementation
1 parent f55b8bd commit c131097

File tree

1 file changed

+121
-0
lines changed

1 file changed

+121
-0
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package jakarta.xml.bind;
2+
3+
import org.junit.Test;
4+
5+
import java.math.BigInteger;
6+
7+
import static org.junit.Assert.assertEquals;
8+
import static org.junit.Assert.assertFalse;
9+
import static org.junit.Assert.assertThrows;
10+
import static org.junit.Assert.assertTrue;
11+
12+
public class DatatypeConverterImplTest {
13+
14+
@Test
15+
public void parseIntegerAndLongKeepBehaviorWithoutRemoveOptionalPlus() {
16+
// the following code can be used to explore all 370 Unicode characters that are considered digits
17+
// all of these are accepted by BigInteger and Long.parseLong as valid digits
18+
// while only ASCII '0' to '9' are valid from an XML Schema perspective
19+
//
20+
// note that the old implementation only accepted ASCII digits after a leading plus sign but didn't care
21+
// for the rest of the string, so there is a mixture of correct and incorrect input validation
22+
23+
// long numbersInUnicode = 0;
24+
// for (int ch = 0; ch <= Character.MAX_VALUE; ch++) {
25+
// int number = Character.digit(ch, 10);
26+
// if (number >= 0) {
27+
// numbersInUnicode++;
28+
// System.out.println("Code point " + ch + " is number " + number);
29+
// }
30+
// }
31+
// System.out.println("Total number of Unicode characters that are digits: " + numbersInUnicode);
32+
33+
assertFalse(DataTypeConverterImplOld.codePath1Visited &&
34+
DataTypeConverterImplOld.codePath2Visited &&
35+
DataTypeConverterImplOld.codePath3Visited &&
36+
DataTypeConverterImplOld.codePath4Visited);
37+
38+
// code path 1: empty string or just plus or minus
39+
String[] zeroLengthInputs = {"", "+", "-"};
40+
for (String input : zeroLengthInputs) {
41+
assertThrows(NumberFormatException.class, () -> DatatypeConverterImpl._parseInteger(input));
42+
assertThrows(NumberFormatException.class, () -> DataTypeConverterImplOld._parseInteger(input));
43+
assertThrows(NumberFormatException.class, () -> DatatypeConverterImpl._parseLong(input));
44+
assertThrows(NumberFormatException.class, () -> DataTypeConverterImplOld._parseLong(input));
45+
}
46+
47+
// code path 1: single char string or no leading plus
48+
String[] testInputs = {"5", "0", "-3", "1234567890","-1234567890"};
49+
for (String input : testInputs) {
50+
assertEquals(DatatypeConverterImpl._parseInteger(input), DataTypeConverterImplOld._parseInteger(input));
51+
assertEquals(DatatypeConverterImpl._parseLong(input), DataTypeConverterImplOld._parseLong(input));
52+
}
53+
54+
// code paths 2, 3, 4: leading plus followed by digit, dot, or invalid char
55+
// I would love to test all Unicode codepoints here, but unfortunately 1632 (Arabic-Indic Digit Zero) is the
56+
// first codepoint that is a valid digit but not an ASCII digit, so we limit the test to 1631 for now
57+
// for (int ch = 0; ch <= Character.MAX_VALUE; ch++) {
58+
for (int ch = 0; ch <= 1631; ch++) {
59+
String input = "+" + (char) ch + "123";
60+
System.out.println("Testing input: " + ch);
61+
System.out.println("Testing input: \"" + input + "\"");
62+
if (ch >= '0' && ch <= '9') {
63+
// code path 2
64+
assertEquals(DatatypeConverterImpl._parseInteger(input), DataTypeConverterImplOld._parseInteger(input));
65+
assertEquals(DatatypeConverterImpl._parseLong(input), DataTypeConverterImplOld._parseLong(input));
66+
} else {
67+
// code paths 3 and 4
68+
assertThrows(NumberFormatException.class, () -> DatatypeConverterImpl._parseInteger(input));
69+
assertThrows(NumberFormatException.class, () -> DataTypeConverterImplOld._parseInteger(input));
70+
assertThrows(NumberFormatException.class, () -> DatatypeConverterImpl._parseLong(input));
71+
assertThrows(NumberFormatException.class, () -> DataTypeConverterImplOld._parseLong(input));
72+
}
73+
}
74+
75+
assertTrue(DataTypeConverterImplOld.codePath1Visited &&
76+
DataTypeConverterImplOld.codePath2Visited &&
77+
DataTypeConverterImplOld.codePath3Visited &&
78+
DataTypeConverterImplOld.codePath4Visited);
79+
}
80+
81+
// copy of old implementation plus boolean flags to track code paths
82+
private static final class DataTypeConverterImplOld {
83+
static boolean codePath1Visited = false;
84+
static boolean codePath2Visited = false;
85+
static boolean codePath3Visited = false;
86+
static boolean codePath4Visited = false;
87+
88+
public static BigInteger _parseInteger(CharSequence s) {
89+
return new BigInteger(removeOptionalPlus(WhiteSpaceProcessor.trim(s)).toString());
90+
}
91+
92+
public static long _parseLong(CharSequence s) {
93+
return Long.parseLong(removeOptionalPlus(WhiteSpaceProcessor.trim(s)).toString());
94+
}
95+
96+
private static CharSequence removeOptionalPlus(CharSequence s) {
97+
int len = s.length();
98+
99+
if (len <= 1 || s.charAt(0) != '+') {
100+
codePath1Visited = true;
101+
return s;
102+
}
103+
104+
s = s.subSequence(1, len);
105+
char ch = s.charAt(0);
106+
if ('0' <= ch && ch <= '9') {
107+
codePath2Visited = true;
108+
return s;
109+
}
110+
if ('.' == ch) {
111+
codePath3Visited = true;
112+
return s;
113+
}
114+
115+
codePath4Visited = true;
116+
throw new NumberFormatException();
117+
}
118+
}
119+
120+
121+
}

0 commit comments

Comments
 (0)