Skip to content

Commit 538afc3

Browse files
authored
Merge pull request #1038 from OwenSanzas/fix-xmltokener-unescapeentity
Fix input validation in XMLTokener.unescapeEntity()
2 parents d092d09 + 592e782 commit 538afc3

File tree

3 files changed

+165
-12
lines changed

3 files changed

+165
-12
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,6 @@ build
1616
/gradlew
1717
/gradlew.bat
1818
.gitmodules
19+
20+
# ignore compiled class files
21+
*.class

src/main/java/org/json/XMLTokener.java

Lines changed: 87 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -151,33 +151,108 @@ public Object nextEntity(@SuppressWarnings("unused") char ampersand) throws JSON
151151
/**
152152
* Unescape an XML entity encoding;
153153
* @param e entity (only the actual entity value, not the preceding & or ending ;)
154-
* @return
154+
* @return the unescaped entity string
155+
* @throws JSONException if the entity is malformed
155156
*/
156-
static String unescapeEntity(String e) {
157+
static String unescapeEntity(String e) throws JSONException {
157158
// validate
158159
if (e == null || e.isEmpty()) {
159160
return "";
160161
}
161162
// if our entity is an encoded unicode point, parse it.
162163
if (e.charAt(0) == '#') {
163-
int cp;
164-
if (e.charAt(1) == 'x' || e.charAt(1) == 'X') {
165-
// hex encoded unicode
166-
cp = Integer.parseInt(e.substring(2), 16);
167-
} else {
168-
// decimal encoded unicode
169-
cp = Integer.parseInt(e.substring(1));
164+
if (e.length() < 2) {
165+
throw new JSONException("Invalid numeric character reference: &#;");
170166
}
171-
return new String(new int[] {cp},0,1);
172-
}
167+
int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X')
168+
? parseHexEntity(e)
169+
: parseDecimalEntity(e);
170+
return new String(new int[] {cp}, 0, 1);
171+
}
173172
Character knownEntity = entity.get(e);
174-
if(knownEntity==null) {
173+
if (knownEntity == null) {
175174
// we don't know the entity so keep it encoded
176175
return '&' + e + ';';
177176
}
178177
return knownEntity.toString();
179178
}
180179

180+
/**
181+
* Parse a hexadecimal numeric character reference (e.g., "&#xABC;").
182+
* @param e entity string starting with '#' (e.g., "#x1F4A9")
183+
* @return the Unicode code point
184+
* @throws JSONException if the format is invalid
185+
*/
186+
private static int parseHexEntity(String e) throws JSONException {
187+
// hex encoded unicode - need at least one hex digit after #x
188+
if (e.length() < 3) {
189+
throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
190+
}
191+
String hex = e.substring(2);
192+
if (!isValidHex(hex)) {
193+
throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
194+
}
195+
try {
196+
return Integer.parseInt(hex, 16);
197+
} catch (NumberFormatException nfe) {
198+
throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
199+
}
200+
}
201+
202+
/**
203+
* Parse a decimal numeric character reference (e.g., "&#123;").
204+
* @param e entity string starting with '#' (e.g., "#123")
205+
* @return the Unicode code point
206+
* @throws JSONException if the format is invalid
207+
*/
208+
private static int parseDecimalEntity(String e) throws JSONException {
209+
String decimal = e.substring(1);
210+
if (!isValidDecimal(decimal)) {
211+
throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
212+
}
213+
try {
214+
return Integer.parseInt(decimal);
215+
} catch (NumberFormatException nfe) {
216+
throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
217+
}
218+
}
219+
220+
/**
221+
* Check if a string contains only valid hexadecimal digits.
222+
* @param s the string to check
223+
* @return true if s is non-empty and contains only hex digits (0-9, a-f, A-F)
224+
*/
225+
private static boolean isValidHex(String s) {
226+
if (s == null || s.isEmpty()) {
227+
return false;
228+
}
229+
for (int i = 0; i < s.length(); i++) {
230+
char c = s.charAt(i);
231+
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
232+
return false;
233+
}
234+
}
235+
return true;
236+
}
237+
238+
/**
239+
* Check if a string contains only valid decimal digits.
240+
* @param s the string to check
241+
* @return true if s is non-empty and contains only digits (0-9)
242+
*/
243+
private static boolean isValidDecimal(String s) {
244+
if (s == null || s.isEmpty()) {
245+
return false;
246+
}
247+
for (int i = 0; i < s.length(); i++) {
248+
char c = s.charAt(i);
249+
if (c < '0' || c > '9') {
250+
return false;
251+
}
252+
}
253+
return true;
254+
}
255+
181256

182257
/**
183258
* <pre>{@code

src/test/java/org/json/junit/XMLTest.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,6 +1426,81 @@ public void clarifyCurrentBehavior() {
14261426
assertEquals(jsonObject3.getJSONObject("color").getString("value"), "008E97");
14271427
}
14281428

1429+
/**
1430+
* Tests that empty numeric character reference &#; throws JSONException.
1431+
* Previously threw StringIndexOutOfBoundsException.
1432+
* Related to issue #1035
1433+
*/
1434+
@Test(expected = JSONException.class)
1435+
public void testEmptyNumericEntityThrowsJSONException() {
1436+
String xmlStr = "<a>&#;</a>";
1437+
XML.toJSONObject(xmlStr);
1438+
}
1439+
1440+
/**
1441+
* Tests that malformed decimal entity &#txx; throws JSONException.
1442+
* Previously threw NumberFormatException.
1443+
* Related to issue #1036
1444+
*/
1445+
@Test(expected = JSONException.class)
1446+
public void testInvalidDecimalEntityThrowsJSONException() {
1447+
String xmlStr = "<a>&#txx;</a>";
1448+
XML.toJSONObject(xmlStr);
1449+
}
1450+
1451+
/**
1452+
* Tests that empty hex entity &#x; throws JSONException.
1453+
* Validates proper input validation for hex entities.
1454+
*/
1455+
@Test(expected = JSONException.class)
1456+
public void testEmptyHexEntityThrowsJSONException() {
1457+
String xmlStr = "<a>&#x;</a>";
1458+
XML.toJSONObject(xmlStr);
1459+
}
1460+
1461+
/**
1462+
* Tests that invalid hex entity &#xGGG; throws JSONException.
1463+
* Validates hex digit validation.
1464+
*/
1465+
@Test(expected = JSONException.class)
1466+
public void testInvalidHexEntityThrowsJSONException() {
1467+
String xmlStr = "<a>&#xGGG;</a>";
1468+
XML.toJSONObject(xmlStr);
1469+
}
1470+
1471+
/**
1472+
* Tests that valid decimal numeric entity &#65; works correctly.
1473+
* Should decode to character 'A'.
1474+
*/
1475+
@Test
1476+
public void testValidDecimalEntity() {
1477+
String xmlStr = "<a>&#65;</a>";
1478+
JSONObject jsonObject = XML.toJSONObject(xmlStr);
1479+
assertEquals("A", jsonObject.getString("a"));
1480+
}
1481+
1482+
/**
1483+
* Tests that valid hex numeric entity &#x41; works correctly.
1484+
* Should decode to character 'A'.
1485+
*/
1486+
@Test
1487+
public void testValidHexEntity() {
1488+
String xmlStr = "<a>&#x41;</a>";
1489+
JSONObject jsonObject = XML.toJSONObject(xmlStr);
1490+
assertEquals("A", jsonObject.getString("a"));
1491+
}
1492+
1493+
/**
1494+
* Tests that valid uppercase hex entity &#X41; works correctly.
1495+
* Should decode to character 'A'.
1496+
*/
1497+
@Test
1498+
public void testValidUppercaseHexEntity() {
1499+
String xmlStr = "<a>&#X41;</a>";
1500+
JSONObject jsonObject = XML.toJSONObject(xmlStr);
1501+
assertEquals("A", jsonObject.getString("a"));
1502+
}
1503+
14291504
}
14301505

14311506

0 commit comments

Comments
 (0)