Skip to content

Commit e38b0a3

Browse files
joakimegregw
andauthored
Issue #13683 - UrlParameterDecoder fixed for mixed bytes/chars pct-encoding usage. (#13684)
* Cleanup CharsetStringBuilder.DecoderStringBuilder * Introduce more CharsetStringBuilderTest methods * Introduce UrlParameterDecoderTest.testSjisNonStandardForm() --------- Co-authored-by: Greg Wilkins <[email protected]>
1 parent 74763ad commit e38b0a3

File tree

3 files changed

+293
-61
lines changed

3 files changed

+293
-61
lines changed

jetty-core/jetty-util/src/main/java/org/eclipse/jetty/util/CharsetStringBuilder.java

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,10 @@
2222
import java.util.Arrays;
2323
import java.util.Objects;
2424

25-
import org.slf4j.Logger;
26-
import org.slf4j.LoggerFactory;
27-
2825
/**
2926
* <p>Build a string from a sequence of bytes and/or characters.</p>
3027
* <p>Implementations of this interface are optimized for processing a mix of calls to already decoded
31-
* character based appends (e.g. {@link #append(char)} and calls to undecoded byte methods (e.g. {@link #append(byte)}.
28+
* character based appends (e.g. {@link #append(char)}) and calls to undecoded byte methods (e.g. {@link #append(byte)}).
3229
* This is particularly useful for decoding % encoded strings that are mostly already decoded but may contain
3330
* escaped byte sequences that are not decoded. The standard {@link CharsetDecoder} API is not well suited for this
3431
* use-case.</p>
@@ -274,10 +271,9 @@ public void reset()
274271

275272
class DecoderStringBuilder implements CharsetStringBuilder
276273
{
277-
private static final Logger LOG = LoggerFactory.getLogger(DecoderStringBuilder.class);
278274
private final CharsetDecoder _decoder;
279275
private final StringBuilder _stringBuilder = new StringBuilder(32);
280-
private ByteBuffer _buffer = ByteBuffer.allocate(32);
276+
private ByteBuffer _buffer;
281277

282278
public DecoderStringBuilder(CharsetDecoder charsetDecoder, CodingErrorAction onMalformedInput, CodingErrorAction onUnmappableCharacter)
283279
{
@@ -288,11 +284,18 @@ public DecoderStringBuilder(CharsetDecoder charsetDecoder, CodingErrorAction onM
288284

289285
private void ensureSpace(int needed)
290286
{
291-
int space = _buffer.remaining();
292-
if (space < needed)
287+
if (_buffer == null)
288+
{
289+
_buffer = ByteBuffer.allocate(needed + 32);
290+
}
291+
else
293292
{
294-
int position = _buffer.position();
295-
_buffer = ByteBuffer.wrap(Arrays.copyOf(_buffer.array(), _buffer.capacity() + needed - space + 32)).position(position);
293+
int space = _buffer.remaining();
294+
if (space < needed)
295+
{
296+
int position = _buffer.position();
297+
_buffer = ByteBuffer.wrap(Arrays.copyOf(_buffer.array(), _buffer.capacity() + needed - space + 32)).position(position);
298+
}
296299
}
297300
}
298301

@@ -306,43 +309,51 @@ public void append(byte b)
306309
@Override
307310
public void append(char c)
308311
{
309-
if (_buffer.position() > 0)
312+
if (_buffer != null && _buffer.position() > 0)
310313
{
311-
try
314+
// If we have seen a byte append, then assume all following character appends are mistakes,
315+
// but try to decode safely if we can.
316+
if (c > 0xFF)
312317
{
313-
// Append any data already in the decoder
314-
_stringBuilder.append(_decoder.decode(_buffer.flip()));
315-
_buffer.clear();
318+
// Try to decode and continue
319+
try
320+
{
321+
CharSequence decoded = _decoder.decode(_buffer.flip());
322+
_buffer.clear();
323+
_stringBuilder.append(decoded);
324+
_stringBuilder.append(c);
325+
}
326+
catch (CharacterCodingException e)
327+
{
328+
if (_decoder.malformedInputAction() == CodingErrorAction.IGNORE)
329+
return;
330+
if (_decoder.malformedInputAction() == CodingErrorAction.REPLACE)
331+
{
332+
_buffer.clear();
333+
_stringBuilder.append(_decoder.replacement());
334+
return;
335+
}
336+
throw new IllegalArgumentException("Invalid character " + Integer.toHexString(c), e);
337+
}
316338
}
317-
catch (CharacterCodingException e)
339+
else
318340
{
319-
// This will be thrown only if the decoder is configured to REPORT,
320-
// otherwise errors will be ignored or replaced and we will not catch here.
321-
throw new RuntimeException(e);
341+
// This only works for charsets that are true supersets of USASCII
342+
ensureSpace(1);
343+
_buffer.put((byte)c);
322344
}
323345
}
324-
_stringBuilder.append(c);
346+
else
347+
{
348+
_stringBuilder.append(c);
349+
}
325350
}
326351

327352
@Override
328353
public void append(CharSequence chars, int offset, int length)
329354
{
330-
if (_buffer.position() > 0)
331-
{
332-
try
333-
{
334-
// Append any data already in the decoder
335-
_stringBuilder.append(_decoder.decode(_buffer.flip()));
336-
_buffer.clear();
337-
}
338-
catch (CharacterCodingException e)
339-
{
340-
// This will be thrown only if the decoder is configured to REPORT,
341-
// otherwise errors will be ignored or replaced and we will not catch here.
342-
throw new RuntimeException(e);
343-
}
344-
}
345-
_stringBuilder.append(chars, offset, offset + length);
355+
for (int idx = offset; idx < offset + length; idx++)
356+
append(chars.charAt(idx));
346357
}
347358

348359
@Override
@@ -368,7 +379,7 @@ public String build(boolean ignore) throws CharacterCodingException
368379
// and onUnmappableCharacter(CodingErrorAction)
369380
try
370381
{
371-
if (_buffer.position() > 0)
382+
if (_buffer != null && _buffer.position() > 0)
372383
{
373384
CharSequence decoded = _decoder.decode(_buffer.flip());
374385
_buffer.clear();
@@ -394,6 +405,8 @@ public int length()
394405
public void reset()
395406
{
396407
_stringBuilder.setLength(0);
408+
if (_buffer != null)
409+
_buffer.clear();
397410
}
398411
}
399412
}

0 commit comments

Comments
 (0)