2222import java .util .Arrays ;
2323import java .util .Objects ;
2424
25- import org .slf4j .Logger ;
26- import org .slf4j .LoggerFactory ;
27-
2825/**
2926 * <p>Build a string from a sequence of bytes and/or characters.</p>
3027 * <p>Implementations of this interface are optimized for processing a mix of calls to already decoded
31- * character based appends (e.g. {@link #append(char)} and calls to undecoded byte methods (e.g. {@link #append(byte)}.
28+ * character based appends (e.g. {@link #append(char)}) and calls to undecoded byte methods (e.g. {@link #append(byte)}) .
3229 * This is particularly useful for decoding % encoded strings that are mostly already decoded but may contain
3330 * escaped byte sequences that are not decoded. The standard {@link CharsetDecoder} API is not well suited for this
3431 * use-case.</p>
@@ -274,10 +271,9 @@ public void reset()
274271
275272 class DecoderStringBuilder implements CharsetStringBuilder
276273 {
277- private static final Logger LOG = LoggerFactory .getLogger (DecoderStringBuilder .class );
278274 private final CharsetDecoder _decoder ;
279275 private final StringBuilder _stringBuilder = new StringBuilder (32 );
280- private ByteBuffer _buffer = ByteBuffer . allocate ( 32 ) ;
276+ private ByteBuffer _buffer ;
281277
282278 public DecoderStringBuilder (CharsetDecoder charsetDecoder , CodingErrorAction onMalformedInput , CodingErrorAction onUnmappableCharacter )
283279 {
@@ -288,11 +284,18 @@ public DecoderStringBuilder(CharsetDecoder charsetDecoder, CodingErrorAction onM
288284
289285 private void ensureSpace (int needed )
290286 {
291- int space = _buffer .remaining ();
292- if (space < needed )
287+ if (_buffer == null )
288+ {
289+ _buffer = ByteBuffer .allocate (needed + 32 );
290+ }
291+ else
293292 {
294- int position = _buffer .position ();
295- _buffer = ByteBuffer .wrap (Arrays .copyOf (_buffer .array (), _buffer .capacity () + needed - space + 32 )).position (position );
293+ int space = _buffer .remaining ();
294+ if (space < needed )
295+ {
296+ int position = _buffer .position ();
297+ _buffer = ByteBuffer .wrap (Arrays .copyOf (_buffer .array (), _buffer .capacity () + needed - space + 32 )).position (position );
298+ }
296299 }
297300 }
298301
@@ -306,43 +309,51 @@ public void append(byte b)
306309 @ Override
307310 public void append (char c )
308311 {
309- if (_buffer .position () > 0 )
312+ if (_buffer != null && _buffer .position () > 0 )
310313 {
311- try
314+ // If we have seen a byte append, then assume all following character appends are mistakes,
315+ // but try to decode safely if we can.
316+ if (c > 0xFF )
312317 {
313- // Append any data already in the decoder
314- _stringBuilder .append (_decoder .decode (_buffer .flip ()));
315- _buffer .clear ();
318+ // Try to decode and continue
319+ try
320+ {
321+ CharSequence decoded = _decoder .decode (_buffer .flip ());
322+ _buffer .clear ();
323+ _stringBuilder .append (decoded );
324+ _stringBuilder .append (c );
325+ }
326+ catch (CharacterCodingException e )
327+ {
328+ if (_decoder .malformedInputAction () == CodingErrorAction .IGNORE )
329+ return ;
330+ if (_decoder .malformedInputAction () == CodingErrorAction .REPLACE )
331+ {
332+ _buffer .clear ();
333+ _stringBuilder .append (_decoder .replacement ());
334+ return ;
335+ }
336+ throw new IllegalArgumentException ("Invalid character " + Integer .toHexString (c ), e );
337+ }
316338 }
317- catch ( CharacterCodingException e )
339+ else
318340 {
319- // This will be thrown only if the decoder is configured to REPORT,
320- // otherwise errors will be ignored or replaced and we will not catch here.
321- throw new RuntimeException ( e );
341+ // This only works for charsets that are true supersets of USASCII
342+ ensureSpace ( 1 );
343+ _buffer . put (( byte ) c );
322344 }
323345 }
324- _stringBuilder .append (c );
346+ else
347+ {
348+ _stringBuilder .append (c );
349+ }
325350 }
326351
327352 @ Override
328353 public void append (CharSequence chars , int offset , int length )
329354 {
330- if (_buffer .position () > 0 )
331- {
332- try
333- {
334- // Append any data already in the decoder
335- _stringBuilder .append (_decoder .decode (_buffer .flip ()));
336- _buffer .clear ();
337- }
338- catch (CharacterCodingException e )
339- {
340- // This will be thrown only if the decoder is configured to REPORT,
341- // otherwise errors will be ignored or replaced and we will not catch here.
342- throw new RuntimeException (e );
343- }
344- }
345- _stringBuilder .append (chars , offset , offset + length );
355+ for (int idx = offset ; idx < offset + length ; idx ++)
356+ append (chars .charAt (idx ));
346357 }
347358
348359 @ Override
@@ -368,7 +379,7 @@ public String build(boolean ignore) throws CharacterCodingException
368379 // and onUnmappableCharacter(CodingErrorAction)
369380 try
370381 {
371- if (_buffer .position () > 0 )
382+ if (_buffer != null && _buffer .position () > 0 )
372383 {
373384 CharSequence decoded = _decoder .decode (_buffer .flip ());
374385 _buffer .clear ();
@@ -394,6 +405,8 @@ public int length()
394405 public void reset ()
395406 {
396407 _stringBuilder .setLength (0 );
408+ if (_buffer != null )
409+ _buffer .clear ();
397410 }
398411 }
399412}
0 commit comments