11/*
22 * SPDX-License-Identifier: Apache-2.0
3- * Copyright (C) 2018-2022 National Library of Australia and the jwarc contributors
3+ * Copyright (C) 2018-2025 National Library of Australia and the jwarc contributors
44 */
55
66package org .netpreserve .jwarc ;
77
88import java .io .*;
9+ import java .lang .reflect .InvocationTargetException ;
910import java .nio .ByteBuffer ;
1011import java .nio .ByteOrder ;
1112import java .nio .channels .Channels ;
@@ -61,7 +62,7 @@ public WarcReader(ReadableByteChannel channel, ByteBuffer buffer) throws IOExcep
6162 startPosition = tryPosition (channel );
6263 position = startPosition ;
6364
64- while (buffer .remaining () < 2 ) {
65+ while (buffer .remaining () < 4 ) {
6566 buffer .compact ();
6667 int n = channel .read (buffer );
6768 buffer .flip ();
@@ -78,17 +79,40 @@ public WarcReader(ReadableByteChannel channel, ByteBuffer buffer) throws IOExcep
7879 }
7980 }
8081
81- if ((buffer .order () == ByteOrder .LITTLE_ENDIAN && buffer .getShort (buffer .position ()) == (short ) 0x8b1f )
82- || (buffer .order () == ByteOrder .BIG_ENDIAN && buffer .getShort (buffer .position ()) == 0x1f8b )) {
83- this .channel = new GunzipChannel (channel , buffer );
84- this .buffer = ByteBuffer .allocate (8192 );
85- this .buffer .flip ();
86- compression = WarcCompression .GZIP ;
87- } else {
88- this .channel = channel ;
89- this .buffer = buffer ;
90- compression = WarcCompression .NONE ;
82+ ByteOrder originalOrder = buffer .order ();
83+ try {
84+ buffer .order (ByteOrder .LITTLE_ENDIAN );
85+
86+ if (buffer .getShort (buffer .position ()) == GzipChannel .GZIP_MAGIC ) {
87+ this .channel = new GunzipChannel (channel , buffer );
88+ this .buffer = ByteBuffer .allocate (8192 );
89+ this .buffer .flip ();
90+ compression = WarcCompression .GZIP ;
91+ } else if (buffer .getInt (buffer .position ()) == 0xfd2fb528 || buffer .getInt (buffer .position ()) == 0x184D2A5D ) {
92+ try {
93+ this .channel = (ReadableByteChannel ) Class .forName ("org.netpreserve.jwarc.ZstdDecompressingChannel" )
94+ .getConstructor (ReadableByteChannel .class , ByteBuffer .class )
95+ .newInstance (channel , buffer );
96+ } catch (InstantiationException | IllegalAccessException | InvocationTargetException |
97+ NoSuchMethodException | ClassNotFoundException e ) {
98+ throw new IOException (e );
99+ }
100+ this .buffer = ByteBuffer .allocate (8192 );
101+ this .buffer .flip ();
102+ compression = WarcCompression .ZSTD ;
103+
104+ // update position in case we read a dictionary frame
105+ position = ((DecompressingChannel ) this .channel ).inputPosition ();
106+ } else {
107+ this .channel = channel ;
108+ this .buffer = buffer ;
109+ compression = WarcCompression .NONE ;
110+ }
111+
112+ } finally {
113+ buffer .order (originalOrder );
91114 }
115+
92116 underlyingChannel = channel ;
93117 }
94118
@@ -150,8 +174,8 @@ public Optional<WarcRecord> next() throws IOException {
150174 record .body ().close ();
151175 long trailerLength = consumeTrailer ();
152176
153- if (channel instanceof GunzipChannel ) {
154- position = startPosition + ((GunzipChannel ) channel ).inputPosition ();
177+ if (channel instanceof DecompressingChannel ) {
178+ position = startPosition + ((DecompressingChannel ) channel ).inputPosition ();
155179 } else {
156180 position += headerLength + record .body ().size () + trailerLength ;
157181 }
@@ -309,8 +333,8 @@ public void position(long newPosition) throws IOException {
309333 record .body ().close ();
310334 record = null ;
311335 }
312- if (compression == WarcCompression . GZIP ) {
313- ((GunzipChannel )channel ).reset ();
336+ if (channel instanceof DecompressingChannel ) {
337+ ((DecompressingChannel )channel ).reset ();
314338 }
315339 }
316340
0 commit comments