15
15
import java .nio .channels .WritableByteChannel ;
16
16
import java .nio .file .Path ;
17
17
import java .nio .file .Paths ;
18
- import java .util .ArrayList ;
19
- import java .util .List ;
20
- import java .util .Optional ;
18
+ import java .util .*;
21
19
22
20
import static java .nio .charset .StandardCharsets .US_ASCII ;
23
21
import static java .nio .charset .StandardCharsets .UTF_8 ;
@@ -85,6 +83,7 @@ private static void usage(int exitValue) {
85
83
System .err .println ();
86
84
System .err .println ("Options:" );
87
85
System .err .println ();
86
+ System .err .println (" --concurrent\t also outputs any immediately following concurrent records" );
88
87
System .err .println (" --headers\t output only record (and HTTP) headers" );
89
88
System .err .println (" --payload\t output only record payload, if necessary" );
90
89
System .err .println (" \t decode transfer and/or content encoding" );
@@ -95,11 +94,16 @@ public static void main(String[] args) throws IOException {
95
94
ExtractAction action = ExtractAction .RECORD ;
96
95
Path warcFile = null ;
97
96
List <Long > offsets = new ArrayList <>();
97
+ boolean extractConcurrent = false ;
98
98
for (String arg : args ) {
99
99
switch (arg ) {
100
100
case "-h" :
101
101
case "--help" :
102
102
usage (0 );
103
+ break ;
104
+ case "--concurrent" :
105
+ extractConcurrent = true ;
106
+ break ;
103
107
case "--headers" :
104
108
action = ExtractAction .HEADERS ;
105
109
break ;
@@ -128,7 +132,9 @@ public static void main(String[] args) throws IOException {
128
132
}
129
133
if (warcFile == null || offsets .isEmpty ()) {
130
134
usage (1 );
135
+ return ;
131
136
}
137
+ WritableByteChannel out = Channels .newChannel (System .out );
132
138
for (long offset : offsets ) {
133
139
try (FileChannel channel = FileChannel .open (warcFile );
134
140
WarcReader reader = new WarcReader (channel .position (offset ))) {
@@ -137,22 +143,36 @@ public static void main(String[] args) throws IOException {
137
143
System .err .println ("No record found at position " + offset );
138
144
System .exit (1 );
139
145
}
140
- WritableByteChannel out = Channels .newChannel (System .out );
141
- switch (action ) {
142
- case RECORD :
143
- writeWarcHeaders (out , record .get ());
144
- writeBody (out , record .get ().body ());
145
- out .write (ByteBuffer .wrap ("\r \n \r \n " .getBytes (US_ASCII )));
146
- break ;
147
- case HEADERS :
148
- writeWarcHeaders (out , record .get ());
149
- writeHttpHeaders (out , record .get ());
150
- break ;
151
- case PAYLOAD :
152
- writePayload (out , record .get ());
153
- break ;
146
+
147
+ writeRecord (record .get (), out , action );
148
+
149
+ if (extractConcurrent ) {
150
+ ConcurrentRecordSet concurrentSet = new ConcurrentRecordSet ();
151
+ while (true ) {
152
+ concurrentSet .add (record .get ());
153
+ record = reader .next ();
154
+ if (!record .isPresent () || !concurrentSet .contains (record .get ())) break ;
155
+ writeRecord (record .get (), out , action );
156
+ }
154
157
}
155
158
}
156
159
}
157
160
}
161
+
162
+ private static void writeRecord (WarcRecord record , WritableByteChannel out , ExtractAction action ) throws IOException {
163
+ switch (action ) {
164
+ case RECORD :
165
+ writeWarcHeaders (out , record );
166
+ writeBody (out , record .body ());
167
+ out .write (ByteBuffer .wrap ("\r \n \r \n " .getBytes (US_ASCII )));
168
+ break ;
169
+ case HEADERS :
170
+ writeWarcHeaders (out , record );
171
+ writeHttpHeaders (out , record );
172
+ break ;
173
+ case PAYLOAD :
174
+ writePayload (out , record );
175
+ break ;
176
+ }
177
+ }
158
178
}
0 commit comments