66import org .netpreserve .jwarc .*;
77
88import java .io .IOException ;
9+ import java .net .URI ;
910import java .io .StringWriter ;
1011import java .nio .file .Files ;
1112import java .nio .file .Path ;
@@ -188,4 +189,89 @@ public void testFilteringRequest_shouldWork() throws Exception {
188189
189190 }
190191
192+ @ Test
193+ public void testFilteringRequestAndResponse_shouldWork () throws Exception {
194+ Path testWarcFile = temporaryFolder .newFile ().toPath ().toAbsolutePath ();
195+
196+ try (WarcWriter warcWriter = new WarcWriter (Files .newByteChannel (testWarcFile , CREATE , WRITE ))) {
197+ HttpRequest httpRequest = new HttpRequest .Builder ("GET" , "http://example.org/" )
198+ .build ();
199+
200+ warcWriter .write (new WarcRequest .Builder ("http://example.org/" )
201+ .date (Instant .parse ("2022-03-02T21:44:34Z" ))
202+ .body (httpRequest )
203+ .payloadDigest ("sha256" , "b04af472c47a8b1b5059b3404caac0e1bfb5a3c07b329be66f65cfab5ee8d3faaa" )
204+ .build ());
205+
206+ HttpResponse httpResponse = new HttpResponse .Builder (200 , "OK" )
207+ .body (MediaType .HTML , new byte [0 ])
208+ .build ();
209+
210+ warcWriter .write (new WarcResponse .Builder ("http://example.org/" )
211+ .date (Instant .parse ("2022-03-01T12:45:34Z" ))
212+ .body (httpResponse )
213+ .payloadDigest ("sha256" , "b04af472c47a8b1b5059b3404caac0e1bfb5a3c07b329be66f65cfab5ee8d3f3" )
214+ .build ());
215+ }
216+
217+ StringWriter cdxBuffer = new StringWriter ();
218+ try (CdxWriter cdxWriter = new CdxWriter (cdxBuffer )) {
219+ cdxWriter .setRecordFilter (record -> record .type ().equals ("request" ) || record .type ().equals ("response" ));
220+ cdxWriter .setFormat (CdxFormat .CDXJ );
221+ cdxWriter .setSort (true );
222+ cdxWriter .process (Collections .singletonList (testWarcFile ), true );
223+ }
224+
225+ List <String > splits = cdxBuffer .toString ().isEmpty ()
226+ ? Collections .emptyList ()
227+ : Arrays .asList (cdxBuffer .toString ().split ("\n " ));
228+
229+ assertThat (splits , hasSize (2 ));
230+ assertThat (splits .get (0 ), not (emptyString ()));
231+ assertThat (splits .get (0 ), startsWith ("org,example)/ 20220301124534" ));
232+ assertThat (splits .get (0 ), containsString ("http://example.org/" ));
233+ assertThat (splits .get (0 ), containsString ("\" status\" : \" 200\" " ));
234+ assertThat (splits .get (1 ), startsWith ("org,example)/ 20220302214434" ));
235+ assertThat (splits .get (1 ), containsString ("http://example.org/" ));
236+ assertThat (splits .get (1 ), not (containsString ("\" status\" : \" 200\" " )));
237+
238+
239+ }
240+
241+ @ Test
242+ public void testFilteringResource_shouldWork () throws Exception {
243+ Path testWarcFile = temporaryFolder .newFile ().toPath ().toAbsolutePath ();
244+
245+ try (WarcWriter warcWriter = new WarcWriter (Files .newByteChannel (testWarcFile , CREATE , WRITE ))) {
246+ // Add a Resource record
247+ warcWriter .write (new WarcResource .Builder (URI .create ("http://example.org/resource.png" ))
248+ .date (Instant .parse ("2022-03-02T21:44:34Z" ))
249+ .body (MediaType .parse ("image/png" ), new byte [0 ])
250+ .payloadDigest ("sha256" , "b04af472c47a8b1b5059b3404caac0e1bfb5a3c07b329be66f65cfab5ee8d3faaa" )
251+ .build ());
252+
253+ // Add a Metadata record
254+ warcWriter .write (new WarcMetadata .Builder ()
255+ .targetURI (URI .create ("http://example.org/metadata" ))
256+ .date (Instant .parse ("2022-03-01T12:44:34Z" ))
257+ .body (MediaType .parse ("application/warc-fields" ), "foo: bar" .getBytes ())
258+ .build ());
259+ }
260+
261+ StringWriter cdxBuffer = new StringWriter ();
262+ try (CdxWriter cdxWriter = new CdxWriter (cdxBuffer )) {
263+ cdxWriter .setRecordFilter (record -> record .type ().equals ("resource" ));
264+ cdxWriter .setFormat (CdxFormat .CDXJ );
265+ cdxWriter .setSort (true );
266+ cdxWriter .process (Collections .singletonList (testWarcFile ), true );
267+ }
268+
269+ List <String > splits = cdxBuffer .toString ().isEmpty ()
270+ ? Collections .emptyList ()
271+ : Arrays .asList (cdxBuffer .toString ().split ("\n " ));
272+
273+ assertThat (splits , hasSize (1 ));
274+ assertThat (splits .get (0 ), containsString ("http://example.org/resource.png" ));
275+ assertThat (splits .get (0 ), containsString ("\" mime\" : \" image/png\" " ));
276+ }
191277}
0 commit comments