@@ -119,7 +119,20 @@ private void formatJsonBlock(WarcCaptureRecord record, String filename, long pos
119119 value = PYWB_REVISIT_MIMETYPE ;
120120 } else {
121121 try {
122- value = record .payload ().map (p -> p .type ().base ()).orElse (MediaType .OCTET_STREAM ).toString ();
122+ if (record instanceof WarcResponse &&
123+ record .contentType ().equals (MediaType .HTTP_RESPONSE )) {
124+ value = ((WarcResponse ) record ).http ().headers ().first ("Content-Type" )
125+ .map (s -> MediaType .parseLeniently (s ).base ().toString ())
126+ .orElse (null );
127+ } else if (record instanceof WarcResource ) {
128+ value = record .headers ().first ("Content-Type" )
129+ .map (s -> MediaType .parseLeniently (s ).base ().toString ())
130+ .orElse (null );
131+ } else {
132+ value = record .payload ()
133+ .map (p -> p .type ().base ())
134+ .map (Object ::toString ).orElse (null );
135+ }
123136 } catch (IOException e ) {
124137 value = null ;
125138 }
@@ -130,7 +143,8 @@ private void formatJsonBlock(WarcCaptureRecord record, String filename, long pos
130143 break ;
131144 case "status" :
132145 try {
133- value = String .valueOf (statusCode (record ));
146+ Integer status = statusCode (record );
147+ value = status == 0 ? null : String .valueOf (status );
134148 } catch (IOException e ) {
135149 value = null ;
136150 }
@@ -184,7 +198,7 @@ private static int statusCode(WarcCaptureRecord record) throws IOException {
184198 return ((WarcResponse ) record ).gemini ().statusHttpEquivalent ();
185199 }
186200 }
187- return 200 ;
201+ return 0 ;
188202 }
189203
190204 String formatField (byte fieldName , WarcCaptureRecord record , String filename , long position , long size , String urlkey ) throws IOException {
@@ -223,17 +237,9 @@ String formatField(byte fieldName, WarcCaptureRecord record, String filename, lo
223237 return "-" ;
224238 }
225239 case RESPONSE_CODE :
226- if (record instanceof WarcResponse || record instanceof WarcRevisit ) {
227- if (record instanceof WarcRevisit ) {
228- return Integer .toString (((WarcRevisit ) record ).http ().status ());
229- }
230- else if (record .contentType ().base ().equals (MediaType .HTTP )) {
231- return Integer .toString (((WarcResponse ) record ).http ().status ());
232- } else if (record .contentType ().base ().equals (MediaType .GEMINI )) {
233- return String .format ("%02d" , ((WarcResponse ) record ).gemini ().statusHttpEquivalent ());
234- }
235- }
236- return Integer .toString (statusCode (record ));
240+ int status = statusCode (record );
241+ if (status == 0 ) status = 200 ;
242+ return Integer .toString (status );
237243 default :
238244 throw new IllegalArgumentException ("Unknown CDX field: " + (char ) fieldName );
239245 }
0 commit comments