1313import org .apache .lucene .util .BytesRef ;
1414
1515import com .github .oeuvres .alix .util .Report ;
16+ import com .github .oeuvres .alix .util .SideFiles ;
1617
1718import java .io .BufferedInputStream ;
1819import java .io .BufferedOutputStream ;
2122import java .io .EOFException ;
2223import java .io .IOException ;
2324import java .io .OutputStream ;
24- import java .nio .charset .StandardCharsets ;
25- import java .nio .file .FileAlreadyExistsException ;
2625import java .nio .file .Files ;
27- import java .nio .file .NoSuchFileException ;
2826import java .nio .file .Path ;
29- import java .nio .file .StandardCopyOption ;
3027import java .nio .file .StandardOpenOption ;
3128import java .util .Arrays ;
3229import java .util .BitSet ;
@@ -170,13 +167,14 @@ public final class FieldStats implements ReferenceStats
170167 *
171168 * @param dataDir Lucene directory that contains the index and the stats file
172169 * @param field indexed field
173- * @param vocabSize number of distinct terms
174- * @param docCount number of documents that contain the field
175170 * @param maxDoc Lucene document-address space size
176- * @param totalTermFreq total number of tokens in the field
177- * @param docFreqs per-term document frequencies
171+ * @param docWidths exact field token counts by global doc id
172+ * @param fieldWidth sum of all docWidths
173+ * @param vocabSize number of distinct terms
174+ * @param fieldDocs number of documents that contain the field
175+ * @param fieldTokens total number of tokens in the field
176+ * @param termDocs per-term document frequencies
178177 * @param termFreqs per-term total term frequencies
179- * @param posLens exact field token counts by global doc id
180178 */
181179 private FieldStats (
182180 final Path dataDir ,
@@ -190,8 +188,6 @@ private FieldStats(
190188 final int [] termDocs ,
191189 final long [] termFreqs
192190 ) {
193-
194-
195191 this .dataDir = dataDir ;
196192 this .field = field ;
197193 this .maxDoc = maxDoc ;
@@ -220,8 +216,6 @@ public long arraysBytes()
220216 ;
221217 }
222218
223-
224-
225219 /**
226220 * Builds the statistics file for one field from an already opened snapshot reader.
227221 * <p>
@@ -231,6 +225,7 @@ public long arraysBytes()
231225 * @param dataDir directory that will receive the {@code <field>.stats} file
232226 * @param reader snapshot reader that defines the field statistics
233227 * @param field indexed field name
228+ * @param report progress reporter; may be {@code null}
234229 * @throws IOException if the field has no terms, if term frequencies are unavailable,
235230 * if a target file already exists, or if writing fails
236231 */
@@ -246,15 +241,15 @@ public static void build(final Path dataDir, final IndexReader reader, final Str
246241 ByTermStats byTerm = byTermStats (reader , field , report );
247242 // write data
248243 final Path statsPath = statsPath (dataDir , field );
249- ensureAbsent (statsPath );
250- final Path tmp = tmpPath (statsPath );
251- ensureAbsent (tmp );
244+ SideFiles . ensureAbsent (statsPath );
245+ final Path tmp = SideFiles . tmpPath (statsPath );
246+ SideFiles . ensureAbsent (tmp );
252247 try (OutputStream os = new BufferedOutputStream (Files .newOutputStream (tmp , StandardOpenOption .CREATE_NEW ));
253248 DataOutputStream out = new DataOutputStream (os ))
254249 {
255250 out .writeInt (MAGIC );
256251 out .writeInt (VERSION );
257- writeUtf8 (out , field );
252+ SideFiles . writeUtf8 (out , field );
258253 // by doc stats
259254 out .writeInt (maxDoc );
260255 for (int docWidth : docWidths ) {
@@ -270,12 +265,11 @@ public static void build(final Path dataDir, final IndexReader reader, final Str
270265 for (long termFreq : byTerm .termFreqs ) {
271266 out .writeLong (termFreq );
272267 }
273- moveTemp (tmp , statsPath );
268+ SideFiles . moveTemp (tmp , statsPath );
274269 } catch (IOException | RuntimeException e ) {
275- deleteIfExists (tmp );
270+ SideFiles . deleteIfExists (tmp );
276271 throw e ;
277272 }
278-
279273 }
280274
281275 /**
@@ -304,7 +298,6 @@ public int[] docFreqsCopy()
304298
305299 /**
306300 * Returns the number of token positions of the field for one global Lucene document id.
307-
308301 */
309302 public int docWidth (final int docId )
310303 {
@@ -362,6 +355,8 @@ public long fieldTokens()
362355 return fieldTokens ;
363356 }
364357
358+
359+
365360 /**
366361 * Returns the Lucene directory from which these statistics were opened.
367362 *
@@ -388,8 +383,9 @@ public int maxDoc()
388383 /**
389384 * Opens the persisted statistics for one field from a frozen Lucene directory.
390385 *
391- * @param dataDir Lucene directory that contains the index and the stats file
392- * @param field indexed field name
386+ * @param dataDir directory that contains the stats file
387+ * @param reader snapshot reader used to cross-check maxDoc
388+ * @param field indexed field name
393389 * @return opened immutable field statistics
394390 * @throws IOException if the file is missing, inconsistent or unreadable
395391 */
@@ -399,7 +395,7 @@ public static FieldStats open(final Path dataDir, final IndexReader reader, fina
399395 Objects .requireNonNull (field , "field" );
400396
401397 final Path path = statsPath (dataDir , field );
402- ensureRegularFile (path );
398+ SideFiles . ensureRegularFile (path );
403399
404400 try (DataInputStream in = new DataInputStream (
405401 new BufferedInputStream (Files .newInputStream (path , StandardOpenOption .READ ))))
@@ -414,7 +410,7 @@ public static FieldStats open(final Path dataDir, final IndexReader reader, fina
414410 throw new IOException ("Unsupported stats file version " + version + ": " + path );
415411 }
416412
417- final String fieldFound = readUtf8 (in );
413+ final String fieldFound = SideFiles . readUtf8 (in );
418414 if (!field .equals (fieldFound )) {
419415 throw new IOException (
420416 "Field mismatch in stats file: requested '" + field + "', found '" + fieldFound + "'" );
@@ -482,6 +478,7 @@ public static FieldStats open(final Path dataDir, final IndexReader reader, fina
482478 * @param indexDir Lucene directory that will receive the sidecar file
483479 * @param reader snapshot reader for building (ignored if file exists)
484480 * @param field indexed field name
481+ * @param report progress reporter; may be {@code null}
485482 * @return opened immutable field statistics
486483 * @throws IOException if building or opening fails
487484 */
@@ -712,7 +709,9 @@ public static ByTermStats byTermStats (
712709 * Otherwise the method counts terms by iteration.
713710 * </p>
714711 *
715- * @param terms merged field terms
712+ * @param reader index reader
713+ * @param field indexed field name
714+ * @param report progress reporter
716715 * @return vocabulary size
717716 * @throws IOException if term iteration fails
718717 */
@@ -810,81 +809,6 @@ private void checkTermId(final int termId)
810809 }
811810 }
812811
813- /**
814- * Deletes a file if it exists.
815- *
816- * @param path path to delete
817- */
818- private static void deleteIfExists (final Path path )
819- {
820- try {
821- Files .deleteIfExists (path );
822- } catch (IOException ignored ) {
823- // best-effort cleanup only
824- }
825- }
826-
827- /**
828- * Ensures that a file does not already exist.
829- *
830- * @param path target path
831- * @throws FileAlreadyExistsException if the path already exists
832- */
833- private static void ensureAbsent (final Path path ) throws FileAlreadyExistsException
834- {
835- if (Files .exists (path )) {
836- throw new FileAlreadyExistsException (path .toString ());
837- }
838- }
839-
840- /**
841- * Ensures that a file exists and is a regular file.
842- *
843- * @param path path to check
844- * @throws IOException if the file does not exist or is not regular
845- */
846- private static void ensureRegularFile (final Path path ) throws IOException
847- {
848- if (!Files .isRegularFile (path )) {
849- throw new NoSuchFileException (path .toString ());
850- }
851- }
852-
853-
854- /**
855- * Moves one temporary file into its final location.
856- *
857- * @param source temporary file path
858- * @param target final file path
859- * @throws IOException if the move fails
860- */
861- private static void moveTemp (final Path source , final Path target ) throws IOException
862- {
863- try {
864- Files .move (source , target , StandardCopyOption .ATOMIC_MOVE );
865- } catch (IOException e ) {
866- Files .move (source , target );
867- }
868- }
869-
870- /**
871- * Reads one UTF-8 string preceded by its byte length.
872- *
873- * @param in source stream
874- * @return decoded string
875- * @throws IOException if reading fails or if the encoded length is invalid
876- */
877- private static String readUtf8 (final DataInputStream in ) throws IOException
878- {
879- final int length = in .readInt ();
880- if (length < 0 ) {
881- throw new IOException ("Negative UTF-8 byte length: " + length );
882- }
883- final byte [] bytes = new byte [length ];
884- in .readFully (bytes );
885- return new String (bytes , StandardCharsets .UTF_8 );
886- }
887-
888812 /**
889813 * Returns the path of the persisted statistics file for one field.
890814 *
@@ -896,29 +820,4 @@ private static Path statsPath(final Path indexDir, final String field)
896820 {
897821 return indexDir .resolve (field + ".stats" );
898822 }
899-
900- /**
901- * Returns the temporary path used while writing one file.
902- *
903- * @param path final target path
904- * @return sibling temporary path with {@code .tmp} suffix
905- */
906- private static Path tmpPath (final Path path )
907- {
908- return path .resolveSibling (path .getFileName ().toString () + ".tmp" );
909- }
910-
911- /**
912- * Writes one UTF-8 string preceded by its byte length.
913- *
914- * @param out destination stream
915- * @param s string to write
916- * @throws IOException if writing fails
917- */
918- private static void writeUtf8 (final DataOutputStream out , final String s ) throws IOException
919- {
920- final byte [] bytes = s .getBytes (StandardCharsets .UTF_8 );
921- out .writeInt (bytes .length );
922- out .write (bytes );
923- }
924- }
823+ }
0 commit comments