Skip to content

Commit d8ac45c

Browse files
committed
Refactor with separate IO util
1 parent 7acef61 commit d8ac45c

6 files changed

Lines changed: 91 additions & 453 deletions

File tree

common/src/java/com/github/oeuvres/alix/lucene/FlucText.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import com.github.oeuvres.alix.lucene.terms.TermLexicon;
1111
import com.github.oeuvres.alix.lucene.terms.TermRail;
1212
import com.github.oeuvres.alix.lucene.terms.ThemeTerms;
13+
import com.github.oeuvres.alix.util.Report;
1314

1415
/**
1516
* A tokenized field with positions: the primary Alix field type.
@@ -100,8 +101,8 @@ public synchronized FieldStats fieldStats()
100101
{
101102
return fieldStatsHolder.get(
102103
() -> FieldStats.exists(indexDir, name()),
103-
() -> FieldStats.build(indexDir, reader, name()),
104-
() -> FieldStats.open(indexDir, name())
104+
() -> FieldStats.build(indexDir, reader, name(), Report.ReportNull.INSTANCE),
105+
() -> FieldStats.open(indexDir, reader, name())
105106
);
106107
}
107108

@@ -134,7 +135,7 @@ public synchronized TermRail termRail()
134135
final TermLexicon lex = termLexicon();
135136
return railHolder.get(
136137
() -> TermRail.exists(indexDir, name()),
137-
() -> TermRail.build(indexDir, reader, name(), lex),
138+
() -> TermRail.build(indexDir, reader, name(), lex, Report.ReportNull.INSTANCE),
138139
() -> TermRail.open(indexDir, name())
139140
);
140141
}

common/src/java/com/github/oeuvres/alix/lucene/terms/FieldStats.java

Lines changed: 26 additions & 127 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.util.BytesRef;
1414

1515
import com.github.oeuvres.alix.util.Report;
16+
import com.github.oeuvres.alix.util.SideFiles;
1617

1718
import java.io.BufferedInputStream;
1819
import java.io.BufferedOutputStream;
@@ -21,12 +22,8 @@
2122
import java.io.EOFException;
2223
import java.io.IOException;
2324
import java.io.OutputStream;
24-
import java.nio.charset.StandardCharsets;
25-
import java.nio.file.FileAlreadyExistsException;
2625
import java.nio.file.Files;
27-
import java.nio.file.NoSuchFileException;
2826
import java.nio.file.Path;
29-
import java.nio.file.StandardCopyOption;
3027
import java.nio.file.StandardOpenOption;
3128
import java.util.Arrays;
3229
import java.util.BitSet;
@@ -170,13 +167,14 @@ public final class FieldStats implements ReferenceStats
170167
*
171168
* @param dataDir Lucene directory that contains the index and the stats file
172169
* @param field indexed field
173-
* @param vocabSize number of distinct terms
174-
* @param docCount number of documents that contain the field
175170
* @param maxDoc Lucene document-address space size
176-
* @param totalTermFreq total number of tokens in the field
177-
* @param docFreqs per-term document frequencies
171+
* @param docWidths exact field token counts by global doc id
172+
* @param fieldWidth sum of all docWidths
173+
* @param vocabSize number of distinct terms
174+
* @param fieldDocs number of documents that contain the field
175+
* @param fieldTokens total number of tokens in the field
176+
* @param termDocs per-term document frequencies
178177
* @param termFreqs per-term total term frequencies
179-
* @param posLens exact field token counts by global doc id
180178
*/
181179
private FieldStats(
182180
final Path dataDir,
@@ -190,8 +188,6 @@ private FieldStats(
190188
final int[] termDocs,
191189
final long[] termFreqs
192190
) {
193-
194-
195191
this.dataDir = dataDir;
196192
this.field = field;
197193
this.maxDoc = maxDoc;
@@ -220,8 +216,6 @@ public long arraysBytes()
220216
;
221217
}
222218

223-
224-
225219
/**
226220
* Builds the statistics file for one field from an already opened snapshot reader.
227221
* <p>
@@ -231,6 +225,7 @@ public long arraysBytes()
231225
* @param dataDir directory that will receive the {@code <field>.stats} file
232226
* @param reader snapshot reader that defines the field statistics
233227
* @param field indexed field name
228+
* @param report progress reporter; may be {@code null}
234229
* @throws IOException if the field has no terms, if term frequencies are unavailable,
235230
* if a target file already exists, or if writing fails
236231
*/
@@ -246,15 +241,15 @@ public static void build(final Path dataDir, final IndexReader reader, final Str
246241
ByTermStats byTerm = byTermStats(reader, field, report);
247242
// write data
248243
final Path statsPath = statsPath(dataDir, field);
249-
ensureAbsent(statsPath);
250-
final Path tmp = tmpPath(statsPath);
251-
ensureAbsent(tmp);
244+
SideFiles.ensureAbsent(statsPath);
245+
final Path tmp = SideFiles.tmpPath(statsPath);
246+
SideFiles.ensureAbsent(tmp);
252247
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(tmp, StandardOpenOption.CREATE_NEW));
253248
DataOutputStream out = new DataOutputStream(os))
254249
{
255250
out.writeInt(MAGIC);
256251
out.writeInt(VERSION);
257-
writeUtf8(out, field);
252+
SideFiles.writeUtf8(out, field);
258253
// by doc stats
259254
out.writeInt(maxDoc);
260255
for (int docWidth : docWidths) {
@@ -270,12 +265,11 @@ public static void build(final Path dataDir, final IndexReader reader, final Str
270265
for (long termFreq : byTerm.termFreqs) {
271266
out.writeLong(termFreq);
272267
}
273-
moveTemp(tmp, statsPath);
268+
SideFiles.moveTemp(tmp, statsPath);
274269
} catch (IOException | RuntimeException e) {
275-
deleteIfExists(tmp);
270+
SideFiles.deleteIfExists(tmp);
276271
throw e;
277272
}
278-
279273
}
280274

281275
/**
@@ -304,7 +298,6 @@ public int[] docFreqsCopy()
304298

305299
/**
306300
* Returns the number of token positions of the field for one global Lucene document id.
307-
308301
*/
309302
public int docWidth(final int docId)
310303
{
@@ -362,6 +355,8 @@ public long fieldTokens()
362355
return fieldTokens;
363356
}
364357

358+
359+
365360
/**
366361
* Returns the Lucene directory from which these statistics were opened.
367362
*
@@ -388,8 +383,9 @@ public int maxDoc()
388383
/**
389384
* Opens the persisted statistics for one field from a frozen Lucene directory.
390385
*
391-
* @param dataDir Lucene directory that contains the index and the stats file
392-
* @param field indexed field name
386+
* @param dataDir directory that contains the stats file
387+
* @param reader snapshot reader used to cross-check maxDoc
388+
* @param field indexed field name
393389
* @return opened immutable field statistics
394390
* @throws IOException if the file is missing, inconsistent or unreadable
395391
*/
@@ -399,7 +395,7 @@ public static FieldStats open(final Path dataDir, final IndexReader reader, fina
399395
Objects.requireNonNull(field, "field");
400396

401397
final Path path = statsPath(dataDir, field);
402-
ensureRegularFile(path);
398+
SideFiles.ensureRegularFile(path);
403399

404400
try (DataInputStream in = new DataInputStream(
405401
new BufferedInputStream(Files.newInputStream(path, StandardOpenOption.READ))))
@@ -414,7 +410,7 @@ public static FieldStats open(final Path dataDir, final IndexReader reader, fina
414410
throw new IOException("Unsupported stats file version " + version + ": " + path);
415411
}
416412

417-
final String fieldFound = readUtf8(in);
413+
final String fieldFound = SideFiles.readUtf8(in);
418414
if (!field.equals(fieldFound)) {
419415
throw new IOException(
420416
"Field mismatch in stats file: requested '" + field + "', found '" + fieldFound + "'");
@@ -482,6 +478,7 @@ public static FieldStats open(final Path dataDir, final IndexReader reader, fina
482478
* @param indexDir Lucene directory that will receive the sidecar file
483479
* @param reader snapshot reader for building (ignored if file exists)
484480
* @param field indexed field name
481+
* @param report progress reporter; may be {@code null}
485482
* @return opened immutable field statistics
486483
* @throws IOException if building or opening fails
487484
*/
@@ -712,7 +709,9 @@ public static ByTermStats byTermStats (
712709
* Otherwise the method counts terms by iteration.
713710
* </p>
714711
*
715-
* @param terms merged field terms
712+
* @param reader index reader
713+
* @param field indexed field name
714+
* @param report progress reporter
716715
* @return vocabulary size
717716
* @throws IOException if term iteration fails
718717
*/
@@ -810,81 +809,6 @@ private void checkTermId(final int termId)
810809
}
811810
}
812811

813-
/**
814-
* Deletes a file if it exists.
815-
*
816-
* @param path path to delete
817-
*/
818-
private static void deleteIfExists(final Path path)
819-
{
820-
try {
821-
Files.deleteIfExists(path);
822-
} catch (IOException ignored) {
823-
// best-effort cleanup only
824-
}
825-
}
826-
827-
/**
828-
* Ensures that a file does not already exist.
829-
*
830-
* @param path target path
831-
* @throws FileAlreadyExistsException if the path already exists
832-
*/
833-
private static void ensureAbsent(final Path path) throws FileAlreadyExistsException
834-
{
835-
if (Files.exists(path)) {
836-
throw new FileAlreadyExistsException(path.toString());
837-
}
838-
}
839-
840-
/**
841-
* Ensures that a file exists and is a regular file.
842-
*
843-
* @param path path to check
844-
* @throws IOException if the file does not exist or is not regular
845-
*/
846-
private static void ensureRegularFile(final Path path) throws IOException
847-
{
848-
if (!Files.isRegularFile(path)) {
849-
throw new NoSuchFileException(path.toString());
850-
}
851-
}
852-
853-
854-
/**
855-
* Moves one temporary file into its final location.
856-
*
857-
* @param source temporary file path
858-
* @param target final file path
859-
* @throws IOException if the move fails
860-
*/
861-
private static void moveTemp(final Path source, final Path target) throws IOException
862-
{
863-
try {
864-
Files.move(source, target, StandardCopyOption.ATOMIC_MOVE);
865-
} catch (IOException e) {
866-
Files.move(source, target);
867-
}
868-
}
869-
870-
/**
871-
* Reads one UTF-8 string preceded by its byte length.
872-
*
873-
* @param in source stream
874-
* @return decoded string
875-
* @throws IOException if reading fails or if the encoded length is invalid
876-
*/
877-
private static String readUtf8(final DataInputStream in) throws IOException
878-
{
879-
final int length = in.readInt();
880-
if (length < 0) {
881-
throw new IOException("Negative UTF-8 byte length: " + length);
882-
}
883-
final byte[] bytes = new byte[length];
884-
in.readFully(bytes);
885-
return new String(bytes, StandardCharsets.UTF_8);
886-
}
887-
888812
/**
889813
* Returns the path of the persisted statistics file for one field.
890814
*
@@ -896,29 +820,4 @@ private static Path statsPath(final Path indexDir, final String field)
896820
{
897821
return indexDir.resolve(field + ".stats");
898822
}
899-
900-
/**
901-
* Returns the temporary path used while writing one file.
902-
*
903-
* @param path final target path
904-
* @return sibling temporary path with {@code .tmp} suffix
905-
*/
906-
private static Path tmpPath(final Path path)
907-
{
908-
return path.resolveSibling(path.getFileName().toString() + ".tmp");
909-
}
910-
911-
/**
912-
* Writes one UTF-8 string preceded by its byte length.
913-
*
914-
* @param out destination stream
915-
* @param s string to write
916-
* @throws IOException if writing fails
917-
*/
918-
private static void writeUtf8(final DataOutputStream out, final String s) throws IOException
919-
{
920-
final byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
921-
out.writeInt(bytes.length);
922-
out.write(bytes);
923-
}
924-
}
823+
}

0 commit comments

Comments
 (0)