Skip to content

Commit 1a2daf6

Browse files
committed
fix
1 parent 5af6693 commit 1a2daf6

File tree

9 files changed

+85
-14
lines changed

9 files changed

+85
-14
lines changed

paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ public interface SimpleStatsExtractor {
3232
Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path, long length)
3333
throws IOException;
3434

35+
boolean isStatsDisabled();
36+
3537
/** File info fetched from physical file. */
3638
class FileInfo {
3739

paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public KeyValueDataFileWriter(
103103
writeRowType,
104104
simpleStatsExtractor,
105105
compression,
106-
StatsCollectorFactories.createStatsFactories(
106+
StatsCollectorFactories.createStatsFactoriesForAvro(
107107
statsMode, options, writeRowType.getFieldNames()),
108108
options.asyncFileWrite());
109109

paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import javax.annotation.Nullable;
4242

4343
import java.io.IOException;
44+
import java.util.Collections;
4445
import java.util.HashMap;
4546
import java.util.Map;
4647
import java.util.Set;
@@ -291,7 +292,10 @@ private WriteFormatContext(
291292
StatsCollectorFactories.createStatsFactories(
292293
level2Stats.apply(level),
293294
options,
294-
writeRowType.getFieldNames());
295+
writeRowType.getFieldNames(),
296+
thinModeEnabled
297+
? keyType.getFieldNames()
298+
: Collections.emptyList());
295299
return fileFormat
296300
.createStatsExtractor(writeRowType, statsFactories)
297301
.orElse(null);

paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,13 @@
2525
import org.apache.paimon.format.SimpleStatsExtractor;
2626
import org.apache.paimon.fs.FileIO;
2727
import org.apache.paimon.fs.Path;
28-
import org.apache.paimon.statistics.NoneSimpleColStatsCollector;
2928
import org.apache.paimon.statistics.SimpleColStatsCollector;
3029
import org.apache.paimon.types.RowType;
3130
import org.apache.paimon.utils.Preconditions;
3231

3332
import javax.annotation.Nullable;
3433

3534
import java.io.IOException;
36-
import java.util.Arrays;
3735
import java.util.function.Function;
3836
import java.util.stream.IntStream;
3937

@@ -62,15 +60,15 @@ public StatsCollectingSingleFileWriter(
6260
boolean asyncWrite) {
6361
super(fileIO, factory, path, converter, compression, asyncWrite);
6462
this.simpleStatsExtractor = simpleStatsExtractor;
65-
if (this.simpleStatsExtractor == null) {
63+
if (this.simpleStatsExtractor != null) {
64+
this.isStatsDisabled = simpleStatsExtractor.isStatsDisabled();
65+
} else {
6666
this.simpleStatsCollector = new SimpleStatsCollector(writeSchema, statsCollectors);
67+
this.isStatsDisabled = simpleStatsCollector.isDisabled();
6768
}
6869
Preconditions.checkArgument(
6970
statsCollectors.length == writeSchema.getFieldCount(),
7071
"The stats collector is not aligned to write schema.");
71-
this.isStatsDisabled =
72-
Arrays.stream(SimpleColStatsCollector.create(statsCollectors))
73-
.allMatch(p -> p instanceof NoneSimpleColStatsCollector);
7472
if (isStatsDisabled) {
7573
this.noneStats =
7674
IntStream.range(0, statsCollectors.length)

paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
import org.apache.paimon.CoreOptions;
2222
import org.apache.paimon.options.Options;
2323
import org.apache.paimon.statistics.SimpleColStatsCollector;
24+
import org.apache.paimon.statistics.TruncateSimpleColStatsCollector;
25+
import org.apache.paimon.table.SpecialFields;
2426

27+
import java.util.Collections;
2528
import java.util.List;
2629

2730
import static org.apache.paimon.CoreOptions.FIELDS_PREFIX;
@@ -33,21 +36,56 @@ public class StatsCollectorFactories {
3336

3437
public static SimpleColStatsCollector.Factory[] createStatsFactories(
3538
String statsMode, CoreOptions options, List<String> fields) {
36-
Options cfg = options.toConfiguration();
39+
return createStatsFactories(statsMode, options, fields, Collections.emptyList());
40+
}
41+
42+
public static SimpleColStatsCollector.Factory[] createStatsFactories(
43+
String statsMode, CoreOptions coreOptions, List<String> fields, List<String> keyNames) {
44+
Options options = coreOptions.toConfiguration();
3745
SimpleColStatsCollector.Factory[] modes =
3846
new SimpleColStatsCollector.Factory[fields.size()];
3947
for (int i = 0; i < fields.size(); i++) {
4048
String field = fields.get(i);
41-
String fieldMode =
42-
cfg.get(
43-
key(String.format("%s.%s.%s", FIELDS_PREFIX, field, STATS_MODE_SUFFIX))
44-
.stringType()
45-
.noDefaultValue());
49+
String fieldMode = fieldMode(options, field);
50+
if (fieldMode != null) {
51+
modes[i] = SimpleColStatsCollector.from(fieldMode);
52+
} else if (SpecialFields.isSystemField(field)
53+
||
54+
// If we config DATA_FILE_THIN_MODE to true, we need to maintain the
55+
// stats for key fields.
56+
keyNames.contains(SpecialFields.KEY_FIELD_PREFIX + field)) {
57+
modes[i] = () -> new TruncateSimpleColStatsCollector(128);
58+
} else {
59+
modes[i] = SimpleColStatsCollector.from(statsMode);
60+
}
61+
}
62+
return modes;
63+
}
64+
65+
/**
66+
* If all are None, return all None to Avro Writer, which can greatly accelerate the writing
67+
* speed.
68+
*/
69+
public static SimpleColStatsCollector.Factory[] createStatsFactoriesForAvro(
70+
String statsMode, CoreOptions coreOptions, List<String> fields) {
71+
Options options = coreOptions.toConfiguration();
72+
SimpleColStatsCollector.Factory[] modes =
73+
new SimpleColStatsCollector.Factory[fields.size()];
74+
for (int i = 0; i < fields.size(); i++) {
75+
String field = fields.get(i);
76+
String fieldMode = fieldMode(options, field);
4677
modes[i] =
4778
fieldMode != null
4879
? SimpleColStatsCollector.from(fieldMode)
4980
: SimpleColStatsCollector.from(statsMode);
5081
}
5182
return modes;
5283
}
84+
85+
private static String fieldMode(Options options, String field) {
86+
return options.get(
87+
key(String.format("%s.%s.%s", FIELDS_PREFIX, field, STATS_MODE_SUFFIX))
88+
.stringType()
89+
.noDefaultValue());
90+
}
5391
}

paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.paimon.format.SimpleStatsExtractor;
2727
import org.apache.paimon.fs.FileIO;
2828
import org.apache.paimon.fs.Path;
29+
import org.apache.paimon.statistics.NoneSimpleColStatsCollector;
2930
import org.apache.paimon.statistics.SimpleColStatsCollector;
3031
import org.apache.paimon.types.RowType;
3132
import org.apache.paimon.utils.ObjectSerializer;
@@ -34,6 +35,7 @@
3435

3536
import java.io.IOException;
3637
import java.util.ArrayList;
38+
import java.util.Arrays;
3739
import java.util.List;
3840

3941
import static org.apache.paimon.utils.FileUtils.createFormatReader;
@@ -77,6 +79,12 @@ public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
7779
return Pair.of(statsCollector.extract(), new FileInfo(records.size()));
7880
}
7981

82+
@Override
83+
public boolean isStatsDisabled() {
84+
return Arrays.stream(SimpleColStatsCollector.create(stats))
85+
.allMatch(p -> p instanceof NoneSimpleColStatsCollector);
86+
}
87+
8088
private static <T> List<T> readListFromFile(
8189
FileIO fileIO,
8290
Path path,

paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
7272
new FileInfo(rowCount));
7373
}
7474

75+
@Override
76+
public boolean isStatsDisabled() {
77+
return true;
78+
}
79+
7580
private long getRowCount(InputStream inStream) throws java.io.IOException {
7681
// an Avro file's layout looks like this:
7782
// header|block|block|...

paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.paimon.format.orc.OrcReaderFactory;
2727
import org.apache.paimon.fs.FileIO;
2828
import org.apache.paimon.fs.Path;
29+
import org.apache.paimon.statistics.NoneSimpleColStatsCollector;
2930
import org.apache.paimon.statistics.SimpleColStatsCollector;
3031
import org.apache.paimon.types.DataField;
3132
import org.apache.paimon.types.DecimalType;
@@ -50,6 +51,7 @@
5051

5152
import java.io.IOException;
5253
import java.sql.Date;
54+
import java.util.Arrays;
5355
import java.util.List;
5456
import java.util.stream.IntStream;
5557

@@ -116,6 +118,12 @@ public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
116118
}
117119
}
118120

121+
@Override
122+
public boolean isStatsDisabled() {
123+
return Arrays.stream(SimpleColStatsCollector.create(statsCollectors))
124+
.allMatch(p -> p instanceof NoneSimpleColStatsCollector);
125+
}
126+
119127
private SimpleColStats toFieldStats(
120128
DataField field,
121129
ColumnStatistics stats,

paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.paimon.format.SimpleStatsExtractor;
2626
import org.apache.paimon.fs.FileIO;
2727
import org.apache.paimon.fs.Path;
28+
import org.apache.paimon.statistics.NoneSimpleColStatsCollector;
2829
import org.apache.paimon.statistics.SimpleColStatsCollector;
2930
import org.apache.paimon.types.DataField;
3031
import org.apache.paimon.types.DecimalType;
@@ -46,6 +47,7 @@
4647
import java.io.IOException;
4748
import java.math.BigDecimal;
4849
import java.math.BigInteger;
50+
import java.util.Arrays;
4951
import java.util.Map;
5052
import java.util.stream.IntStream;
5153

@@ -91,6 +93,12 @@ public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
9193
statsPair.getRight());
9294
}
9395

96+
@Override
97+
public boolean isStatsDisabled() {
98+
return Arrays.stream(SimpleColStatsCollector.create(statsCollectors))
99+
.allMatch(p -> p instanceof NoneSimpleColStatsCollector);
100+
}
101+
94102
private SimpleColStats toFieldStats(
95103
DataField field, Statistics<?> stats, SimpleColStatsCollector collector) {
96104
if (stats == null) {

0 commit comments

Comments
 (0)