Skip to content

Commit 92231ca

Browse files
Update orc_proto.proto
1 parent 6c85d4e commit 92231ca

1 file changed

Lines changed: 23 additions & 0 deletions

File tree

format/orc_proto.proto

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,24 @@ message ColumnStatistics {
100100
optional CollectionStatistics collectionStatistics = 12;
101101
}
102102

103+
// A single entry of a RowIndex: a packed list of unsigned 64-bit positions
// plus an optional ColumnStatistics message.
// NOTE(review): presumably the positions are the stream offsets needed to
// seek to one row group and the statistics summarize that group's rows;
// confirm against the ORC specification.
message RowIndexEntry {
104+
repeated uint64 positions = 1 [packed=true];
105+
optional ColumnStatistics statistics = 2;
106+
}
107+
108+
// Container for an ordered, repeated list of RowIndexEntry messages.
// NOTE(review): presumably one RowIndex is written per column per stripe;
// confirm against the ORC specification.
message RowIndex {
109+
repeated RowIndexEntry entry = 1;
110+
}
111+
103112
// A bloom filter: an optional hash-function count together with two
// representations of the bit set, one as repeated fixed-width 64-bit words
// (bitset) and one as raw bytes (utf8bitset).
// NOTE(review): presumably only one of the two bit-set fields is populated
// by a given writer, and utf8bitset is the variant built from UTF-8 encoded
// strings; confirm against the ORC specification.
message BloomFilter {
104113
optional uint32 numHashFunctions = 1;
105114
repeated fixed64 bitset = 2;
106115
optional bytes utf8bitset = 3;
107116
}
108117

118+
// Container for a repeated list of BloomFilter messages.
// NOTE(review): presumably one BloomFilter per row group, parallel to the
// entries of RowIndex; confirm against the ORC specification.
message BloomFilterIndex {
119+
repeated BloomFilter bloomFilter = 1;
120+
}
109121

110122
message Stream {
111123
// if you add new index stream kinds, you need to make sure to update
@@ -258,12 +270,23 @@ message Metadata {
258270
repeated StripeStatistics stripeStats = 1;
259271
}
260272

273+
// In ORC v2 (and for encrypted columns in v1), each column has
274+
// its column statistics written separately.
275+
message ColumnarStripeStatistics {
276+
// One ColumnStatistics value for each stripe in the file.
277+
repeated ColumnStatistics colStats = 1;
278+
}
279+
261280
// Identifies the encryption algorithm. The zero value is reserved as
// "unknown" so that values written by newer versions can be detected.
// NOTE(review): the AES_CTR_* names presumably denote AES in CTR mode with
// 128-bit and 256-bit keys respectively; confirm against the ORC
// specification.
enum EncryptionAlgorithm {
262281
UNKNOWN_ENCRYPTION = 0; // used for detecting future algorithms
263282
AES_CTR_128 = 1;
264283
AES_CTR_256 = 2;
265284
}
266285

286+
// Container for a repeated list of ColumnStatistics messages.
// NOTE(review): presumably one entry per column, aggregated over the whole
// file; confirm against the ORC specification.
message FileStatistics {
287+
repeated ColumnStatistics column = 1;
288+
}
289+
267290
// How was the data masked? This isn't necessary for reading the file, but
268291
// is documentation about how the file was written.
269292
message DataMask {

0 commit comments

Comments
 (0)