Skip to content

Commit

Permalink
Add plain test
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Aug 27, 2024
1 parent 5f9bd7d commit 5b10d09
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 93 deletions.
3 changes: 1 addition & 2 deletions velox/dwio/common/IntDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,8 @@ inline T IntDecoder<isSigned>::readInt() {
if (numBytes == 12) {
VELOX_DCHECK(!useVInts, "Int96 should not be VInt encoded.");
return readInt96();
} else {
VELOX_NYI();
}
VELOX_NYI();
}
return readLongLE();
}
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
162 changes: 71 additions & 91 deletions velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,72 @@ class ParquetTableScanTest : public HiveConnectorTestBase {
writer->close();
}

void testInt96TimestampRead(const std::string& fileName) {
// Timestamp-int96.parquet holds one column (t: TIMESTAMP) and
// 10 rows in one row group. Data is in SNAPPY compressed format.
// The values are:
// |t |
// +-------------------+
// |2015-06-01 19:34:56|
// |2015-06-02 19:34:56|
// |2001-02-03 03:34:06|
// |1998-03-01 08:01:06|
// |2022-12-23 03:56:01|
// |1980-01-24 00:23:07|
// |1999-12-08 13:39:26|
// |2023-04-21 09:09:34|
// |2000-09-12 22:36:29|
// |2007-12-12 04:27:56|
// +-------------------+
auto vector = makeFlatVector<Timestamp>(
{Timestamp(1433187296, 0),
Timestamp(1433273696, 0),
Timestamp(981171246, 0),
Timestamp(888739266, 0),
Timestamp(1671767761, 0),
Timestamp(317521387, 0),
Timestamp(944660366, 0),
Timestamp(1682068174, 0),
Timestamp(968798189, 0),
Timestamp(1197433676, 0)});

loadData(
getExampleFilePath(fileName),
ROW({"t"}, {TIMESTAMP()}),
makeRowVector(
{"t"},
{
vector,
}));

assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp");
assertSelectWithFilter(
{"t"},
{},
"t < TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t <= TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t > TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t >= TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t == TIMESTAMP '2022-12-23 03:56:01'",
"SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'");
}

private:
RowTypePtr getRowType(std::vector<std::string>&& outputColumnNames) const {
std::vector<TypePtr> types;
Expand Down Expand Up @@ -719,70 +785,12 @@ TEST_F(ParquetTableScanTest, sessionTimezone) {
assertSelectWithTimezone({"a"}, "SELECT a FROM tmp", "Asia/Shanghai");
}

TEST_F(ParquetTableScanTest, timestampFilter) {
// Timestamp-int96.parquet holds one column (t: TIMESTAMP) and
// 10 rows in one row group. Data is in SNAPPY compressed format.
// The values are:
// |t |
// +-------------------+
// |2015-06-01 19:34:56|
// |2015-06-02 19:34:56|
// |2001-02-03 03:34:06|
// |1998-03-01 08:01:06|
// |2022-12-23 03:56:01|
// |1980-01-24 00:23:07|
// |1999-12-08 13:39:26|
// |2023-04-21 09:09:34|
// |2000-09-12 22:36:29|
// |2007-12-12 04:27:56|
// +-------------------+
auto vector = makeFlatVector<Timestamp>(
{Timestamp(1433187296, 0),
Timestamp(1433273696, 0),
Timestamp(981171246, 0),
Timestamp(888739266, 0),
Timestamp(1671767761, 0),
Timestamp(317521387, 0),
Timestamp(944660366, 0),
Timestamp(1682068174, 0),
Timestamp(968798189, 0),
Timestamp(1197433676, 0)});

loadData(
getExampleFilePath("timestamp_int96.parquet"),
ROW({"t"}, {TIMESTAMP()}),
makeRowVector(
{"t"},
{
vector,
}));
// Runs the shared INT96 timestamp read checks on the "dictionary" example
// file (presumably dictionary-encoded, going by the file name).
TEST_F(ParquetTableScanTest, timestampInt96Dictionary) {
  const std::string exampleFile = "timestamp_int96_dictionary.parquet";
  testInt96TimestampRead(exampleFile);
}

assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp");
assertSelectWithFilter(
{"t"},
{},
"t < TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t <= TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t > TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t >= TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t == TIMESTAMP '2022-12-23 03:56:01'",
"SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'");
// Runs the shared INT96 timestamp read checks on the "plain" example file
// (presumably plain-encoded, going by the file name).
TEST_F(ParquetTableScanTest, timestampInt96Plain) {
  const std::string exampleFile = "timestamp_int96_plain.parquet";
  testInt96TimestampRead(exampleFile);
}

TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) {
Expand Down Expand Up @@ -836,34 +844,6 @@ TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) {
assertEqualResults({expected}, result.second);
}


// Reads column 'time' from two INT96 timestamp example files and compares
// each scan with the rows registered in the 'expected' table.
TEST_F(ParquetTableScanTest, timestampINT96) {
  // Rows the scans are compared against.
  auto expectedTimes =
      makeFlatVector<Timestamp>({Timestamp(1, 0), Timestamp(2, 0)});
  auto expectedRows = makeRowVector({"time"}, {expectedTimes});
  createDuckDbTable("expected", {expectedRows});

  // NOTE(review): this array vector does not match the TIMESTAMP column type
  // declared below; it looks like placeholder data, since the assertions query
  // 'expected' rather than the loaded data - confirm.
  auto loadedColumn = makeArrayVector<Timestamp>({{}});

  const std::vector<std::string> exampleFiles = {
      "timestamp_dict_int96.parquet",
      "timestamp_plain_int96.parquet",
  };
  for (const auto& exampleFile : exampleFiles) {
    loadData(
        getExampleFilePath(exampleFile),
        ROW({"time"}, {TIMESTAMP()}),
        makeRowVector({"time"}, {loadedColumn}));
    assertSelect({"time"}, "SELECT time from expected");
  }
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
folly::Init init{&argc, &argv, false};
Expand Down

0 comments on commit 5b10d09

Please sign in to comment.