Skip to content

Commit 7fc2421

Browse files
xiaodouchen and meta-codesync[bot]
authored and committed
fix: Throw error for unsupported compression kinds in TextReader (facebookincubator#15316)
Summary: This PR fixes an issue where TextReader would produce garbled output when trying to read files with unsupported compression formats (such as SNAPPY, LZO, or LZ4). Instead of silently returning corrupt data, TextReader now checks the compression kind upon initialization and throws an explicit exception if it is not supported. This prevents data corruption and provides clear feedback to the user.

This PR fixes facebookincubator#15314.

Pull Request resolved: facebookincubator#15316
Reviewed By: kevinwilfong
Differential Revision: D85881726
Pulled By: zacw7
fbshipit-source-id: b08920b030dcc5d50ac1b9bcfb0a67a6fdd1b417
1 parent 853c625 commit 7fc2421

File tree

5 files changed

+33
-0
lines changed

5 files changed

+33
-0
lines changed

velox/dwio/text/reader/TextReader.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ using folly::StringPiece;
3535
constexpr const char* kTextfileCompressionExtensionGzip = ".gz";
3636
constexpr const char* kTextfileCompressionExtensionDeflate = ".deflate";
3737
constexpr const char* kTextfileCompressionExtensionZst = ".zst";
38+
constexpr const char* kTextfileCompressionExtensionLz4 = ".lz4";
39+
constexpr const char* kTextfileCompressionExtensionLzo = ".lzo";
40+
constexpr const char* kTextfileCompressionExtensionSnappy = ".snappy";
3841

3942
static std::string emptyString = std::string();
4043

@@ -101,6 +104,11 @@ void setCompressionSettings(
101104
const std::string& filename,
102105
CompressionKind& kind,
103106
dwio::common::compression::CompressionOptions& compressionOptions) {
107+
if (endsWith(filename, kTextfileCompressionExtensionLz4) ||
108+
endsWith(filename, kTextfileCompressionExtensionLzo) ||
109+
endsWith(filename, kTextfileCompressionExtensionSnappy)) {
110+
VELOX_FAIL("Unsupported compression extension for file: {}", filename);
111+
}
104112
if (endsWith(filename, kTextfileCompressionExtensionGzip)) {
105113
kind = CompressionKind::CompressionKind_GZIP;
106114
compressionOptions.format.zlib.windowBits =

velox/dwio/text/tests/reader/TextReaderTest.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,6 +1910,31 @@ INSTANTIATE_TEST_SUITE_P(
19101910
testing::ValuesIn(params),
19111911
[](const auto& paramInfo) { return paramInfo.param.compression; });
19121912

1913+
TEST_F(TextReaderTest, unsupportedCompressedKind) {
1914+
auto type = ROW(
1915+
{{"col_string", VARCHAR()},
1916+
{"col_int", INTEGER()},
1917+
{"col_float", DOUBLE()},
1918+
{"col_bool", BOOLEAN()}});
1919+
auto factory = dwio::common::getReaderFactory(dwio::common::FileFormat::TEXT);
1920+
const std::string kBaseDir = "velox/dwio/text/tests/reader/";
1921+
std::vector paths = {
1922+
getDataFilePath(kBaseDir, "examples/simple_types_compressed_file.lz4"),
1923+
getDataFilePath(kBaseDir, "examples/simple_types_compressed_file.lzo"),
1924+
getDataFilePath(
1925+
kBaseDir, "examples/simple_types_compressed_file.snappy")};
1926+
for (const auto& path : paths) {
1927+
auto readFile = std::make_shared<LocalReadFile>(path);
1928+
auto readerOptions = dwio::common::ReaderOptions(pool());
1929+
readerOptions.setFileSchema(type);
1930+
auto input =
1931+
std::make_unique<dwio::common::BufferedInput>(readFile, poolRef());
1932+
EXPECT_THROW(
1933+
factory->createReader(std::move(input), readerOptions),
1934+
VeloxRuntimeError);
1935+
}
1936+
}
1937+
19131938
} // namespace
19141939

19151940
} // namespace facebook::velox::text
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)