From ce05e8c36816993ea218a547c49051f1c42c4f02 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Wed, 16 Apr 2025 22:58:42 -0700 Subject: [PATCH] remove table validation checks Remove the validation checks on the table in the TableBatchReader constructors. The checks are DCHECKs, so they mainly fail in debug builds; what they do is validate the metadata of a table. When dealing with dictionary indices, it appears that, for compatibility, we accept signed or unsigned 32-bit or 64-bit integers. However, the Arrow dictionary builder's AppendIndices only accepts signed integers, so we end up appending signed integers while the dictionary schema may still declare unsigned integers. When validation runs, it checks that the dictionary type in the schema and the type of the actual column data are the same. Since it is possible to have a uint64 schema with int64 columns, this leads to errors like the following (with a similar error for uint32): ``` Column data for field 0 with type dictionary is inconsistent with schema dictionary ``` --- cpp/src/arrow/table.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index 5dc5e4c1a9a8c..967e78f6b4db1 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -619,7 +619,6 @@ TableBatchReader::TableBatchReader(const Table& table) for (int i = 0; i < table.num_columns(); ++i) { column_data_[i] = table.column(i).get(); } - DCHECK(table_.Validate().ok()); } TableBatchReader::TableBatchReader(std::shared_ptr<Table> table) @@ -633,7 +632,6 @@ TableBatchReader::TableBatchReader(std::shared_ptr<Table>
table) for (int i = 0; i < owned_table_->num_columns(); ++i) { column_data_[i] = owned_table_->column(i).get(); } - DCHECK(table_.Validate().ok()); } std::shared_ptr<Schema> TableBatchReader::schema() const { return table_.schema(); }