-
Notifications
You must be signed in to change notification settings - Fork 19
Description
You have to provide the following information whenever possible.
Describe what's wrong
We hit a core dump in PlainByteArrayDecoder::DecodeCHDense, triggered by a simple SQL query that reads a string column from a Parquet file.
The core stack trace is as follows:
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1 0x00007fc6e86f0859 in __GI_abort () at abort.c:79
#2 0x00007fc6e875b26e in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7fc6e8885298 "%s\n")
at ../sysdeps/posix/libc_fatal.c:155
#3 0x00007fc6e87632fc in malloc_printerr (str=str@entry=0x7fc6e88835bb "realloc(): invalid next size")
at malloc.c:5347
#4 0x00007fc6e8766fac in _int_realloc (av=av@entry=0x7fc3ec000020, oldp=oldp@entry=0x7fc3ec7df380,
oldsize=oldsize@entry=1048592, nb=2097168) at malloc.c:4564
#5 0x00007fc6e8769156 in __GI___libc_realloc (oldmem=0x7fc3ec7df390, bytes=2097152) at malloc.c:3235
#6 0x00007fc4976579d2 in Allocator<false, false>::realloc (this=0x7fc3ec47f0e8, buf=0x7fc3ec7df390, old_size=1048576,
new_size=2097152, alignment=0) at ../src/Common/Allocator.h:134
#7 0x00007fc494dc601a in DB::PODArrayBase<1ul, 4096ul, Allocator<false, false>, 63ul, 64ul>::realloc<>(unsigned long)
(this=0x7fc3ec47f0e8, bytes=2097152) at ../src/Common/PODArray.h:171
#8 0x00007fc4979732a4 in DB::PODArray<char8_t, 4096ul, Allocator<false, false>, 63ul, 64ul>::emplace_back<char> (
this=0x7fc3ec47f0e8, args=<optimized out>) at ../src/Common/PODArray.h:453
#9 ch_parquet::(anonymous namespace)::PlainByteArrayDecoder::DecodeCHDense (num_values=<optimized out>,
column_chars_t_p=0x7fc3ec47f0e8, column_offsets_p=<optimized out>, bitmap_writer=..., this=<optimized out>,
null_count=<optimized out>, valid_bits=<optimized out>, valid_bits_offset=<optimized out>,
out_values_decoded=<optimized out>) at ../utils/local-engine/Storages/ch_parquet/arrow/encoding.cc:1427
#10 ch_parquet::(anonymous namespace)::PlainByteArrayDecoder::DecodeCH (this=0x7fc3ec484750,
num_values=<optimized out>, null_count=<optimized out>, valid_bits=<optimized out>,
valid_bits_offset=<optimized out>, column_chars_t_p=<optimized out>, column_offsets_p=0x7fc3ec47f0d0,
bitmap_writer=...) at ../utils/local-engine/Storages/ch_parquet/arrow/encoding.cc:1371
#11 0x00007fc49793c771 in ch_parquet::internal::(anonymous namespace)::CHByteArrayChunkedRecordReader::ReadValuesSpaced
(this=0x7fc3ec47d348, values_to_read=3600, null_count=0)
at ../utils/local-engine/Storages/ch_parquet/arrow/column_reader.cc:1810
#12 0x00007fc49793af1a in ch_parquet::internal::(anonymous namespace)::TypedRecordReader<parquet::PhysicalType<(parquet::Type::type)6> >::ReadRecordData (this=this@entry=0x7fc3ec47d348, num_records=<optimized out>, num_records@entry=4592)
at ../utils/local-engine/Storages/ch_parquet/arrow/column_reader.cc:1558
#13 0x00007fc497939662 in ch_parquet::internal::(anonymous namespace)::TypedRecordReader<parquet::PhysicalType<(parquet::Type::type)6> >::ReadRecords (this=0x7fc3ec47d348, num_records=8192)
at ../utils/local-engine/Storages/ch_parquet/arrow/column_reader.cc:1313
#14 0x00007fc4979168a8 in ch_parquet::arrow::(anonymous namespace)::LeafReader::LoadBatch (this=0x7fc3ec47e710,
records_to_read=8192) at ../utils/local-engine/Storages/ch_parquet/arrow/reader.cc:477
#15 0x00007fc497921a43 in ch_parquet::arrow::ColumnReaderImpl::NextBatch (this=0x7fc4150f56a0, batch_size=0,
out=0x7fc6e871100b <__GI_raise+203>) at ../utils/local-engine/Storages/ch_parquet/arrow/reader.cc:118
#16 0x00007fc49791b777 in ch_parquet::arrow::(anonymous namespace)::FileReaderImpl::GetRecordBatchReader(std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::unique_ptr<arrow::RecordBatchReader, std::__1::default_delete<arrow::RecordBatchReader> >*)::$_0::operator()()::{lambda(int)#1}::operator()(int) const (i=0, this=<optimized out>) at ../utils/local-engine/Storages/ch_parquet/arrow/reader.cc:1021
#17 arrow::internal::OptionalParallelFor<ch_parquet::arrow::(anonymous namespace)::FileReaderImpl::GetRecordBatchReader(std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::unique_ptr<arrow::RecordBatchReader, std::__1::default_delete<arrow::RecordBatchReader> >*)::$_0::operator()()::{lambda(int)#1}>(bool, int, ch_parquet::arrow::(anonymous namespace)::FileReaderImpl::GetRecordBatchReader(std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::unique_ptr<arrow::RecordBatchReader, std::__1::default_delete<arrow::RecordBatchReader> >*)::$_0::operator()()::{lambda(int)#1}&&, arrow::internal::Executor*) (num_tasks=1, executor=0x7fc3ec47dc80, use_threads=<optimized out>, func=...)
at ../contrib/arrow/cpp/src/arrow/util/parallel.h:75
#18 ch_parquet::arrow::(anonymous namespace)::FileReaderImpl::GetRecordBatchReader(std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::unique_ptr<arrow::RecordBatchReader, std::__1::default_delete<arrow::RecordBatchReader> >*)::$_0::operator()() (this=0x7fc3ec47f240)
at ../utils/local-engine/Storages/ch_parquet/arrow/reader.cc:1021
#19 arrow::FunctionIterator<ch_parquet::arrow::(anonymous namespace)::FileReaderImpl::GetRecordBatchReader(std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::unique_ptr<arrow::RecordBatchReader, std::__1::default_delete<arrow::RecordBatchReader> >*)::$_0, arrow::Iterator<std::__1::shared_ptr<arrow::RecordBatch> > >::Next() (this=0x7fc3ec47f240) at ../contrib/arrow/cpp/src/arrow/util/iterator.h:346
#20 arrow::Iterator<arrow::Iterator<std::__1::shared_ptr<arrow::RecordBatch> > >::Next<arrow::FunctionIterator<ch_parquet::arrow::(anonymous namespace)::FileReaderImpl::GetRecordBatchReader(std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::vector<int, std::__1::allocator<int> > const&, std::__1::unique_ptr<arrow::RecordBatchReader, std::__1::default_delete<arrow::RecordBatchReader> >*)::$_0, arrow::Iterator<std::__1::shared_ptr<arrow::RecordBatch> > > >(void*) (ptr=0x7fc3ec47f240) at ../contrib/arrow/cpp/src/arrow/util/iterator.h:200
#21 0x00007fc497926b6f in arrow::Iterator<arrow::Iterator<std::__1::shared_ptr<arrow::RecordBatch> > >::Next (
this=0x7fc3ec47d100) at ../contrib/arrow/cpp/src/arrow/util/iterator.h:110
#22 arrow::FlattenIterator<std::__1::shared_ptr<arrow::RecordBatch> >::Next (this=0x7fc3ec47d100)
at ../contrib/arrow/cpp/src/arrow/util/iterator.h:530
#23 0x00007fc497926ae9 in arrow::Iterator<std::__1::shared_ptr<arrow::RecordBatch> >::Next<arrow::FlattenIterator<std::__1::shared_ptr<arrow::RecordBatch> > > (ptr=0x7fc4150f56a0) at ../contrib/arrow/cpp/src/arrow/util/iterator.h:200
#24 0x00007fc49791b0c1 in arrow::Iterator<std::__1::shared_ptr<arrow::RecordBatch> >::Next (this=0x8)
at ../contrib/arrow/cpp/src/arrow/util/iterator.h:110
#25 ch_parquet::arrow::(anonymous namespace)::RowGroupRecordBatchReader::ReadNext (this=0x0, out=0x0)
at ../utils/local-engine/Storages/ch_parquet/arrow/reader.cc:386
#26 0x00007fc494d7e3c2 in arrow::RecordBatchReader::Next (this=0x7fc4150f56a0)
at ../contrib/arrow/cpp/src/arrow/record_batch.h:222
#27 0x00007fc494d7da2b in local_engine::ArrowParquetBlockInputFormat::generate (this=0x7fc3ec4757a8)
at ../utils/local-engine/Storages/ArrowParquetBlockInputFormat.cpp:74
#28 0x00007fc49edfaff5 in DB::ISource::tryGenerate (this=0x7fc4150f56a0) at ../src/Processors/ISource.cpp:124
#29 0x00007fc49edfac85 in DB::ISource::work (this=0x7fc3ec4757a8) at ../src/Processors/ISource.cpp:94
--Type <RET> for more, q to quit, c to continue without paging--
#30 0x00007fc49ee11f20 in DB::executeJob (node=0x7fc3ec485970, read_progress_callback=0x7fc3ec4868b0)
at ../src/Processors/Executors/ExecutionThreadContext.cpp:47
#31 DB::ExecutionThreadContext::executeTask (this=0x7fc3ec36eea0)
at ../src/Processors/Executors/ExecutionThreadContext.cpp:92
#32 0x00007fc49ee086db in DB::PipelineExecutor::executeStepImpl (this=this@entry=0x7fc3ec36d4c8,
thread_num=thread_num@entry=0, yield_flag=yield_flag@entry=0x7fc3ec373310)
at ../src/Processors/Executors/PipelineExecutor.cpp:229
#33 0x00007fc49ee08229 in DB::PipelineExecutor::executeStep (this=0x7fc3ec36d4c8, yield_flag=0x7fc3ec373310)
at ../src/Processors/Executors/PipelineExecutor.cpp:126
#34 0x00007fc49ee1594a in DB::PullingPipelineExecutor::pull (this=0x7fc3ec373310, chunk=...)
at ../src/Processors/Executors/PullingPipelineExecutor.cpp:54
#35 0x00007fc497a00968 in local_engine::NormalFileReader::pull (this=0x7fc3ec36e4f0, chunk=...)
at ../utils/local-engine/Storages/SubstraitSource/SubstraitFileSource.cpp:368
#36 0x00007fc4979fdf47 in local_engine::SubstraitFileSource::generate (this=0x7fc3ec35e6a8)
at ../utils/local-engine/Storages/SubstraitSource/SubstraitFileSource.cpp:108
#37 0x00007fc49edfaff5 in DB::ISource::tryGenerate (this=0x7fc4150f56a0) at ../src/Processors/ISource.cpp:124
#38 0x00007fc49edfac85 in DB::ISource::work (this=0x7fc3ec35e6a8) at ../src/Processors/ISource.cpp:94
#39 0x00007fc49ee11f20 in DB::executeJob (node=0x7fc3ec36ba30, read_progress_callback=0x7fc3ec36a550)
at ../src/Processors/Executors/ExecutionThreadContext.cpp:47
#40 DB::ExecutionThreadContext::executeTask (this=0x7fc3ec36e3b0)
at ../src/Processors/Executors/ExecutionThreadContext.cpp:92
#41 0x00007fc49ee086db in DB::PipelineExecutor::executeStepImpl (this=this@entry=0x7fc3ec36b658,
thread_num=thread_num@entry=0, yield_flag=yield_flag@entry=0x7fc3ec367530)
at ../src/Processors/Executors/PipelineExecutor.cpp:229
#42 0x00007fc49ee08229 in DB::PipelineExecutor::executeStep (this=0x7fc3ec36b658, yield_flag=0x7fc3ec367530)
at ../src/Processors/Executors/PipelineExecutor.cpp:126
#43 0x00007fc49ee1594a in DB::PullingPipelineExecutor::pull (this=0x7fc3ec367530, chunk=...)
at ../src/Processors/Executors/PullingPipelineExecutor.cpp:54
#44 0x00007fc49ee15b30 in DB::PullingPipelineExecutor::pull (this=0x2, block=...)
at ../src/Processors/Executors/PullingPipelineExecutor.cpp:65
#45 0x00007fc494d2ed10 in local_engine::LocalExecutor::hasNext (this=0x7fc3ec368b90)
at ../utils/local-engine/Parser/SerializedPlanParser.cpp:2656
#46 0x00007fc4950f8c97 in Java_io_glutenproject_vectorized_BatchIterator_nativeHasNext (env=0x7fc538041a58,
executor_address=0) at ../utils/local-engine/local_engine_jni.cpp:258
#47 0x00007fc6d1018427 in ?? ()
#48 0x00007fc4150f6ce8 in ?? ()
#49 0x00007fc6d1018187 in ?? ()
#50 0x00007fc6d1018142 in ?? ()
#51 0x00007fc4150f6ca8 in ?? ()
#52 0x00007fc544bece20 in ?? ()
#53 0x00007fc4150f6d18 in ?? ()
#54 0x00007fc3eb817520 in ?? ()
#55 0x0000000000000000 in ?? ()
Should we use the Parquet reader in ClickHouse (CH) instead?
A clear and concise description of what does not work as it is supposed to.
A link to reproducer in https://fiddle.clickhouse.com/.
Does it reproduce on recent release?
Enable crash reporting
If possible, change "enabled" to true in "send_crash_reports" section in
config.xml:
<send_crash_reports>
<!-- Changing <enabled> to true allows sending crash reports to -->
<!-- the ClickHouse core developers team via Sentry https://sentry.io -->
<enabled>false</enabled>
</send_crash_reports>
How to reproduce
I think this is related to a specific test data set.
Expected behavior
A clear and concise description of what you expected to happen.
Error message and/or stacktrace
If applicable, add screenshots to help explain your problem.
Additional context
Add any other context about the problem here.