Skip to content

Parquet argument error: Parquet error: encountered non UTF-8 data  #1300

Open
@jiacai2050

Description

Describe this problem

This error arises from one of our clusters; it seems we stored a non-UTF-8 string in a string column.

2023-11-08 13:49:55.018 ERRO [analytic_engine/src/compaction/scheduler.rs:544] Failed to compact table, table_name:xxx, table_id:9196, request_id:80914480, err:Failed to write sst, file_path:0/9196/345089.sst, source:Failed to poll record batch, err:Failed to pull record batch, error:Failed to decode record batch, err:Meet a parquet error, err:Arrow: Parquet argument error: Parquet error: encountered non UTF-8 data
Backtrace:
 0 <snafu::backtrace_shim::Backtrace as snafu::GenerateBacktrace>::generate::h54b5cf46dc653acc
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/snafu-0.6.10/src/backtrace_shim.rs:15
   <analytic_engine::sst::reader::error::ParquetError as snafu::IntoError<analytic_engine::sst::reader::error::Error>>::into_error::h26ab79ad83d94414
   /home/db/ceresdb/analytic_engine/src/sst/reader.rs:27
   <core::result::Result<T,E> as snafu::ResultExt<T,E>>::with_context::{{closure}}::h502414bc5134fa38
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/snafu-0.6.10/src/lib.rs:329
   core::result::Result<T,E>::map_err::h8b5aae79bf4eeee7
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/core/src/result.rs:829
   <core::result::Result<T,E> as snafu::ResultExt<T,E>>::with_context::h5f0d22c6cfc4c9a7
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/snafu-0.6.10/src/lib.rs:327
   analytic_engine::sst::parquet::async_reader::Reader::fetch_record_batch_streams::{{closure}}::{{closure}}::h5032bb1589d30d49
   /home/db/ceresdb/analytic_engine/src/sst/parquet/async_reader.rs:338
   <T as futures_util::fns::FnMut1<A>>::call_mut::h4803b5271a79a10a
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.28/src/fns.rs:28
   <futures_util::stream::stream::map::Map<St,F> as futures_core::stream::Stream>::poll_next::{{closure}}::h82eb17ff05aa3317
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.28/src/stream/stream/map.rs:59
   core::option::Option<T>::map::h3aa96e39d04db90d
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/core/src/option.rs:1075
   <futures_util::stream::stream::map::Map<St,F> as futures_core::stream::Stream>::poll_next::h266d6f3961d47a67
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.28/src/stream/stream/map.rs:59
 1 <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next::h08e75d419af5e6e2
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-core-0.3.28/src/stream.rs:120
   futures_util::stream::stream::StreamExt::poll_next_unpin::hfb1d654748c8b788
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.28/src/stream/stream/mod.rs:1632
   <analytic_engine::sst::parquet::async_reader::RecordBatchProjector as futures_core::stream::Stream>::poll_next::hd2c15bdab7f57920
   /home/db/ceresdb/analytic_engine/src/sst/parquet/async_reader.rs:518
 2 futures_core::stream::if_alloc::<impl futures_core::stream::Stream for alloc::boxed::Box<S>>::poll_next::hac4396be82a57db3
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-core-0.3.28/src/stream.rs:209
   futures_util::stream::stream::StreamExt::poll_next_unpin::hde73d86315dca273
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.28/src/stream/stream/mod.rs:1632
   <futures_util::stream::stream::next::Next<St> as core::future::future::Future>::poll::h3d1cbbc2b1cd7585
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.28/src/stream/stream/next.rs:32
   analytic_engine::sst::parquet::async_reader::ThreadedReader::read_record_batches_from_sub_reader::{{closure}}::h7c0d12422d5c273b
   /home/db/ceresdb/analytic_engine/src/sst/parquet/async_reader.rs:696
 3 tokio::runtime::task::core::Core<T,S>::poll::{{closure}}::h715e7d77b31c390f
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/core.rs:311
   tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut::h58626b95480bd76a
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/loom/std/unsafe_cell.rs:14
   tokio::runtime::task::core::Core<T,S>::poll::ha524851086f3a884
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/core.rs:300
   tokio::runtime::task::harness::poll_future::{{closure}}::h696b10229d0c843d
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/harness.rs:476
   <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once::h979fc5db0783c4d6
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/core/src/panic/unwind_safe.rs:271
   std::panicking::try::do_call::hf8e3ba57652ca889
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/panicking.rs:524
   std::panicking::try::hd29ae04528e7a837
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/panicking.rs:488
   std::panic::catch_unwind::hb6125781bc368ede
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/panic.rs:142
   tokio::runtime::task::harness::poll_future::h49d00d55acef36c9
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/harness.rs:464
   tokio::runtime::task::harness::Harness<T,S>::poll_inner::hc56dbf0ca33e1ca3
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/harness.rs:198
   tokio::runtime::task::harness::Harness<T,S>::poll::hc6ac7853754be6ed
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/harness.rs:152
   tokio::runtime::task::raw::poll::ha92596021a564794
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/raw.rs:276
 4 tokio::runtime::task::raw::RawTask::poll::hb45f2fe81691c337
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/raw.rs:200
   tokio::runtime::task::LocalNotified<S>::run::h0536a90178ddee16
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/mod.rs:400
   tokio::runtime::scheduler::multi_thread::worker::Context::run_task::{{closure}}::h7d241c72fbddb052
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/scheduler/multi_thread/worker.rs:576
   tokio::runtime::coop::with_budget::hec65761cece85585
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/coop.rs:107
   tokio::runtime::coop::budget::h0d1a8787eff4d994
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/coop.rs:73
   tokio::runtime::scheduler::multi_thread::worker::Context::run_task::hc185bee783ef0f6a
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/scheduler/multi_thread/worker.rs:575
 5 tokio::runtime::scheduler::multi_thread::worker::Context::run::h58a2f09f23f395c4
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/scheduler/multi_thread/worker.rs:526
   tokio::runtime::scheduler::multi_thread::worker::run::{{closure}}::{{closure}}::h517dc94dae364c92
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/scheduler/multi_thread/worker.rs:491
   tokio::runtime::context::scoped::Scoped<T>::set::h36363213b4e04c2b
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/context/scoped.rs:40
   tokio::runtime::context::set_scheduler::{{closure}}::h37825f5042431ee1
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/context.rs:176
   std::thread::local::LocalKey<T>::try_with::hec479b72c81fd313
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/thread/local.rs:270
   std::thread::local::LocalKey<T>::with::he078dc0ac2b82166
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/thread/local.rs:246
   tokio::runtime::context::set_scheduler::hc5da922222919a15
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/context.rs:176
   tokio::runtime::scheduler::multi_thread::worker::run::{{closure}}::h779fc6adc01e47ce
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/scheduler/multi_thread/worker.rs:486
   tokio::runtime::context::runtime::enter_runtime::haab5099640c5426f
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/context/runtime.rs:65
   tokio::runtime::scheduler::multi_thread::worker::run::h79384f145d6d8416
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/scheduler/multi_thread/worker.rs:478
 6 tokio::runtime::scheduler::multi_thread::worker::Launch::launch::{{closure}}::h0cbe85cdf2846e78
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/scheduler/multi_thread/worker.rs:447
   <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll::he547489663e6982f
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/blocking/task.rs:42
   tokio::runtime::task::core::Core<T,S>::poll::{{closure}}::h50c9ca7f694a0561
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/core.rs:311
   tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut::h06d3ff95963a525b
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/loom/std/unsafe_cell.rs:14
   tokio::runtime::task::core::Core<T,S>::poll::h4e7a6528a4c4f978
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/core.rs:300
   tokio::runtime::task::harness::poll_future::{{closure}}::h19a6006bf6f679c6
   /home/db/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.29.1/src/runtime/task/harness.rs:476
   <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once::h74e084ab7cc47154
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/core/src/panic/unwind_safe.rs:271
   std::panicking::try::do_call::hfd68c80fe0b41e09
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/panicking.rs:524
   std::panicking::try::h612ba8841f413452
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/panicking.rs:488
   std::panic::catch_unwind::hc305b19dce8f8392
   /rustc/8550f15e148407159af401e02b1d9259762b3496/library/std/src/panic.rs:142

Server version

All versions.

Steps to reproduce

NA

Expected behavior

If a string value contains non-UTF-8 data, we should return an error at write time, not later during compaction.

Additional Information

No response

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions