Skip to content

Commit

Permalink
GH-45048: [C++][Parquet] Deprecate unused chunk_size parameter in `…
Browse files Browse the repository at this point in the history
…parquet::arrow::FileWriter::NewRowGroup()` (#45088)

### Rationale for this change

Just noticed that the implementation doesn't use the parameter.

### What changes are included in this PR?

Remove the parameter from `NewRowGroup()`

### Are these changes tested?

### Are there any user-facing changes?

The `chunk_size` parameter is now deprecated.

* GitHub Issue: #45048

Lead-authored-by: Krisztian Szucs <[email protected]>
Co-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
  • Loading branch information
3 people authored Jan 15, 2025
1 parent 3b932bb commit d7dc586
Show file tree
Hide file tree
Showing 8 changed files with 21 additions and 22 deletions.
7 changes: 2 additions & 5 deletions c_glib/parquet-glib/arrow-file-writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,6 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
/**
* gparquet_arrow_file_writer_new_row_group:
* @writer: A #GParquetArrowFileWriter.
* @chunk_size: The max number of rows in a row group.
* @error: (nullable): Return location for a #GError or %NULL.
*
* Start a new row group.
Expand All @@ -584,13 +583,11 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
* Since: 18.0.0
*/
gboolean
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
gsize chunk_size,
GError **error)
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error)
{
auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
return garrow::check(error,
parquet_arrow_file_writer->NewRowGroup(chunk_size),
parquet_arrow_file_writer->NewRowGroup(),
"[parquet][arrow][file-writer][new-row-group]");
}

Expand Down
4 changes: 1 addition & 3 deletions c_glib/parquet-glib/arrow-file-writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,7 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,

GPARQUET_AVAILABLE_IN_18_0
gboolean
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
gsize chunk_size,
GError **error);
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error);

GPARQUET_AVAILABLE_IN_18_0
gboolean
Expand Down
4 changes: 2 additions & 2 deletions c_glib/test/parquet/test-arrow-file-writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def test_write_table
def test_write_chunked_array
schema = build_schema("enabled" => :boolean)
writer = Parquet::ArrowFileWriter.new(schema, @file.path)
writer.new_row_group(2)
writer.new_row_group
chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])])
writer.write_chunked_array(chunked_array)
writer.new_row_group(1)
writer.new_row_group
chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])])
writer.write_chunked_array(chunked_array)
writer.close
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/arrow/dataset/file_parquet_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,14 @@ class ParquetFormatHelper {
static Status WriteRecordBatch(const RecordBatch& batch,
parquet::arrow::FileWriter* writer) {
auto schema = batch.schema();
auto size = batch.num_rows();

if (!schema->Equals(*writer->schema(), false)) {
return Status::Invalid("RecordBatch schema does not match this writer's. batch:'",
schema->ToString(), "' this:'", writer->schema()->ToString(),
"'");
}

RETURN_NOT_OK(writer->NewRowGroup(size));
RETURN_NOT_OK(writer->NewRowGroup());
for (int i = 0; i < batch.num_columns(); i++) {
RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i)));
}
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/parquet/arrow/arrow_reader_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ class ParquetIOTestBase : public ::testing::Test {
ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(), &writer));
ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
ASSERT_OK_NO_THROW(writer->NewRowGroup());
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
ASSERT_OK_NO_THROW(writer->Close());
// writer->Close() should be idempotent
Expand Down Expand Up @@ -1053,7 +1053,7 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
this->MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(), &writer));
for (int i = 0; i < 4; i++) {
ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
ASSERT_OK_NO_THROW(writer->NewRowGroup());
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
}
Expand Down Expand Up @@ -1126,7 +1126,7 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
this->MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(), &writer));
for (int i = 0; i < 4; i++) {
ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
ASSERT_OK_NO_THROW(writer->NewRowGroup());
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
}
Expand Down Expand Up @@ -5149,7 +5149,7 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
::arrow::default_memory_pool(),
ParquetFileWriter::Open(this->sink_, schema_node, writer_properties),
arrow_schema, default_arrow_writer_properties(), &writer));
ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
ASSERT_OK_NO_THROW(writer->NewRowGroup());
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
ASSERT_OK_NO_THROW(writer->Close());
}
Expand Down Expand Up @@ -5481,7 +5481,7 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
// Operations on closed writer are invalid
ASSERT_OK(writer->Close());

ASSERT_RAISES(Invalid, writer->NewRowGroup(1));
ASSERT_RAISES(Invalid, writer->NewRowGroup());
ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1));
ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup());
ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch());
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/arrow/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ class FileWriterImpl : public FileWriter {
default_arrow_reader_properties(), &schema_manifest_);
}

Status NewRowGroup(int64_t chunk_size) override {
Status NewRowGroup() override {
RETURN_NOT_OK(CheckClosed());
if (row_group_writer_ != nullptr) {
PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
Expand Down Expand Up @@ -379,7 +379,7 @@ class FileWriterImpl : public FileWriter {
}

auto WriteRowGroup = [&](int64_t offset, int64_t size) {
RETURN_NOT_OK(NewRowGroup(size));
RETURN_NOT_OK(NewRowGroup());
for (int i = 0; i < table.num_columns(); i++) {
RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
}
Expand Down
9 changes: 7 additions & 2 deletions cpp/src/parquet/arrow/writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,14 @@ class PARQUET_EXPORT FileWriter {
/// \brief Start a new row group.
///
/// Returns an error if not all columns have been written.
virtual ::arrow::Status NewRowGroup() = 0;

/// \brief Start a new row group.
///
/// \param chunk_size the number of rows in the next row group.
virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
/// \deprecated Deprecated in 19.0.0.
ARROW_DEPRECATED(
"Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size` argument.")
virtual ::arrow::Status NewRowGroup(int64_t chunk_size) { return NewRowGroup(); }

/// \brief Write ColumnChunk in row group using an array.
virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/_parquet.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
const shared_ptr[ArrowWriterProperties]& arrow_properties)

CStatus WriteTable(const CTable& table, int64_t chunk_size)
CStatus NewRowGroup(int64_t chunk_size)
CStatus NewRowGroup()
CStatus Close()
CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& key_value_metadata)

Expand Down

0 comments on commit d7dc586

Please sign in to comment.