Skip to content

Commit d7dc586

Browse files
kszucs, raulcd, kou
authored
GH-45048: [C++][Parquet] Deprecate unused chunk_size parameter in parquet::arrow::FileWriter::NewRowGroup() (#45088)
### Rationale for this change

Just noticed that the implementation doesn't use the parameter.

### What changes are included in this PR?

Remove the parameter from `NewRowGroup()`. The old `NewRowGroup(int64_t chunk_size)` overload is kept but deprecated, and it forwards to the new parameterless `NewRowGroup()`.

### Are these changes tested?

### Are there any user-facing changes?

The `chunk_size` parameter is now deprecated.

* GitHub Issue: #45048

Lead-authored-by: Krisztian Szucs <[email protected]>
Co-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
1 parent 3b932bb commit d7dc586

File tree

8 files changed

+21
-22
lines changed

8 files changed

+21
-22
lines changed

c_glib/parquet-glib/arrow-file-writer.cpp

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -574,7 +574,6 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
574574
/**
575575
* gparquet_arrow_file_writer_new_row_group:
576576
* @writer: A #GParquetArrowFileWriter.
577-
* @chunk_size: The max number of rows in a row group.
578577
* @error: (nullable): Return location for a #GError or %NULL.
579578
*
580579
* Start a new row group.
@@ -584,13 +583,11 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
584583
* Since: 18.0.0
585584
*/
586585
gboolean
587-
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
588-
gsize chunk_size,
589-
GError **error)
586+
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error)
590587
{
591588
auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
592589
return garrow::check(error,
593-
parquet_arrow_file_writer->NewRowGroup(chunk_size),
590+
parquet_arrow_file_writer->NewRowGroup(),
594591
"[parquet][arrow][file-writer][new-row-group]");
595592
}
596593

c_glib/parquet-glib/arrow-file-writer.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -135,9 +135,7 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
135135

136136
GPARQUET_AVAILABLE_IN_18_0
137137
gboolean
138-
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
139-
gsize chunk_size,
140-
GError **error);
138+
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error);
141139

142140
GPARQUET_AVAILABLE_IN_18_0
143141
gboolean

c_glib/test/parquet/test-arrow-file-writer.rb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,10 +89,10 @@ def test_write_table
8989
def test_write_chunked_array
9090
schema = build_schema("enabled" => :boolean)
9191
writer = Parquet::ArrowFileWriter.new(schema, @file.path)
92-
writer.new_row_group(2)
92+
writer.new_row_group
9393
chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])])
9494
writer.write_chunked_array(chunked_array)
95-
writer.new_row_group(1)
95+
writer.new_row_group
9696
chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])])
9797
writer.write_chunked_array(chunked_array)
9898
writer.close

cpp/src/arrow/dataset/file_parquet_test.cc

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,15 +85,14 @@ class ParquetFormatHelper {
8585
static Status WriteRecordBatch(const RecordBatch& batch,
8686
parquet::arrow::FileWriter* writer) {
8787
auto schema = batch.schema();
88-
auto size = batch.num_rows();
8988

9089
if (!schema->Equals(*writer->schema(), false)) {
9190
return Status::Invalid("RecordBatch schema does not match this writer's. batch:'",
9291
schema->ToString(), "' this:'", writer->schema()->ToString(),
9392
"'");
9493
}
9594

96-
RETURN_NOT_OK(writer->NewRowGroup(size));
95+
RETURN_NOT_OK(writer->NewRowGroup());
9796
for (int i = 0; i < batch.num_columns(); i++) {
9897
RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i)));
9998
}

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -739,7 +739,7 @@ class ParquetIOTestBase : public ::testing::Test {
739739
ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
740740
MakeWriter(schema), arrow_schema,
741741
default_arrow_writer_properties(), &writer));
742-
ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
742+
ASSERT_OK_NO_THROW(writer->NewRowGroup());
743743
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
744744
ASSERT_OK_NO_THROW(writer->Close());
745745
// writer->Close() should be idempotent
@@ -1053,7 +1053,7 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
10531053
this->MakeWriter(schema), arrow_schema,
10541054
default_arrow_writer_properties(), &writer));
10551055
for (int i = 0; i < 4; i++) {
1056-
ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
1056+
ASSERT_OK_NO_THROW(writer->NewRowGroup());
10571057
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
10581058
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
10591059
}
@@ -1126,7 +1126,7 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
11261126
this->MakeWriter(schema), arrow_schema,
11271127
default_arrow_writer_properties(), &writer));
11281128
for (int i = 0; i < 4; i++) {
1129-
ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
1129+
ASSERT_OK_NO_THROW(writer->NewRowGroup());
11301130
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
11311131
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
11321132
}
@@ -5149,7 +5149,7 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
51495149
::arrow::default_memory_pool(),
51505150
ParquetFileWriter::Open(this->sink_, schema_node, writer_properties),
51515151
arrow_schema, default_arrow_writer_properties(), &writer));
5152-
ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
5152+
ASSERT_OK_NO_THROW(writer->NewRowGroup());
51535153
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
51545154
ASSERT_OK_NO_THROW(writer->Close());
51555155
}
@@ -5481,7 +5481,7 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
54815481
// Operations on closed writer are invalid
54825482
ASSERT_OK(writer->Close());
54835483

5484-
ASSERT_RAISES(Invalid, writer->NewRowGroup(1));
5484+
ASSERT_RAISES(Invalid, writer->NewRowGroup());
54855485
ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1));
54865486
ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup());
54875487
ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch());

cpp/src/parquet/arrow/writer.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,7 @@ class FileWriterImpl : public FileWriter {
305305
default_arrow_reader_properties(), &schema_manifest_);
306306
}
307307

308-
Status NewRowGroup(int64_t chunk_size) override {
308+
Status NewRowGroup() override {
309309
RETURN_NOT_OK(CheckClosed());
310310
if (row_group_writer_ != nullptr) {
311311
PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
@@ -379,7 +379,7 @@ class FileWriterImpl : public FileWriter {
379379
}
380380

381381
auto WriteRowGroup = [&](int64_t offset, int64_t size) {
382-
RETURN_NOT_OK(NewRowGroup(size));
382+
RETURN_NOT_OK(NewRowGroup());
383383
for (int i = 0; i < table.num_columns(); i++) {
384384
RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
385385
}

cpp/src/parquet/arrow/writer.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,9 +87,14 @@ class PARQUET_EXPORT FileWriter {
8787
/// \brief Start a new row group.
8888
///
8989
/// Returns an error if not all columns have been written.
90+
virtual ::arrow::Status NewRowGroup() = 0;
91+
92+
/// \brief Start a new row group.
9093
///
91-
/// \param chunk_size the number of rows in the next row group.
92-
virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
94+
/// \deprecated Deprecated in 19.0.0.
95+
ARROW_DEPRECATED(
96+
"Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size` argument.")
97+
virtual ::arrow::Status NewRowGroup(int64_t chunk_size) { return NewRowGroup(); }
9398

9499
/// \brief Write ColumnChunk in row group using an array.
95100
virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;

python/pyarrow/_parquet.pxd

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -556,7 +556,7 @@ cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
556556
const shared_ptr[ArrowWriterProperties]& arrow_properties)
557557

558558
CStatus WriteTable(const CTable& table, int64_t chunk_size)
559-
CStatus NewRowGroup(int64_t chunk_size)
559+
CStatus NewRowGroup()
560560
CStatus Close()
561561
CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& key_value_metadata)
562562

0 commit comments

Comments
 (0)