|
33 | 33 | #include "paimon/core/io/data_file_path_factory.h" |
34 | 34 | #include "paimon/core/io/data_file_writer.h" |
35 | 35 | #include "paimon/core/io/data_increment.h" |
| 36 | +#include "paimon/core/io/multiple_blob_file_writer.h" |
36 | 37 | #include "paimon/core/io/rolling_blob_file_writer.h" |
37 | 38 | #include "paimon/core/io/rolling_file_writer.h" |
38 | 39 | #include "paimon/core/io/single_file_writer.h" |
@@ -212,35 +213,43 @@ AppendOnlyWriter::SingleFileWriterCreator AppendOnlyWriter::GetBlobFileWriterCre |
212 | 213 |
|
213 | 214 | AppendOnlyWriter::RollingFileWriterResult AppendOnlyWriter::CreateRollingBlobWriter( |
214 | 215 | const BlobUtils::SeparatedSchemas& schemas) const { |
215 | | - if (schemas.blob_schema->num_fields() > RollingBlobFileWriter::EXPECTED_BLOB_FIELD_COUNT) { |
216 | | - return Status::Invalid("Limit exactly one blob field in one paimon table yet."); |
217 | | - } |
218 | | - // use a specialized writer that writes blob data to a separate rolling file. |
219 | | - ::ArrowSchema arrow_schema; |
220 | | - ScopeGuard guard([&arrow_schema]() { ArrowSchemaRelease(&arrow_schema); }); |
221 | | - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schemas.blob_schema, &arrow_schema)); |
222 | | - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr<FileFormat> format, |
223 | | - FileFormatFactory::Get("blob", options_.ToMap())); |
224 | | - PAIMON_ASSIGN_OR_RAISE( |
225 | | - std::shared_ptr<WriterBuilder> writer_builder, |
226 | | - format->CreateWriterBuilder(&arrow_schema, options_.GetWriteBatchSize())); |
227 | | - writer_builder->WithMemoryPool(memory_pool_); |
228 | | - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schemas.blob_schema, &arrow_schema)); |
229 | | - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<FormatStatsExtractor> stats_extractor, |
230 | | - format->CreateStatsExtractor(&arrow_schema)); |
231 | | - |
232 | | - auto single_blob_file_writer_creator = GetBlobFileWriterCreator( |
233 | | - writer_builder, stats_extractor, schemas.blob_schema->field_names()); |
234 | | - auto rolling_blob_file_writer_creator = [this, single_blob_file_writer_creator]() |
| 216 | + // Multiple blob fields are supported. Each blob field gets its own rolling file writer |
| 217 | + // via MultipleBlobFileWriter. |
| 218 | + auto blob_schema = schemas.blob_schema; |
| 219 | + auto blob_writer_creator = [this, blob_schema](const std::string& blob_field_name) |
235 | 220 | -> Result< |
236 | 221 | std::unique_ptr<RollingFileWriter<::ArrowArray*, std::shared_ptr<DataFileMeta>>>> { |
| 222 | + // Create a single-field schema for this blob field |
| 223 | + auto field = blob_schema->GetFieldByName(blob_field_name); |
| 224 | + if (!field) { |
| 225 | + return Status::Invalid( |
| 226 | + fmt::format("Blob field '{}' not found in blob schema", blob_field_name)); |
| 227 | + } |
| 228 | + auto single_field_schema = arrow::schema({field}); |
| 229 | + ::ArrowSchema arrow_schema; |
| 230 | + ScopeGuard guard([&arrow_schema]() { ArrowSchemaRelease(&arrow_schema); }); |
| 231 | + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*single_field_schema, &arrow_schema)); |
| 232 | + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr<FileFormat> format, |
| 233 | + FileFormatFactory::Get("blob", options_.ToMap())); |
| 234 | + PAIMON_ASSIGN_OR_RAISE( |
| 235 | + std::shared_ptr<WriterBuilder> writer_builder, |
| 236 | + format->CreateWriterBuilder(&arrow_schema, options_.GetWriteBatchSize())); |
| 237 | + writer_builder->WithMemoryPool(memory_pool_); |
| 238 | + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*single_field_schema, &arrow_schema)); |
| 239 | + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<FormatStatsExtractor> stats_extractor, |
| 240 | + format->CreateStatsExtractor(&arrow_schema)); |
| 241 | + |
| 242 | + std::vector<std::string> write_cols = {blob_field_name}; |
| 243 | + auto single_blob_file_writer_creator = |
| 244 | + GetBlobFileWriterCreator(writer_builder, stats_extractor, write_cols); |
237 | 245 | return std::make_unique<RollingFileWriter<::ArrowArray*, std::shared_ptr<DataFileMeta>>>( |
238 | 246 | options_.GetBlobTargetFileSize(), single_blob_file_writer_creator); |
239 | 247 | }; |
| 248 | + |
240 | 249 | return std::make_unique<RollingBlobFileWriter>( |
241 | 250 | options_.GetTargetFileSize(/*has_primary_key=*/false), |
242 | 251 | GetDataFileWriterCreator(schemas.main_schema, schemas.main_schema->field_names()), |
243 | | - rolling_blob_file_writer_creator, arrow::struct_(write_schema_->fields())); |
| 252 | + blob_schema, blob_writer_creator, arrow::struct_(write_schema_->fields())); |
244 | 253 | } |
245 | 254 |
|
246 | 255 | Status AppendOnlyWriter::Sync() { |
|
0 commit comments