@@ -196,13 +196,15 @@ def _split_data(self, data: pa.RecordBatch) -> Tuple[pa.RecordBatch, pa.RecordBa
196196
197197 return normal_data , blob_data
198198
@staticmethod
def _process_normal_data(data: pa.RecordBatch) -> pa.Table:
    """Convert one RecordBatch of normal (non-blob) columns into a Table.

    Mirrors the base ``DataWriter`` behavior: an absent or empty batch
    yields an empty Table instead of propagating ``None``.

    Args:
        data: batch of normal-column rows, or ``None`` when there is
            nothing to process.

    Returns:
        A ``pa.Table`` wrapping ``data``; empty when ``data`` is ``None``
        or carries no rows.
    """
    # BUG FIX: pa.Table.from_batches([]) with no batches and no schema
    # raises ValueError ("Must pass schema, or at least one RecordBatch"),
    # so the empty branches must supply a schema explicitly.
    if data is None:
        # No batch at all: no schema to propagate, return a zero-column table.
        return pa.Table.from_batches([], schema=pa.schema([]))
    if data.num_rows == 0:
        # Preserve the batch's schema even though it contributes no rows.
        return pa.Table.from_batches([], schema=data.schema)
    return pa.Table.from_batches([data])
204205
@staticmethod
def _merge_normal_data(existing_data: pa.Table, new_data: pa.Table) -> pa.Table:
    """Return a single Table with *new_data* appended after *existing_data*.

    Both inputs must share a compatible schema; no rows are modified,
    the tables are simply stacked in order.
    """
    combined = pa.concat_tables([existing_data, new_data])
    return combined
207209
208210 def _should_roll_normal (self ) -> bool :
@@ -243,7 +245,7 @@ def _close_current_writers(self):
243245 logger .info (f"Closed both writers - normal: { normal_meta .file_name } , "
244246 f"added { len (blob_metas )} blob file metadata after normal metadata" )
245247
246- def _write_normal_data_to_file (self , data : pa .Table ) -> DataFileMeta :
248+ def _write_normal_data_to_file (self , data : pa .Table ) -> Optional [ DataFileMeta ] :
247249 if data .num_rows == 0 :
248250 return None
249251
@@ -270,37 +272,15 @@ def _write_normal_data_to_file(self, data: pa.Table) -> DataFileMeta:
270272
271273 def _create_data_file_meta (self , file_name : str , file_path : str , data : pa .Table ,
272274 external_path : Optional [str ] = None ) -> DataFileMeta :
273- # Column stats (only for normal columns)
274- column_stats = {
275- field .name : self ._get_column_stats (data , field .name )
276- for field in self .table .table_schema .fields
277- if field .name != self .blob_column_name
278- }
279-
280- # Get normal fields only
281- normal_fields = [field for field in self .table .table_schema .fields
282- if field .name != self .blob_column_name ]
283-
284- min_value_stats = [column_stats [field .name ]['min_values' ] for field in normal_fields ]
285- max_value_stats = [column_stats [field .name ]['max_values' ] for field in normal_fields ]
286- value_null_counts = [column_stats [field .name ]['null_counts' ] for field in normal_fields ]
287-
288275 self .sequence_generator .start = self .sequence_generator .current
289-
290276 return DataFileMeta .create (
291277 file_name = file_name ,
292278 file_size = self .file_io .get_file_size (file_path ),
293279 row_count = data .num_rows ,
294280 min_key = GenericRow ([], []),
295281 max_key = GenericRow ([], []),
296- key_stats = SimpleStats (
297- GenericRow ([], []),
298- GenericRow ([], []),
299- []),
300- value_stats = SimpleStats (
301- GenericRow (min_value_stats , normal_fields ),
302- GenericRow (max_value_stats , normal_fields ),
303- value_null_counts ),
282+ key_stats = SimpleStats .empty_stats (),
283+ value_stats = SimpleStats .empty_stats (),
304284 min_sequence_number = - 1 ,
305285 max_sequence_number = - 1 ,
306286 schema_id = self .table .table_schema .id ,
@@ -309,7 +289,7 @@ def _create_data_file_meta(self, file_name: str, file_path: str, data: pa.Table,
309289 creation_time = Timestamp .now (),
310290 delete_row_count = 0 ,
311291 file_source = 0 ,
312- value_stats_cols = self . normal_column_names ,
292+ value_stats_cols = [] ,
313293 external_path = external_path ,
314294 file_path = file_path ,
315295 write_cols = self .write_cols )
0 commit comments