26
26
pub mod configs;
27
27
pub ( crate ) mod execution;
28
28
pub ( crate ) mod generated_columns;
29
- pub mod lazy ;
29
+ pub ( crate ) mod metrics ;
30
30
pub ( crate ) mod schema_evolution;
31
31
32
32
use arrow_schema:: Schema ;
33
33
pub use configs:: WriterStatsConfig ;
34
34
use datafusion:: execution:: SessionStateBuilder ;
35
35
use generated_columns:: { add_generated_columns, add_missing_generated_columns} ;
36
+ use metrics:: { WriteMetricExtensionPlanner , SOURCE_COUNT_ID , SOURCE_COUNT_METRIC } ;
36
37
use std:: collections:: HashMap ;
37
38
use std:: str:: FromStr ;
38
39
use std:: sync:: Arc ;
@@ -45,7 +46,7 @@ use datafusion::datasource::MemTable;
45
46
use datafusion:: execution:: context:: { SessionContext , SessionState } ;
46
47
use datafusion:: prelude:: DataFrame ;
47
48
use datafusion_common:: { Column , DFSchema , Result , ScalarValue } ;
48
- use datafusion_expr:: { cast, lit, Expr , LogicalPlan } ;
49
+ use datafusion_expr:: { cast, lit, try_cast , Expr , Extension , LogicalPlan } ;
49
50
use execution:: { prepare_predicate_actions, write_execution_plan_with_predicate} ;
50
51
use futures:: future:: BoxFuture ;
51
52
use parquet:: file:: properties:: WriterProperties ;
@@ -58,6 +59,9 @@ use super::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOC
58
59
use super :: { CreateBuilder , CustomExecuteHandler , Operation } ;
59
60
use crate :: delta_datafusion:: expr:: fmt_expr_to_sql;
60
61
use crate :: delta_datafusion:: expr:: parse_predicate_expression;
62
+ use crate :: delta_datafusion:: logical:: MetricObserver ;
63
+ use crate :: delta_datafusion:: physical:: { find_metric_node, get_metric} ;
64
+ use crate :: delta_datafusion:: planner:: DeltaPlanner ;
61
65
use crate :: delta_datafusion:: register_store;
62
66
use crate :: delta_datafusion:: DataFusionMixins ;
63
67
use crate :: errors:: { DeltaResult , DeltaTableError } ;
@@ -418,16 +422,25 @@ impl std::future::IntoFuture for WriteBuilder {
418
422
let mut metrics = WriteMetrics :: default ( ) ;
419
423
let exec_start = Instant :: now ( ) ;
420
424
425
+ let write_planner = DeltaPlanner :: < WriteMetricExtensionPlanner > {
426
+ extension_planner : WriteMetricExtensionPlanner { } ,
427
+ } ;
428
+
421
429
// Create table actions to initialize table in case it does not yet exist
422
430
// and should be created
423
431
let mut actions = this. check_preconditions ( ) . await ?;
424
432
425
433
let partition_columns = this. get_partition_columns ( ) ?;
426
434
427
435
let state = match this. state {
428
- Some ( state) => state,
436
+ Some ( state) => SessionStateBuilder :: new_from_existing ( state. clone ( ) )
437
+ . with_query_planner ( Arc :: new ( write_planner) )
438
+ . build ( ) ,
429
439
None => {
430
- let state = SessionStateBuilder :: new ( ) . with_default_features ( ) . build ( ) ;
440
+ let state = SessionStateBuilder :: new ( )
441
+ . with_default_features ( )
442
+ . with_query_planner ( Arc :: new ( write_planner) )
443
+ . build ( ) ;
431
444
register_store ( this. log_store . clone ( ) , state. runtime_env ( ) . clone ( ) ) ;
432
445
state
433
446
}
@@ -491,7 +504,8 @@ impl std::future::IntoFuture for WriteBuilder {
491
504
for field in new_schema. fields ( ) {
492
505
// If the field exists in the source data, we cast it to the new datatype
493
506
if source_schema. index_of ( field. name ( ) ) . is_ok ( ) {
494
- let cast_expr = cast (
507
+ let cast_fn = if this. safe_cast { try_cast } else { cast } ;
508
+ let cast_expr = cast_fn (
495
509
Expr :: Column ( Column :: from_name ( field. name ( ) ) ) ,
496
510
// col(field.name()),
497
511
field. data_type ( ) . clone ( ) ,
@@ -520,6 +534,16 @@ impl std::future::IntoFuture for WriteBuilder {
520
534
& state,
521
535
) ?;
522
536
537
+ let source = LogicalPlan :: Extension ( Extension {
538
+ node : Arc :: new ( MetricObserver {
539
+ id : "write_source_count" . into ( ) ,
540
+ input : source. logical_plan ( ) . clone ( ) ,
541
+ enable_pushdown : false ,
542
+ } ) ,
543
+ } ) ;
544
+
545
+ let source = DataFrame :: new ( state. clone ( ) , source) ;
546
+
523
547
let schema = Arc :: new ( source. schema ( ) . as_arrow ( ) . clone ( ) ) ;
524
548
525
549
// Maybe create schema action
@@ -576,21 +600,31 @@ impl std::future::IntoFuture for WriteBuilder {
576
600
stats_columns,
577
601
} ;
578
602
603
+ let source_plan = source. clone ( ) . create_physical_plan ( ) . await ?;
604
+
579
605
// Here we need to validate if the new data conforms to a predicate if one is provided
580
606
let add_actions = write_execution_plan_with_predicate (
581
607
predicate. clone ( ) ,
582
608
this. snapshot . as_ref ( ) ,
583
609
state. clone ( ) ,
584
- source . clone ( ) . create_physical_plan ( ) . await ? ,
610
+ source_plan . clone ( ) ,
585
611
partition_columns. clone ( ) ,
586
612
this. log_store . object_store ( Some ( operation_id) ) . clone ( ) ,
587
613
target_file_size,
588
614
this. write_batch_size ,
589
615
this. writer_properties . clone ( ) ,
590
616
writer_stats_config. clone ( ) ,
591
- None ,
592
617
)
593
618
. await ?;
619
+
620
+ let source_count =
621
+ find_metric_node ( SOURCE_COUNT_ID , & source_plan) . ok_or_else ( || {
622
+ DeltaTableError :: Generic ( "Unable to locate expected metric node" . into ( ) )
623
+ } ) ?;
624
+ let source_count_metrics = source_count. metrics ( ) . unwrap ( ) ;
625
+ let num_added_rows = get_metric ( & source_count_metrics, SOURCE_COUNT_METRIC ) ;
626
+ metrics. num_added_rows = num_added_rows;
627
+
594
628
metrics. num_added_files = add_actions. len ( ) ;
595
629
actions. extend ( add_actions) ;
596
630
@@ -989,7 +1023,6 @@ mod tests {
989
1023
assert_eq ! ( table. version( ) , 0 ) ;
990
1024
assert_eq ! ( table. get_files_count( ) , 2 ) ;
991
1025
let write_metrics: WriteMetrics = get_write_metrics ( table. clone ( ) ) . await ;
992
- assert ! ( write_metrics. num_partitions > 0 ) ;
993
1026
assert_eq ! ( write_metrics. num_added_files, 2 ) ;
994
1027
assert_common_write_metrics ( write_metrics) ;
995
1028
@@ -1003,7 +1036,6 @@ mod tests {
1003
1036
assert_eq ! ( table. get_files_count( ) , 4 ) ;
1004
1037
1005
1038
let write_metrics: WriteMetrics = get_write_metrics ( table. clone ( ) ) . await ;
1006
- assert ! ( write_metrics. num_partitions > 0 ) ;
1007
1039
assert_eq ! ( write_metrics. num_added_files, 4 ) ;
1008
1040
assert_common_write_metrics ( write_metrics) ;
1009
1041
}
@@ -1093,7 +1125,6 @@ mod tests {
1093
1125
assert_eq ! ( table. version( ) , 0 ) ;
1094
1126
1095
1127
let write_metrics: WriteMetrics = get_write_metrics ( table. clone ( ) ) . await ;
1096
- assert ! ( write_metrics. num_partitions > 0 ) ;
1097
1128
assert_common_write_metrics ( write_metrics) ;
1098
1129
1099
1130
let mut new_schema_builder = arrow_schema:: SchemaBuilder :: new ( ) ;
@@ -1146,7 +1177,6 @@ mod tests {
1146
1177
assert_eq ! ( part_cols, vec![ "id" , "value" ] ) ; // we want to preserve partitions
1147
1178
1148
1179
let write_metrics: WriteMetrics = get_write_metrics ( table. clone ( ) ) . await ;
1149
- assert ! ( write_metrics. num_partitions > 0 ) ;
1150
1180
assert_common_write_metrics ( write_metrics) ;
1151
1181
}
1152
1182
@@ -1668,7 +1698,6 @@ mod tests {
1668
1698
assert_eq ! ( table. version( ) , 1 ) ;
1669
1699
let write_metrics: WriteMetrics = get_write_metrics ( table. clone ( ) ) . await ;
1670
1700
assert_eq ! ( write_metrics. num_added_rows, 3 ) ;
1671
- assert ! ( write_metrics. num_partitions > 0 ) ;
1672
1701
assert_common_write_metrics ( write_metrics) ;
1673
1702
1674
1703
let table = DeltaOps ( table)
@@ -1680,7 +1709,6 @@ mod tests {
1680
1709
assert_eq ! ( table. version( ) , 2 ) ;
1681
1710
let write_metrics: WriteMetrics = get_write_metrics ( table. clone ( ) ) . await ;
1682
1711
assert_eq ! ( write_metrics. num_added_rows, 1 ) ;
1683
- assert ! ( write_metrics. num_partitions > 0 ) ;
1684
1712
assert ! ( write_metrics. num_removed_files > 0 ) ;
1685
1713
assert_common_write_metrics ( write_metrics) ;
1686
1714
0 commit comments