@@ -29,10 +29,9 @@ use crate::common::table::state_table::ReplicatedStateTable;
29
29
#[ cfg( debug_assertions) ]
30
30
use crate :: executor:: backfill:: utils:: METADATA_STATE_LEN ;
31
31
use crate :: executor:: backfill:: utils:: {
32
- compute_bounds, create_builder, create_limiter, create_limiter_with_state,
33
- get_progress_per_vnode, mapping_chunk, mapping_message, mark_chunk_ref_by_vnode,
34
- owned_row_iter, persist_state_per_vnode, update_pos_by_vnode, BackfillProgressPerVnode ,
35
- BackfillRateLimiter , BackfillState ,
32
+ compute_bounds, create_builder, create_limiter, get_progress_per_vnode, mapping_chunk,
33
+ mapping_message, mark_chunk_ref_by_vnode, owned_row_iter, persist_state_per_vnode,
34
+ update_pos_by_vnode, BackfillProgressPerVnode , BackfillRateLimiter , BackfillState ,
36
35
} ;
37
36
use crate :: executor:: prelude:: * ;
38
37
use crate :: task:: CreateMviewProgress ;
@@ -113,6 +112,7 @@ where
113
112
let mut upstream_table = self . upstream_table ;
114
113
let vnodes = upstream_table. vnodes ( ) . clone ( ) ;
115
114
let rate_limit = self . rate_limit ;
115
+ self . chunk_size = 1 ;
116
116
117
117
// These builders will build data chunks.
118
118
// We must supply them with the full datatypes which correspond to
@@ -139,14 +139,15 @@ where
139
139
// Query the current barrier latency from meta.
140
140
// Permit a 2x fluctuation in barrier latency. Set threshold to 15s.
141
141
let mut total_barrier_latency = Self :: get_total_barrier_latency ( & self . metrics ) ;
142
- let current_barrier_latency = Self :: get_barrier_latency ( & self . metrics ) ;
142
+ let mut highest_barrier_latency = Self :: get_barrier_latency ( & self . metrics ) ;
143
143
let threshold_barrier_latency = {
144
- if current_barrier_latency < 5 .0 {
145
- 15 .0
144
+ if highest_barrier_latency <= 10 .0 {
145
+ 20 .0
146
146
} else {
147
- current_barrier_latency * 2.0
147
+ highest_barrier_latency * 2.0
148
148
}
149
149
} ;
150
+ tracing:: debug!( target: "adaptive_rate_limit" , highest_barrier_latency, threshold_barrier_latency, "initial configs" ) ;
150
151
let adaptive_rate_limit = true ;
151
152
let mut rate_limit = Some ( INITIAL_ADAPTIVE_RATE_LIMIT ) ;
152
153
@@ -556,15 +557,14 @@ where
556
557
557
558
// Adapt Rate Limit
558
559
if adaptive_rate_limit {
559
- let rate_limiter_curr = rate_limiter. take ( ) ;
560
- rate_limiter = Self :: adapt_rate_limit (
560
+ Self :: adapt_rate_limit_3 (
561
561
& self . actor_id ,
562
562
& self . metrics ,
563
563
threshold_barrier_latency,
564
- current_barrier_latency ,
564
+ & mut highest_barrier_latency ,
565
565
& mut total_barrier_latency,
566
566
& mut rate_limit,
567
- rate_limiter_curr ,
567
+ & mut rate_limiter ,
568
568
)
569
569
}
570
570
@@ -638,60 +638,137 @@ where
638
638
}
639
639
}
640
640
641
- fn adapt_rate_limit (
641
+ // 2x rate limit indefinitely. Backpressure will kick in and slowdown the ingestion rate.
642
+ fn adapt_rate_limit_3 (
642
643
actor_id : & ActorId ,
643
644
metrics : & StreamingMetrics ,
644
645
threshold_barrier_latency : f64 ,
645
- current_barrier_latency : f64 ,
646
+ highest_barrier_latency : & mut f64 ,
646
647
total_barrier_latency : & mut f64 ,
647
648
rate_limit : & mut Option < usize > ,
648
- rate_limiter : Option < BackfillRateLimiter > ,
649
- ) -> Option < BackfillRateLimiter > {
649
+ rate_limiter : & mut Option < BackfillRateLimiter > ,
650
+ ) {
651
+ if let Some ( rate_limit_setting) = rate_limit {
652
+ * rate_limit_setting *= 2 ;
653
+ * rate_limiter = create_limiter ( * rate_limit_setting)
654
+ }
655
+ }
656
+
657
+ fn adapt_rate_limit_2 (
658
+ actor_id : & ActorId ,
659
+ metrics : & StreamingMetrics ,
660
+ threshold_barrier_latency : f64 ,
661
+ highest_barrier_latency : & mut f64 ,
662
+ total_barrier_latency : & mut f64 ,
663
+ rate_limit : & mut Option < usize > ,
664
+ rate_limiter : & mut Option < BackfillRateLimiter > ,
665
+ ) {
650
666
let new_total_barrier_latency = Self :: get_total_barrier_latency ( metrics) ;
651
667
let new_barrier_latency = new_total_barrier_latency - * total_barrier_latency;
652
- * total_barrier_latency = new_total_barrier_latency;
653
- let new_rate_limit = if new_barrier_latency == 0.0 {
668
+ * highest_barrier_latency = f64:: max ( new_barrier_latency, * highest_barrier_latency) ;
669
+ tracing:: debug!(
670
+ target: "adaptive_rate_limit" ,
671
+ new_barrier_latency,
672
+ ) ;
673
+ let new_rate_limit = if * highest_barrier_latency > 2_f64 * threshold_barrier_latency {
674
+ Some ( INITIAL_ADAPTIVE_RATE_LIMIT )
675
+ } else if * highest_barrier_latency > threshold_barrier_latency
676
+ && let Some ( rate_limit_set) = rate_limit
677
+ {
654
678
tracing:: debug!(
655
679
target: "adaptive_rate_limit" ,
656
- ?rate_limit,
657
- "waiting for barrier latency"
680
+ "barrier latency keep constant"
658
681
) ;
659
682
* rate_limit
660
- // do nothing
661
- } else if new_barrier_latency > threshold_barrier_latency {
683
+ } else if new_total_barrier_latency > * total_barrier_latency
684
+ && let Some ( rate_limit_set) = rate_limit
685
+ {
686
+ let scaling_factor = 1.1_f64 ;
687
+ let scaled_rate_limit = ( * rate_limit_set as f64 ) * scaling_factor;
688
+ let new_rate_limit = scaled_rate_limit. ceil ( ) as usize ;
689
+ Some ( new_rate_limit)
690
+ } else {
691
+ * rate_limit
692
+ } ;
693
+ * total_barrier_latency = new_total_barrier_latency;
694
+ * highest_barrier_latency = new_barrier_latency;
695
+ if * rate_limit != new_rate_limit
696
+ && let Some ( rate_limit_setting) = new_rate_limit
697
+ {
698
+ * rate_limit = new_rate_limit;
699
+ tracing:: trace!(
700
+ target: "adaptive_rate_limit" ,
701
+ actor_id,
702
+ ?rate_limit,
703
+ "adjusted rate limit"
704
+ ) ;
705
+ * rate_limiter = create_limiter ( rate_limit_setting)
706
+ }
707
+ }
708
+
709
+ fn adapt_rate_limit (
710
+ actor_id : & ActorId ,
711
+ metrics : & StreamingMetrics ,
712
+ threshold_barrier_latency : f64 ,
713
+ highest_barrier_latency : & mut f64 ,
714
+ total_barrier_latency : & mut f64 ,
715
+ rate_limit : & mut Option < usize > ,
716
+ rate_limiter : & mut Option < BackfillRateLimiter > ,
717
+ ) {
718
+ let new_total_barrier_latency = Self :: get_total_barrier_latency ( metrics) ;
719
+ // let new_barrier_latency = new_total_barrier_latency - *total_barrier_latency;
720
+ let new_barrier_latency = Self :: get_barrier_latency ( metrics) ;
721
+ let new_rate_limit = if new_barrier_latency > 2_f64 * threshold_barrier_latency {
662
722
tracing:: debug!(
663
723
target: "adaptive_rate_limit" ,
664
724
new_barrier_latency,
665
- "barrier latency exceeds threshold, reset to initial rate limit"
725
+ "barrier latency exceeds threshold * 2 , reset to initial rate limit"
666
726
) ;
667
727
Some ( INITIAL_ADAPTIVE_RATE_LIMIT )
668
- } else if let Some ( rate_limit_set) = rate_limit {
728
+ } else if new_barrier_latency > threshold_barrier_latency
729
+ && let Some ( rate_limit_set) = rate_limit
730
+ {
731
+ tracing:: debug!(
732
+ target: "adaptive_rate_limit" ,
733
+ new_barrier_latency,
734
+ "barrier latency exceeds threshold, exponential decrease"
735
+ ) ;
736
+ Some ( usize:: max ( 1 , * rate_limit_set / 2 ) )
737
+ } else if new_total_barrier_latency > * total_barrier_latency
738
+ && let Some ( rate_limit_set) = rate_limit
739
+ {
669
740
// We use the following inputs to determine the scaling factor:
670
741
// 1. The barrier latency "left" before we reach the threshold.
671
742
// If we have a lot left, we can scale more aggressively.
672
743
// 2. The change in barrier latency.
673
744
// If the barrier latency increases significantly, we should scale less.
674
745
// That being said, we should not let it be 0 as well, if we still have threshold to scale.
675
746
// So we just let it be a lower number, like 0.1.
676
- let barrier_latency_surplus_ratio =
677
- ( threshold_barrier_latency - new_barrier_latency ) / threshold_barrier_latency ;
747
+ let barrier_latency_surplus_ratio = ( threshold_barrier_latency - new_barrier_latency )
748
+ / ( threshold_barrier_latency * 1.3 ) ;
678
749
let barrier_latency_diff_ratio = ( 1_f64
679
- - ( new_barrier_latency - current_barrier_latency ) / current_barrier_latency )
750
+ - ( new_barrier_latency - * highest_barrier_latency ) / * highest_barrier_latency )
680
751
. clamp ( 0.1_f64 , 1_f64 ) ;
681
752
let scaling_factor = 1_f64 + barrier_latency_surplus_ratio * barrier_latency_diff_ratio;
682
753
let scaled_rate_limit = ( * rate_limit_set as f64 ) * scaling_factor;
683
- let new_rate_limit = f64:: min ( 1_f64 , scaled_rate_limit) . round ( ) as usize ;
754
+ let new_rate_limit = f64:: max ( 1_f64 , scaled_rate_limit) . round ( ) as usize ;
684
755
tracing:: debug!(
685
756
target: "adaptive_rate_limit" ,
686
757
new_rate_limit,
758
+ barrier_latency_surplus_ratio,
759
+ barrier_latency_diff_ratio,
687
760
scaling_factor,
688
761
"scaling rate limit"
689
762
) ;
690
763
Some ( new_rate_limit)
691
764
} else {
692
765
* rate_limit
693
766
} ;
694
- if * rate_limit != new_rate_limit {
767
+ * total_barrier_latency = new_total_barrier_latency;
768
+ * highest_barrier_latency = new_barrier_latency;
769
+ if * rate_limit != new_rate_limit
770
+ && let Some ( rate_limit_setting) = new_rate_limit
771
+ {
695
772
* rate_limit = new_rate_limit;
696
773
tracing:: trace!(
697
774
target: "adaptive_rate_limit" ,
@@ -700,14 +777,7 @@ where
700
777
?rate_limit,
701
778
"adjusted rate limit"
702
779
) ;
703
- }
704
- if let Some ( rate_limit) = rate_limit
705
- && let Some ( rate_limiter) = rate_limiter
706
- {
707
- let store = rate_limiter. into_state_store ( ) ;
708
- create_limiter_with_state ( * rate_limit, store)
709
- } else {
710
- None
780
+ * rate_limiter = create_limiter ( rate_limit_setting)
711
781
}
712
782
}
713
783
0 commit comments