1- mod optimizer ;
1+ mod analyzer ;
22mod plan_node;
33pub mod schema_cast;
44#[ cfg( feature = "sql" ) ]
@@ -13,58 +13,73 @@ use std::{
1313
1414use datafusion:: {
1515 execution:: session_state:: { SessionState , SessionStateBuilder } ,
16- optimizer:: { optimizer:: Optimizer , OptimizerRule } ,
16+ logical_expr:: LogicalPlan ,
17+ optimizer:: {
18+ analyzer:: {
19+ resolve_grouping_function:: ResolveGroupingFunction , type_coercion:: TypeCoercion ,
20+ } ,
21+ Analyzer , AnalyzerRule ,
22+ } ,
1723} ;
1824
19- pub use optimizer :: { get_table_source, FederationOptimizerRule } ;
25+ pub use analyzer :: { get_table_source, FederationAnalyzerRule } ;
2026pub use plan_node:: {
2127 FederatedPlanNode , FederatedPlanner , FederatedQueryPlanner , FederationPlanner ,
2228} ;
2329pub use table_provider:: { FederatedTableProviderAdaptor , FederatedTableSource } ;
2430
2531pub fn default_session_state ( ) -> SessionState {
26- let rules = default_optimizer_rules ( ) ;
32+ let rules = default_analyzer_rules ( ) ;
2733 SessionStateBuilder :: new ( )
28- . with_optimizer_rules ( rules)
34+ . with_analyzer_rules ( rules)
2935 . with_query_planner ( Arc :: new ( FederatedQueryPlanner :: new ( ) ) )
3036 . with_default_features ( )
3137 . build ( )
3238}
3339
34- pub fn default_optimizer_rules ( ) -> Vec < Arc < dyn OptimizerRule + Send + Sync > > {
35- // Get the default optimizer
36- let df_default = Optimizer :: new ( ) ;
37- let mut default_rules = df_default. rules ;
38-
39- // Insert the FederationOptimizerRule after the ScalarSubqueryToJoin.
40- // This ensures ScalarSubquery are replaced before we try to federate.
41- let Some ( pos) = default_rules
42- . iter ( )
43- . position ( |x| x. name ( ) == "scalar_subquery_to_join" )
44- else {
45- panic ! ( "Could not locate ScalarSubqueryToJoin" ) ;
46- } ;
47-
48- // TODO: check if we should allow other optimizers to run before the federation rule.
40+ /// datafusion-federation customizes the order of the analyzer rules, since some of them are only relevant when `DataFusion` is executing the query,
41+ /// as opposed to when underlying federated query engines will execute the query.
42+ ///
43+ /// This list should be kept in sync with the default rules in `Analyzer::new()`, but with the federation analyzer rule added.
44+ pub fn default_analyzer_rules ( ) -> Vec < Arc < dyn AnalyzerRule + Send + Sync > > {
45+ vec ! [
46+ Arc :: new( FederationAnalyzerRule :: new( ) ) ,
47+ // The rest of these rules are run after the federation analyzer since they only affect internal DataFusion execution.
48+ Arc :: new( ResolveGroupingFunction :: new( ) ) ,
49+ Arc :: new( TypeCoercion :: new( ) ) ,
50+ ]
51+ }
4952
50- let federation_rule = Arc :: new ( FederationOptimizerRule :: new ( ) ) ;
51- default_rules. insert ( pos + 1 , federation_rule) ;
/// Shared, reference-counted handle to a [`FederationProvider`] trait object.
53+ pub type FederationProviderRef = Arc < dyn FederationProvider > ;
5254
53- default_rules
55+ impl From < Arc < Analyzer > > for FederationAnalyzerForLogicalPlan {
56+ fn from ( value : Arc < Analyzer > ) -> Self {
57+ Self :: With ( value)
58+ }
5459}
5560
56- pub type FederationProviderRef = Arc < dyn FederationProvider > ;
57- pub trait FederationProvider : Send + Sync {
61+ pub trait FederationProvider : Send + Sync + std:: fmt:: Debug {
5862 // Returns the name of the provider, used for comparison.
5963 fn name ( & self ) -> & str ;
6064
6165 // Returns the compute context in which this federation provider
6266 // will execute a query. For example: database instance & catalog.
6367 fn compute_context ( & self ) -> Option < String > ;
6468
65- // Returns an optimizer that can cut out part of the plan
66- // to federate it.
67- fn optimizer ( & self ) -> Option < Arc < Optimizer > > ;
69+ // Returns an analyzer that can cut out, and federate part of the [`LogicalPlan`].
70+ //
71+ // Returns:
72+ // - [`None`] if the provider cannot federate any plan (e.g. [`crate::analyzer::NopFederationProvider`]).
73+ // - Some(FederationAnalyzerForLogicalPlan::Unable) if the provider cannot federate this specific [`LogicalPlan`].
74+ fn analyzer ( & self , plan : & LogicalPlan ) -> Option < FederationAnalyzerForLogicalPlan > ;
75+ }
76+
77+ /// [`LogicalPlan`] specific federation [`Analyzer`] from a [`FederationProvider`].
78+ #[ derive( Debug ) ]
79+ pub enum FederationAnalyzerForLogicalPlan {
80+ /// The [`FederationProvider`] cannot federate the [`LogicalPlan`].
81+ Unable ,
82+ With ( Arc < Analyzer > ) ,
6883}
6984
7085impl fmt:: Display for dyn FederationProvider {
@@ -88,3 +103,29 @@ impl Hash for dyn FederationProvider {
88103}
89104
// Marker impl: `Eq` declares no methods of its own, so the body is empty.
// NOTE(review): the `PartialEq` impl that `Eq` requires is not visible in this chunk.
90105impl Eq for dyn FederationProvider { }
106+
#[cfg(test)]
mod tests {
    use datafusion::optimizer::Analyzer;

    use super::default_analyzer_rules;

    /// Verifies that `DataFusion`'s default analyzer rules are the expected ones, in the
    /// expected order, and that `default_analyzer_rules()` still mirrors them.
    ///
    /// If this test fails, `DataFusion` has modified the default analyzer rules and
    /// `default_analyzer_rules()` should be updated.
    #[test]
    fn test_verify_default_analyzer_rules() {
        const EXPECTED_RULE_NAMES: [&str; 2] = ["resolve_grouping_function", "type_coercion"];

        let default_rules = Analyzer::new().rules;
        assert_eq!(
            default_rules.len(),
            EXPECTED_RULE_NAMES.len(),
            "Default analyzer rules have changed"
        );
        for (rule, expected_name) in default_rules.iter().zip(EXPECTED_RULE_NAMES) {
            assert_eq!(
                expected_name,
                rule.name(),
                "Default analyzer rule order has changed"
            );
        }

        // `default_analyzer_rules()` puts the federation rule first; every rule after it
        // must be exactly DataFusion's default list, in the same order.
        let federation_rules = default_analyzer_rules();
        let mirrored_names: Vec<&str> = federation_rules
            .iter()
            .skip(1)
            .map(|rule| rule.name())
            .collect();
        assert_eq!(
            mirrored_names, EXPECTED_RULE_NAMES,
            "default_analyzer_rules() is out of sync with the default analyzer rules"
        );
    }
}
0 commit comments