diff --git a/datafusion/optimizer/src/eliminate_sort.rs b/datafusion/optimizer/src/eliminate_sort.rs
new file mode 100644
index 000000000000..6ba2ada40a7e
--- /dev/null
+++ b/datafusion/optimizer/src/eliminate_sort.rs
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use datafusion_common::tree_node::{Transformed, TreeNode};
+use datafusion_common::Result;
+use datafusion_expr::LogicalPlan;
+
+use crate::{ApplyOrder, OptimizerConfig, OptimizerRule};
+
+/// An optimizer rule that eliminates unnecessary Sort operators in subqueries.
+///
+/// A `Sort` is replaced by its input unless a `Limit` has already been
+/// visited during the top-down traversal or the `Sort` carries its own `fetch`.
+///
+/// NOTE(review): the traversal covers the whole plan handed to `rewrite`, not
+/// just subqueries, so a query-level `ORDER BY` without a `Limit` above it
+/// appears to be dropped too -- confirm the intended scope before merging.
+#[derive(Default, Debug)]
+pub struct EliminateSort {}
+
+impl EliminateSort {
+    #[allow(missing_docs)]
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+impl OptimizerRule for EliminateSort {
+    fn name(&self) -> &str {
+        "optimize_subquery_sort"
+    }
+
+    fn apply_order(&self) -> Option<ApplyOrder> {
+        // `None`: `rewrite` walks the plan itself via `transform_down`.
+        None
+    }
+
+    fn supports_rewrite(&self) -> bool {
+        true
+    }
+
+    fn rewrite(
+        &self,
+        plan: LogicalPlan,
+        _config: &dyn OptimizerConfig,
+    ) -> Result<Transformed<LogicalPlan>> {
+        // When initializing subqueries, we examine sort options since they might be unnecessary.
+        // They are only important if the subquery result is affected by the ORDER BY statement,
+        // which can happen when we have:
+        // 1. DISTINCT ON / ARRAY_AGG ... => Handled by an `Aggregate` and its requirements.
+        // 2. RANK / ROW_NUMBER ... => Handled by a `WindowAggr` and its requirements.
+        // 3. LIMIT => Handled by a `Sort`, so we need to search for it.
+        //
+        // `has_limit` is sticky: once any `Limit` has been visited, every `Sort`
+        // reached later in the traversal is kept.
+        let mut has_limit = false;
+        plan.transform_down(|node| match node {
+            LogicalPlan::Limit(_) => {
+                has_limit = true;
+                Ok(Transformed::no(node))
+            }
+            // A `Sort` with a `fetch` is a top-k: dropping it would change which
+            // rows are returned, so only plain sorts are removed.
+            LogicalPlan::Sort(sort) if !has_limit && sort.fetch.is_none() => {
+                // Take the input out of its `Arc` without cloning when possible.
+                Ok(Transformed::yes(Arc::unwrap_or_clone(sort.input)))
+            }
+            _ => Ok(Transformed::no(node)),
+        })
+    }
+}
diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs
index 893cb249a2a8..11a7b5c485c0 100644
--- a/datafusion/optimizer/src/lib.rs
+++ b/datafusion/optimizer/src/lib.rs
@@ -50,6 +50,7 @@ pub mod eliminate_limit;
 pub mod eliminate_nested_union;
 pub mod eliminate_one_union;
 pub mod eliminate_outer_join;
+pub mod eliminate_sort;
 pub mod extract_equijoin_predicate;
 pub mod filter_null_join_keys;
 pub mod optimize_projections;
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index ffbb95cb7f74..2a6344e69693 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -43,6 +43,7 @@ use crate::eliminate_limit::EliminateLimit;
 use crate::eliminate_nested_union::EliminateNestedUnion;
 use crate::eliminate_one_union::EliminateOneUnion;
 use crate::eliminate_outer_join::EliminateOuterJoin;
+use crate::eliminate_sort::EliminateSort;
 use crate::extract_equijoin_predicate::ExtractEquijoinPredicate;
 use crate::filter_null_join_keys::FilterNullJoinKeys;
 use crate::optimize_projections::OptimizeProjections;
@@ -231,6 +232,7 @@ impl Optimizer {
             Arc::new(EliminateFilter::new()),
             Arc::new(EliminateCrossJoin::new()),
             Arc::new(EliminateLimit::new()),
+            Arc::new(EliminateSort::new()),
             Arc::new(PropagateEmptyRelation::new()),
             // Must be after PropagateEmptyRelation
             Arc::new(EliminateOneUnion::new()),
diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs
index dee855f8c000..1d4f19d671ea 100644
--- a/datafusion/sql/src/relation/mod.rs
+++ b/datafusion/sql/src/relation/mod.rs
@@ -19,7 +19,6 @@
 use std::sync::Arc;
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_common::{
     not_impl_err, plan_err, DFSchema, Diagnostic, Result, Span, Spans, TableReference,
 };
@@ -162,11 +161,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             }
         };
 
-        let optimized_plan = optimize_subquery_sort(plan)?.data;
         if let Some(alias) = alias {
-            self.apply_table_alias(optimized_plan, alias)
+            self.apply_table_alias(plan, alias)
         } else {
-            Ok(optimized_plan)
+            Ok(plan)
         }
     }
 
@@ -225,30 +223,3 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         }
     }
 }
-
-fn optimize_subquery_sort(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
-    // When initializing subqueries, we examine sort options since they might be unnecessary.
-    // They are only important if the subquery result is affected by the ORDER BY statement,
-    // which can happen when we have:
-    // 1. DISTINCT ON / ARRAY_AGG ... => Handled by an `Aggregate` and its requirements.
-    // 2. RANK / ROW_NUMBER ... => Handled by a `WindowAggr` and its requirements.
-    // 3. LIMIT => Handled by a `Sort`, so we need to search for it.
-    let mut has_limit = false;
-    let new_plan = plan.transform_down(|c| {
-        if let LogicalPlan::Limit(_) = c {
-            has_limit = true;
-            return Ok(Transformed::no(c));
-        }
-        match c {
-            LogicalPlan::Sort(s) => {
-                if !has_limit {
-                    has_limit = false;
-                    return Ok(Transformed::yes(s.input.as_ref().clone()));
-                }
-                Ok(Transformed::no(LogicalPlan::Sort(s)))
-            }
-            _ => Ok(Transformed::no(c)),
-        }
-    });
-    new_plan
-}