Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: nested window function #15033

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
17 changes: 17 additions & 0 deletions datafusion/core/tests/sql/sql_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,23 @@ use datafusion::prelude::*;

use tempfile::TempDir;

#[tokio::test]
async fn test_window_function() {
let ctx = SessionContext::new();
let df = ctx
.sql(
r#"SELECT
t1.v1,
SUM(t1.v1) OVER w + 1
FROM
generate_series(1, 10000) AS t1(v1)
WINDOW
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should at least have a `.slt` test that shows this query running and producing the same result as PostgreSQL, perhaps with a smaller series:

postgres=# SELECT
  t1.v1,
  SUM(t1.v1) OVER w
FROM
  generate_series(1, 5) AS t1(v1)
WINDOW
  w AS (ORDER BY t1.v1 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW);
 v1 | sum
----+-----
  1 |   1
  2 |   3
  3 |   6
  4 |  10
  5 |  15
(5 rows)

w AS (ORDER BY t1.v1 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW);"#,
)
.await;
assert!(df.is_ok());
}

#[tokio::test]
async fn unsupported_ddl_returns_error() {
// Verify SessionContext::with_sql_options errors appropriately
Expand Down
58 changes: 36 additions & 22 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use std::collections::HashSet;
use std::ops::ControlFlow;
use std::sync::Arc;

use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
Expand All @@ -27,7 +28,7 @@ use crate::utils::{

use datafusion_common::error::DataFusionErrorBuilder;
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
use datafusion_common::{not_impl_err, plan_err, Column, Result};
use datafusion_common::{not_impl_err, plan_err, Column, DataFusionError, Result};
use datafusion_common::{RecursionUnnestOption, UnnestOptions};
use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions};
use datafusion_expr::expr_rewriter::{
Expand All @@ -44,8 +45,8 @@ use datafusion_expr::{

use indexmap::IndexMap;
use sqlparser::ast::{
Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderByExpr,
WildcardAdditionalOptions, WindowType,
visit_expressions_mut, Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr,
OrderByExpr, WildcardAdditionalOptions, WindowType,
};
use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins};

Expand Down Expand Up @@ -887,29 +888,42 @@ fn match_window_definitions(
named_windows: &[NamedWindowDefinition],
) -> Result<()> {
for proj in projection.iter_mut() {
if let SelectItem::ExprWithAlias {
expr: SQLExpr::Function(f),
alias: _,
}
| SelectItem::UnnamedExpr(SQLExpr::Function(f)) = proj
if let SelectItem::ExprWithAlias { expr, alias: _ }
| SelectItem::UnnamedExpr(expr) = proj
{
for NamedWindowDefinition(window_ident, window_expr) in named_windows.iter() {
if let Some(WindowType::NamedWindow(ident)) = &f.over {
if ident.eq(window_ident) {
f.over = Some(match window_expr {
NamedWindowExpr::NamedWindow(ident) => {
WindowType::NamedWindow(ident.clone())
}
NamedWindowExpr::WindowSpec(spec) => {
WindowType::WindowSpec(spec.clone())
let mut err = None;
visit_expressions_mut(expr, |expr| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am sorry @chenkovsky and @2010YOUY01

I don't know what

SELECT
  t1.v1,
  SUM(t1.v1) OVER w + 1
FROM
  generate_series(1, 10) AS t1(v1)
WINDOW
  w AS (ORDER BY t1.v1);

is supposed to be computing (what does adding one to a window definition, as in `w + 1`, represent?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DuckDB interprets it as `(SUM(t1.v1) OVER w) + 1`; the alternative reading, `... OVER (w + 1)`, is not valid syntax:

D SELECT
    t1.v1,
    SUM(t1.v1) OVER (w + 1)
  FROM
    generate_series(1, 10) AS t1(v1)
  WINDOW
    w AS (ORDER BY t1.v1);
Parser Error: syntax error at or near "+"
LINE 3:   SUM(t1.v1) OVER (w + 1)

if let SQLExpr::Function(f) = expr {
if let Some(WindowType::NamedWindow(_)) = &f.over {
for NamedWindowDefinition(window_ident, window_expr) in
named_windows
{
if let Some(WindowType::NamedWindow(ident)) = &f.over {
if ident.eq(window_ident) {
f.over = Some(match window_expr {
NamedWindowExpr::NamedWindow(ident) => {
WindowType::NamedWindow(ident.clone())
}
NamedWindowExpr::WindowSpec(spec) => {
WindowType::WindowSpec(spec.clone())
}
})
}
}
})
}
// All named windows must be defined with a WindowSpec.
if let Some(WindowType::NamedWindow(ident)) = &f.over {
err = Some(DataFusionError::Plan(format!(
"The window {ident} is not defined!"
)));
return ControlFlow::Break(());
}
}
}
}
// All named windows must be defined with a WindowSpec.
if let Some(WindowType::NamedWindow(ident)) = &f.over {
return plan_err!("The window {ident} is not defined!");
ControlFlow::Continue(())
});
if let Some(err) = err {
return Err(err);
}
}
}
Expand Down