Skip to content

ORCA: Support two step window function #1014

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/backend/gpopt/config/CConfigParamMapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
false, // m_negate_param
GPOS_WSZ_LIT(
"Always pick a plan for aggregate distinct that minimizes skew.")},

{EopttraceForceSplitWindowFunc, &optimizer_force_split_window_function,
false, // m_negate_param
GPOS_WSZ_LIT(
"Always split the window function.")},

{EopttraceEnableEagerAgg, &optimizer_enable_eageragg,
false, // m_negate_param
Expand Down
2 changes: 1 addition & 1 deletion src/backend/gpopt/utils/COptTasks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ COptTasks::CreateOptimizerConfig(CMemoryPool *mp, ICostModel *cost_model,
push_group_by_below_setop_threshold, xform_bind_threshold,
skew_factor),
plan_hints,
GPOS_NEW(mp) CWindowOids(OID(F_ROW_NUMBER), OID(F_RANK_)));
GPOS_NEW(mp) CWindowOids(mp, OID(F_ROW_NUMBER), OID(F_RANK_), OID(F_DENSE_RANK_)));
}

//---------------------------------------------------------------------------
Expand Down
11 changes: 9 additions & 2 deletions src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1594,12 +1594,19 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
GPOS_ASSERT(COperator::EopPhysicalSequenceProject ==
exprhdl.Pop()->Eopid());

CPhysicalSequenceProject *psp = CPhysicalSequenceProject::PopConvert(exprhdl.Pop());

if (GPOS_FTRACE(EopttraceForceSplitWindowFunc) &&
psp->Pspt() == COperator::EsptypeGlobalTwoStep) {
return CCost(0);
}

const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];

ULONG ulSortCols = 0;
COrderSpecArray *pdrgpos =
CPhysicalSequenceProject::PopConvert(exprhdl.Pop())->Pdrgpos();
CPhysicalSequenceProject::PopConvert(psp)->Pdrgpos();
const ULONG ulOrderSpecs = pdrgpos->Size();
for (ULONG ul = 0; ul < ulOrderSpecs; ul++)
{
Expand All @@ -1619,7 +1626,7 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
dTupDefaultProcCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());

return costLocal + costChild;
}

Expand Down
10 changes: 4 additions & 6 deletions src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,10 @@ class CUtils
BOOL fNewComputedCol);

// generate a sequence project expression
static CExpression *PexprLogicalSequenceProject(CMemoryPool *mp,
CDistributionSpec *pds,
COrderSpecArray *pdrgpos,
CWindowFrameArray *pdrgpwf,
CExpression *pexpr,
CExpression *pexprPrjList);
static CExpression *PexprLogicalSequenceProject(
CMemoryPool *mp, COperator::ESPType sptype, CDistributionSpec *pds,
COrderSpecArray *pdrgpos, CWindowFrameArray *pdrgpwf,
CExpression *pexpr, CExpression *pexprPrjList);

// generate a projection of NULL constants
// to the map 'colref_mapping', and add the mappings to the colref_mapping map if not NULL
Expand Down
27 changes: 26 additions & 1 deletion src/backend/gporca/libgpopt/include/gpopt/base/CWindowOids.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,16 @@

#define DUMMY_ROW_NUMBER_OID OID(7000)
#define DUMMY_WIN_RANK OID(7001)
#define DUMMY_WIN_DENSE_RANK OID(7002)

namespace gpmd
{
class IMDId;
}

namespace gpopt
{
using namespace gpmd;
//---------------------------------------------------------------------------
// @class:
// CWindowOids
Expand All @@ -35,17 +41,36 @@ class CWindowOids : public CRefCount
// oid of window operation "row_number" function
OID m_oidRowNumber;

// metadata id of window operation "row_number" function
IMDId *m_MDIdRowNumber;

// oid of window operation "rank" function
OID m_oidRank;

// metadata id of window operation "rank" function
IMDId *m_MDIdRank;

// oid of window operation "dense_rank" function
OID m_oidDenseRank;

// metadata id of window operation "dense_rank" function
IMDId *m_MDDenseRank;

public:
CWindowOids(OID row_number_oid, OID rank_oid);
CWindowOids(CMemoryPool *mp, OID row_number_oid, OID rank_oid,
OID dense_rank_oid);

// accessor of oid value of "row_number" function
OID OidRowNumber() const;
IMDId *MDIdRowNumber() const;

// accessor of oid value of "rank" function
OID OidRank() const;
IMDId *MDIdRank() const;

// accessor of oid value of "dense_rank" function
OID OidDenseRank() const;
IMDId *MDIdDenseRank() const;

// generate default window oids
static CWindowOids *GetWindowOids(CMemoryPool *mp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,13 @@ class CExpressionHandle
// stats of attached expr/gexpr
IStatistics *Pstats();

// pop the memory pool
CMemoryPool *
Pmp() const
{
return m_mp;
}

// required properties of attached expr/gexpr
CReqdProp *
Prp() const
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ class CDistributionSpec;
class CLogicalSequenceProject : public CLogicalUnary
{
private:
// type of sequence project
COperator::ESPType m_sptype;

// partition by keys
CDistributionSpec *m_pds;

Expand All @@ -58,8 +61,8 @@ class CLogicalSequenceProject : public CLogicalUnary
CLogicalSequenceProject(const CLogicalSequenceProject &) = delete;

// ctor
CLogicalSequenceProject(CMemoryPool *mp, CDistributionSpec *pds,
COrderSpecArray *pdrgpos,
CLogicalSequenceProject(CMemoryPool *mp, COperator::ESPType sptype,
CDistributionSpec *pds, COrderSpecArray *pdrgpos,
CWindowFrameArray *pdrgpwf);

// ctor for pattern
Expand All @@ -82,6 +85,13 @@ class CLogicalSequenceProject : public CLogicalUnary
return "CLogicalSequenceProject";
}

// window type
COperator::ESPType
Pspt() const
{
return m_sptype;
}

// distribution spec
CDistributionSpec *
Pds() const
Expand Down Expand Up @@ -123,7 +133,8 @@ class CLogicalSequenceProject : public CLogicalUnary
BOOL must_exist) override;

// return true if we can pull projections up past this operator from its given child
BOOL FCanPullProjectionsUp(ULONG //child_index
BOOL
FCanPullProjectionsUp(ULONG //child_index
) const override
{
return false;
Expand Down Expand Up @@ -180,6 +191,9 @@ class CLogicalSequenceProject : public CLogicalUnary
// print
IOstream &OsPrint(IOstream &os) const override;

static IOstream &OsPrintWindowType(IOstream &os,
COperator::ESPType wintype);

// remove outer references from Order By/ Partition By clauses, and return a new operator
CLogicalSequenceProject *PopRemoveLocalOuterRefs(
CMemoryPool *mp, CExpressionHandle &exprhdl);
Expand Down
13 changes: 13 additions & 0 deletions src/backend/gporca/libgpopt/include/gpopt/operators/COperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,25 @@ class COperator : public CRefCount, public DbgPrintMixin<COperator>

EopLogicalIndexOnlyGet,
EopLogicalDynamicIndexOnlyGet,

EopLogicalWindowFunc,
EopSentinel
};

// sequence project type
enum ESPType
{
EsptypeGlobalTwoStep, // global group by sequence project
EsptypeGlobalOneStep, // global group by sequence project
EsptypeLocal, // local group by sequence project

EsptypeSentinel
};

// aggregate type
enum EGbAggType
{
// todo(jiaqizho): change to onestep, twostep(global), twostep(local)
EgbaggtypeGlobal, // global group by aggregate
EgbaggtypeLocal, // local group by aggregate
EgbaggtypeIntermediate, // intermediate group by aggregate
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class CDistributionSpec;
class CPhysicalSequenceProject : public CPhysical
{
private:
// window type
ESPType m_sptype;

// partition by keys
CDistributionSpec *m_pds;

Expand All @@ -57,8 +60,8 @@ class CPhysicalSequenceProject : public CPhysical
CPhysicalSequenceProject(const CPhysicalSequenceProject &) = delete;

// ctor
CPhysicalSequenceProject(CMemoryPool *mp, CDistributionSpec *pds,
COrderSpecArray *pdrgpos,
CPhysicalSequenceProject(CMemoryPool *mp, ESPType m_sptype,
CDistributionSpec *pds, COrderSpecArray *pdrgpos,
CWindowFrameArray *pdrgpwf);

// dtor
Expand All @@ -78,6 +81,13 @@ class CPhysicalSequenceProject : public CPhysical
return "CPhysicalSequenceProject";
}

// window type
COperator::ESPType
Pspt() const
{
return m_sptype;
}

// partition by keys
CDistributionSpec *
Pds() const
Expand Down
7 changes: 5 additions & 2 deletions src/backend/gporca/libgpopt/include/gpopt/xforms/CXform.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
#include "naucrates/traceflags/traceflags.h"

// Macro for enabling and disabling xforms
#define GPOPT_DISABLE_XFORM_TF(x) EopttraceDisableXformBase + static_cast<int>(x)
#define GPOPT_DISABLE_XFORM_TF(x) \
EopttraceDisableXformBase + static_cast<int>(x)
#define GPOPT_ENABLE_XFORM(x) GPOS_UNSET_TRACE(GPOPT_DISABLE_XFORM_TF(x))
#define GPOPT_DISABLE_XFORM(x) GPOS_SET_TRACE(GPOPT_DISABLE_XFORM_TF(x))
#define GPOPT_FENABLED_XFORM(x) !GPOS_FTRACE(GPOPT_DISABLE_XFORM_TF(x))
Expand Down Expand Up @@ -236,6 +237,7 @@ class CXform : public CRefCount, public DbgPrintMixin<CXform>
ExfLimit2IndexOnlyGet,
ExfFullOuterJoin2HashJoin,
ExfFullJoinCommutativity,
ExfSplitWindowFunc,
ExfInvalid,
ExfSentinel = ExfInvalid
};
Expand Down Expand Up @@ -297,7 +299,8 @@ class CXform : public CRefCount, public DbgPrintMixin<CXform>
}

// check compatibility with another xform
virtual BOOL FCompatible(CXform::EXformId)
virtual BOOL
FCompatible(CXform::EXformId)
{
return true;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*-------------------------------------------------------------------------
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* @filename:
* CXformSplitWindowFunc.h
*
* @doc:
* Split a window function into pair of local and global window function
*
*-------------------------------------------------------------------------
*/
#ifndef GPOPT_CXformSplitWindowFunc_H
#define GPOPT_CXformSplitWindowFunc_H

#include "gpos/base.h"

#include "gpopt/xforms/CXformExploration.h"

namespace gpopt
{
using namespace gpos;

//---------------------------------------------------------------------------
// @class:
// CXformSplitWindowFunc
//
// @doc:
// Split a window function into pair of local and global window function
//
//---------------------------------------------------------------------------
class CXformSplitWindowFunc : public CXformExploration
{
private:
// generate a project lists for the local and global window function
// from the original window function
static CExpression *PexprWindowFunc(
CMemoryPool *mp, // memory pool
CExpression *
pexprProjListOrig, // project list of the original global window function
CExpression *
ppexprProjListLocal, // project list of the new local window function
CExpression *
ppexprProjListGlobal // project list of the new global window function
);

static void PopulateLocalGlobalProjectList(
CMemoryPool *mp, CExpression *pexprProjList,
CExpression **ppexprProjListLocal, CExpression **ppexprProjListGlobal);

static bool CheckFilterAndProjectList(CExpression *pexprSelect,
CExpression *pexprProjList);

static void PopulateSelect(CMemoryPool *mp, CExpression *pexprSelect,
COperator **ppSelectCopy);

public:
CXformSplitWindowFunc(const CXformSplitWindowFunc &) = delete;

// ctor
explicit CXformSplitWindowFunc(CMemoryPool *mp);

// dtor
~CXformSplitWindowFunc() override = default;

// ident accessors
EXformId
Exfid() const override
{
return ExfSplitWindowFunc;
}

// return a string for xform name
const CHAR *
SzId() const override
{
return "CXformSplitWindowFunc";
}

// Compatibility function for splitting limit
BOOL
FCompatible(CXform::EXformId exfid) override
{
return (CXform::ExfSplitWindowFunc != exfid);
}

// compute xform promise for a given expression handle
EXformPromise Exfp(CExpressionHandle &exprhdl) const override;

// actual transform
void Transform(CXformContext *pxfctxt, CXformResult *pxfres,
CExpression *pexpr) const override;

}; // class CXformSplitWindowFunc

} // namespace gpopt

#endif // !GPOPT_CXformSplitWindowFunc_H

// EOF
Loading
Loading