Skip to content

Commit 37cd4a1

Browse files
committed
[ORCA] Support two step window function
1 parent 5804930 commit 37cd4a1

37 files changed

+1718
-46
lines changed

src/backend/gpopt/config/CConfigParamMapping.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
265265
false, // m_negate_param
266266
GPOS_WSZ_LIT(
267267
"Always pick a plan for aggregate distinct that minimizes skew.")},
268+
269+
{EopttraceForceSplitWindowFunc, &optimizer_force_split_window_function,
270+
false, // m_negate_param
271+
GPOS_WSZ_LIT(
272+
"Always split the window function.")},
268273

269274
{EopttraceEnableEagerAgg, &optimizer_enable_eageragg,
270275
false, // m_negate_param

src/backend/gpopt/utils/COptTasks.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ COptTasks::CreateOptimizerConfig(CMemoryPool *mp, ICostModel *cost_model,
403403
push_group_by_below_setop_threshold, xform_bind_threshold,
404404
skew_factor),
405405
plan_hints,
406-
GPOS_NEW(mp) CWindowOids(OID(F_ROW_NUMBER), OID(F_RANK_)));
406+
GPOS_NEW(mp) CWindowOids(mp, OID(F_ROW_NUMBER), OID(F_RANK_), OID(F_DENSE_RANK_)));
407407
}
408408

409409
//---------------------------------------------------------------------------

src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -1594,12 +1594,19 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
15941594
GPOS_ASSERT(COperator::EopPhysicalSequenceProject ==
15951595
exprhdl.Pop()->Eopid());
15961596

1597+
CPhysicalSequenceProject *psp = CPhysicalSequenceProject::PopConvert(exprhdl.Pop());
1598+
1599+
if (GPOS_FTRACE(EopttraceForceSplitWindowFunc) &&
1600+
psp->Pspt() == COperator::EsptypeGlobalTwoStep) {
1601+
return CCost(0);
1602+
}
1603+
15971604
const DOUBLE num_rows_outer = pci->PdRows()[0];
15981605
const DOUBLE dWidthOuter = pci->GetWidth()[0];
15991606

16001607
ULONG ulSortCols = 0;
16011608
COrderSpecArray *pdrgpos =
1602-
CPhysicalSequenceProject::PopConvert(exprhdl.Pop())->Pdrgpos();
1609+
CPhysicalSequenceProject::PopConvert(psp)->Pdrgpos();
16031610
const ULONG ulOrderSpecs = pdrgpos->Size();
16041611
for (ULONG ul = 0; ul < ulOrderSpecs; ul++)
16051612
{
@@ -1619,7 +1626,7 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
16191626
dTupDefaultProcCostUnit));
16201627
CCost costChild =
16211628
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
1622-
1629+
16231630
return costLocal + costChild;
16241631
}
16251632

src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h

+4-6
Original file line numberDiff line numberDiff line change
@@ -318,12 +318,10 @@ class CUtils
318318
BOOL fNewComputedCol);
319319

320320
// generate a sequence project expression
321-
static CExpression *PexprLogicalSequenceProject(CMemoryPool *mp,
322-
CDistributionSpec *pds,
323-
COrderSpecArray *pdrgpos,
324-
CWindowFrameArray *pdrgpwf,
325-
CExpression *pexpr,
326-
CExpression *pexprPrjList);
321+
static CExpression *PexprLogicalSequenceProject(
322+
CMemoryPool *mp, COperator::ESPType sptype, CDistributionSpec *pds,
323+
COrderSpecArray *pdrgpos, CWindowFrameArray *pdrgpwf,
324+
CExpression *pexpr, CExpression *pexprPrjList);
327325

328326
// generate a projection of NULL constants
329327
// to the map 'colref_mapping', and add the mappings to the colref_mapping map if not NULL

src/backend/gporca/libgpopt/include/gpopt/base/CWindowOids.h

+26-1
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,16 @@
1717

1818
#define DUMMY_ROW_NUMBER_OID OID(7000)
1919
#define DUMMY_WIN_RANK OID(7001)
20+
#define DUMMY_WIN_DENSE_RANK OID(7002)
2021

22+
namespace gpmd
23+
{
24+
class IMDId;
25+
}
2126

2227
namespace gpopt
2328
{
29+
using namespace gpmd;
2430
//---------------------------------------------------------------------------
2531
// @class:
2632
// CWindowOids
@@ -35,17 +41,36 @@ class CWindowOids : public CRefCount
3541
// oid of window operation "row_number" function
3642
OID m_oidRowNumber;
3743

44+
// metadata id of window operation "row_number" function
45+
IMDId *m_MDIdRowNumber;
46+
3847
// oid of window operation "rank" function
3948
OID m_oidRank;
4049

50+
// metadata id of window operation "rank" function
51+
IMDId *m_MDIdRank;
52+
53+
// oid of window operation "dense_rank" function
54+
OID m_oidDenseRank;
55+
56+
// metadata id of window operation "dense_rank" function
57+
IMDId *m_MDDenseRank;
58+
4159
public:
42-
CWindowOids(OID row_number_oid, OID rank_oid);
60+
CWindowOids(CMemoryPool *mp, OID row_number_oid, OID rank_oid,
61+
OID dense_rank_oid);
4362

4463
// accessor of oid value of "row_number" function
4564
OID OidRowNumber() const;
65+
IMDId *MDIdRowNumber() const;
4666

4767
// accessor of oid value of "rank" function
4868
OID OidRank() const;
69+
IMDId *MDIdRank() const;
70+
71+
// accessor of oid value of "dense_rank" function
72+
OID OidDenseRank() const;
73+
IMDId *MDIdDenseRank() const;
4974

5075
// generate default window oids
5176
static CWindowOids *GetWindowOids(CMemoryPool *mp);

src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionHandle.h

+7
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,13 @@ class CExpressionHandle
173173
// stats of attached expr/gexpr
174174
IStatistics *Pstats();
175175

176+
// pop the memory pool
177+
CMemoryPool *
178+
Pmp() const
179+
{
180+
return m_mp;
181+
}
182+
176183
// required properties of attached expr/gexpr
177184
CReqdProp *
178185
Prp() const

src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalSequenceProject.h

+17-3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ class CDistributionSpec;
3333
class CLogicalSequenceProject : public CLogicalUnary
3434
{
3535
private:
36+
// type of sequence project
37+
COperator::ESPType m_sptype;
38+
3639
// partition by keys
3740
CDistributionSpec *m_pds;
3841

@@ -58,8 +61,8 @@ class CLogicalSequenceProject : public CLogicalUnary
5861
CLogicalSequenceProject(const CLogicalSequenceProject &) = delete;
5962

6063
// ctor
61-
CLogicalSequenceProject(CMemoryPool *mp, CDistributionSpec *pds,
62-
COrderSpecArray *pdrgpos,
64+
CLogicalSequenceProject(CMemoryPool *mp, COperator::ESPType sptype,
65+
CDistributionSpec *pds, COrderSpecArray *pdrgpos,
6366
CWindowFrameArray *pdrgpwf);
6467

6568
// ctor for pattern
@@ -82,6 +85,13 @@ class CLogicalSequenceProject : public CLogicalUnary
8285
return "CLogicalSequenceProject";
8386
}
8487

88+
// window type
89+
COperator::ESPType
90+
Pspt() const
91+
{
92+
return m_sptype;
93+
}
94+
8595
// distribution spec
8696
CDistributionSpec *
8797
Pds() const
@@ -123,7 +133,8 @@ class CLogicalSequenceProject : public CLogicalUnary
123133
BOOL must_exist) override;
124134

125135
// return true if we can pull projections up past this operator from its given child
126-
BOOL FCanPullProjectionsUp(ULONG //child_index
136+
BOOL
137+
FCanPullProjectionsUp(ULONG //child_index
127138
) const override
128139
{
129140
return false;
@@ -180,6 +191,9 @@ class CLogicalSequenceProject : public CLogicalUnary
180191
// print
181192
IOstream &OsPrint(IOstream &os) const override;
182193

194+
static IOstream &OsPrintWindowType(IOstream &os,
195+
COperator::ESPType wintype);
196+
183197
// remove outer references from Order By/ Partition By clauses, and return a new operator
184198
CLogicalSequenceProject *PopRemoveLocalOuterRefs(
185199
CMemoryPool *mp, CExpressionHandle &exprhdl);

src/backend/gporca/libgpopt/include/gpopt/operators/COperator.h

+13
Original file line numberDiff line numberDiff line change
@@ -266,12 +266,25 @@ class COperator : public CRefCount, public DbgPrintMixin<COperator>
266266

267267
EopLogicalIndexOnlyGet,
268268
EopLogicalDynamicIndexOnlyGet,
269+
270+
EopLogicalWindowFunc,
269271
EopSentinel
270272
};
271273

274+
// sequence project type
275+
enum ESPType
276+
{
277+
EsptypeGlobalTwoStep, // global group by sequence project
278+
EsptypeGlobalOneStep, // global group by sequence project
279+
EsptypeLocal, // local group by sequence project
280+
281+
EsptypeSentinel
282+
};
283+
272284
// aggregate type
273285
enum EGbAggType
274286
{
287+
// todo(jiaqizho): change to onestep, twostep(global), twostep(local)
275288
EgbaggtypeGlobal, // global group by aggregate
276289
EgbaggtypeLocal, // local group by aggregate
277290
EgbaggtypeIntermediate, // intermediate group by aggregate

src/backend/gporca/libgpopt/include/gpopt/operators/CPhysicalSequenceProject.h

+12-2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ class CDistributionSpec;
3232
class CPhysicalSequenceProject : public CPhysical
3333
{
3434
private:
35+
// window type
36+
ESPType m_sptype;
37+
3538
// partition by keys
3639
CDistributionSpec *m_pds;
3740

@@ -57,8 +60,8 @@ class CPhysicalSequenceProject : public CPhysical
5760
CPhysicalSequenceProject(const CPhysicalSequenceProject &) = delete;
5861

5962
// ctor
60-
CPhysicalSequenceProject(CMemoryPool *mp, CDistributionSpec *pds,
61-
COrderSpecArray *pdrgpos,
63+
CPhysicalSequenceProject(CMemoryPool *mp, ESPType m_sptype,
64+
CDistributionSpec *pds, COrderSpecArray *pdrgpos,
6265
CWindowFrameArray *pdrgpwf);
6366

6467
// dtor
@@ -78,6 +81,13 @@ class CPhysicalSequenceProject : public CPhysical
7881
return "CPhysicalSequenceProject";
7982
}
8083

84+
// window type
85+
COperator::ESPType
86+
Pspt() const
87+
{
88+
return m_sptype;
89+
}
90+
8191
// partition by keys
8292
CDistributionSpec *
8393
Pds() const

src/backend/gporca/libgpopt/include/gpopt/xforms/CXform.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
#include "naucrates/traceflags/traceflags.h"
2626

2727
// Macro for enabling and disabling xforms
28-
#define GPOPT_DISABLE_XFORM_TF(x) EopttraceDisableXformBase + static_cast<int>(x)
28+
#define GPOPT_DISABLE_XFORM_TF(x) \
29+
EopttraceDisableXformBase + static_cast<int>(x)
2930
#define GPOPT_ENABLE_XFORM(x) GPOS_UNSET_TRACE(GPOPT_DISABLE_XFORM_TF(x))
3031
#define GPOPT_DISABLE_XFORM(x) GPOS_SET_TRACE(GPOPT_DISABLE_XFORM_TF(x))
3132
#define GPOPT_FENABLED_XFORM(x) !GPOS_FTRACE(GPOPT_DISABLE_XFORM_TF(x))
@@ -236,6 +237,7 @@ class CXform : public CRefCount, public DbgPrintMixin<CXform>
236237
ExfLimit2IndexOnlyGet,
237238
ExfFullOuterJoin2HashJoin,
238239
ExfFullJoinCommutativity,
240+
ExfSplitWindowFunc,
239241
ExfInvalid,
240242
ExfSentinel = ExfInvalid
241243
};
@@ -297,7 +299,8 @@ class CXform : public CRefCount, public DbgPrintMixin<CXform>
297299
}
298300

299301
// check compatibility with another xform
300-
virtual BOOL FCompatible(CXform::EXformId)
302+
virtual BOOL
303+
FCompatible(CXform::EXformId)
301304
{
302305
return true;
303306
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*
20+
* @filename:
21+
* CXformSplitWindowFunc.h
22+
*
23+
* @doc:
24+
* Split a window function into pair of local and global window function
25+
*
26+
*-------------------------------------------------------------------------
27+
*/
28+
#ifndef GPOPT_CXformSplitWindowFunc_H
29+
#define GPOPT_CXformSplitWindowFunc_H
30+
31+
#include "gpos/base.h"
32+
33+
#include "gpopt/xforms/CXformExploration.h"
34+
35+
namespace gpopt
36+
{
37+
using namespace gpos;
38+
39+
//---------------------------------------------------------------------------
40+
// @class:
41+
// CXformSplitWindowFunc
42+
//
43+
// @doc:
44+
// Split a window function into pair of local and global window function
45+
//
46+
//---------------------------------------------------------------------------
47+
class CXformSplitWindowFunc : public CXformExploration
48+
{
49+
private:
50+
// generate a project lists for the local and global window function
51+
// from the original window function
52+
static CExpression *PexprWindowFunc(
53+
CMemoryPool *mp, // memory pool
54+
CExpression *
55+
pexprProjListOrig, // project list of the original global window function
56+
CExpression *
57+
ppexprProjListLocal, // project list of the new local window function
58+
CExpression *
59+
ppexprProjListGlobal // project list of the new global window function
60+
);
61+
62+
static void PopulateLocalGlobalProjectList(
63+
CMemoryPool *mp, CExpression *pexprProjList,
64+
CExpression **ppexprProjListLocal, CExpression **ppexprProjListGlobal);
65+
66+
static bool CheckFilterAndProjectList(CExpression *pexprSelect,
67+
CExpression *pexprProjList);
68+
69+
static void PopulateSelect(CMemoryPool *mp, CExpression *pexprSelect,
70+
COperator **ppSelectCopy);
71+
72+
public:
73+
CXformSplitWindowFunc(const CXformSplitWindowFunc &) = delete;
74+
75+
// ctor
76+
explicit CXformSplitWindowFunc(CMemoryPool *mp);
77+
78+
// dtor
79+
~CXformSplitWindowFunc() override = default;
80+
81+
// ident accessors
82+
EXformId
83+
Exfid() const override
84+
{
85+
return ExfSplitWindowFunc;
86+
}
87+
88+
// return a string for xform name
89+
const CHAR *
90+
SzId() const override
91+
{
92+
return "CXformSplitWindowFunc";
93+
}
94+
95+
// Compatibility function for splitting limit
96+
BOOL
97+
FCompatible(CXform::EXformId exfid) override
98+
{
99+
return (CXform::ExfSplitWindowFunc != exfid);
100+
}
101+
102+
// compute xform promise for a given expression handle
103+
EXformPromise Exfp(CExpressionHandle &exprhdl) const override;
104+
105+
// actual transform
106+
void Transform(CXformContext *pxfctxt, CXformResult *pxfres,
107+
CExpression *pexpr) const override;
108+
109+
}; // class CXformSplitWindowFunc
110+
111+
} // namespace gpopt
112+
113+
#endif // !GPOPT_CXformSplitWindowFunc_H
114+
115+
// EOF

0 commit comments

Comments
 (0)