Skip to content

Commit f4ad0f7

Browse files
authored
ORCA: Support two phase window functions
For TPC-DS 67 queries, due to data skew, most of the data will be motioned to limited segments for processing. If data is filtered before gathering, it can significantly reduce the amount of data that upstream nodes need to process and improve query performance. So we introduce two-phase WindowAgg for the case: explain SELECT * FROM (SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 < 3; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..0.00 rows=5 width=16) -> Result (cost=0.00..0.00 rows=2 width=16) Filter: ((rank() OVER (?)) < 3) -> WindowAgg (cost=0.00..0.00 rows=2 width=16) Partition By: four Order By: ten -> Sort (cost=0.00..6.00 rows=2 width=8) Sort Key: four, ten -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..6.00 rows=2 width=8) Hash Key: four -> Result (cost=0.00..6.00 rows=2 width=8) Filter: ((rank() OVER (?)) < 3) -> WindowAgg (cost=0.00..6.00 rows=4 width=16) Partition By: four Order By: ten -> Sort (cost=0.00..6.00 rows=4 width=8) Sort Key: four, ten -> Index Scan using tenk1_unique2 on tenk1 (cost=0.00..6.00 rows=4 width=8) Index Cond: (unique2 < 10) Optimizer: GPORCA (20 rows)
1 parent 3fbebea commit f4ad0f7

37 files changed

+1718
-46
lines changed

src/backend/gpopt/config/CConfigParamMapping.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
265265
false, // m_negate_param
266266
GPOS_WSZ_LIT(
267267
"Always pick a plan for aggregate distinct that minimizes skew.")},
268+
269+
{EopttraceForceSplitWindowFunc, &optimizer_force_split_window_function,
270+
false, // m_negate_param
271+
GPOS_WSZ_LIT(
272+
"Always split the window function.")},
268273

269274
{EopttraceEnableEagerAgg, &optimizer_enable_eageragg,
270275
false, // m_negate_param

src/backend/gpopt/utils/COptTasks.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ COptTasks::CreateOptimizerConfig(CMemoryPool *mp, ICostModel *cost_model,
403403
push_group_by_below_setop_threshold, xform_bind_threshold,
404404
skew_factor),
405405
plan_hints,
406-
GPOS_NEW(mp) CWindowOids(OID(F_ROW_NUMBER), OID(F_RANK_)));
406+
GPOS_NEW(mp) CWindowOids(mp, OID(F_ROW_NUMBER), OID(F_RANK_), OID(F_DENSE_RANK_)));
407407
}
408408

409409
//---------------------------------------------------------------------------

src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,12 +1594,19 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
15941594
GPOS_ASSERT(COperator::EopPhysicalSequenceProject ==
15951595
exprhdl.Pop()->Eopid());
15961596

1597+
CPhysicalSequenceProject *psp = CPhysicalSequenceProject::PopConvert(exprhdl.Pop());
1598+
1599+
if (GPOS_FTRACE(EopttraceForceSplitWindowFunc) &&
1600+
psp->Pspt() == COperator::EsptypeGlobalTwoStep) {
1601+
return CCost(0);
1602+
}
1603+
15971604
const DOUBLE num_rows_outer = pci->PdRows()[0];
15981605
const DOUBLE dWidthOuter = pci->GetWidth()[0];
15991606

16001607
ULONG ulSortCols = 0;
16011608
COrderSpecArray *pdrgpos =
1602-
CPhysicalSequenceProject::PopConvert(exprhdl.Pop())->Pdrgpos();
1609+
CPhysicalSequenceProject::PopConvert(psp)->Pdrgpos();
16031610
const ULONG ulOrderSpecs = pdrgpos->Size();
16041611
for (ULONG ul = 0; ul < ulOrderSpecs; ul++)
16051612
{
@@ -1619,7 +1626,7 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
16191626
dTupDefaultProcCostUnit));
16201627
CCost costChild =
16211628
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
1622-
1629+
16231630
return costLocal + costChild;
16241631
}
16251632

src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -320,12 +320,10 @@ class CUtils
320320
BOOL fNewComputedCol);
321321

322322
// generate a sequence project expression
323-
static CExpression *PexprLogicalSequenceProject(CMemoryPool *mp,
324-
CDistributionSpec *pds,
325-
COrderSpecArray *pdrgpos,
326-
CWindowFrameArray *pdrgpwf,
327-
CExpression *pexpr,
328-
CExpression *pexprPrjList);
323+
static CExpression *PexprLogicalSequenceProject(
324+
CMemoryPool *mp, COperator::ESPType sptype, CDistributionSpec *pds,
325+
COrderSpecArray *pdrgpos, CWindowFrameArray *pdrgpwf,
326+
CExpression *pexpr, CExpression *pexprPrjList);
329327

330328
// generate a projection of NULL constants
331329
// to the map 'colref_mapping', and add the mappings to the colref_mapping map if not NULL

src/backend/gporca/libgpopt/include/gpopt/base/CWindowOids.h

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,16 @@
1717

1818
#define DUMMY_ROW_NUMBER_OID OID(7000)
1919
#define DUMMY_WIN_RANK OID(7001)
20+
#define DUMMY_WIN_DENSE_RANK OID(7002)
2021

22+
namespace gpmd
23+
{
24+
class IMDId;
25+
}
2126

2227
namespace gpopt
2328
{
29+
using namespace gpmd;
2430
//---------------------------------------------------------------------------
2531
// @class:
2632
// CWindowOids
@@ -35,17 +41,36 @@ class CWindowOids : public CRefCount
3541
// oid of window operation "row_number" function
3642
OID m_oidRowNumber;
3743

44+
// metadata id of window operation "row_number" function
45+
IMDId *m_MDIdRowNumber;
46+
3847
// oid of window operation "rank" function
3948
OID m_oidRank;
4049

50+
// metadata id of window operation "rank" function
51+
IMDId *m_MDIdRank;
52+
53+
// oid of window operation "dense_rank" function
54+
OID m_oidDenseRank;
55+
56+
// metadata id of window operation "dense_rank" function
57+
IMDId *m_MDDenseRank;
58+
4159
public:
42-
CWindowOids(OID row_number_oid, OID rank_oid);
60+
CWindowOids(CMemoryPool *mp, OID row_number_oid, OID rank_oid,
61+
OID dense_rank_oid);
4362

4463
// accessor of oid value of "row_number" function
4564
OID OidRowNumber() const;
65+
IMDId *MDIdRowNumber() const;
4666

4767
// accessor of oid value of "rank" function
4868
OID OidRank() const;
69+
IMDId *MDIdRank() const;
70+
71+
// accessor of oid value of "dense_rank" function
72+
OID OidDenseRank() const;
73+
IMDId *MDIdDenseRank() const;
4974

5075
// generate default window oids
5176
static CWindowOids *GetWindowOids(CMemoryPool *mp);

src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionHandle.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,13 @@ class CExpressionHandle
173173
// stats of attached expr/gexpr
174174
IStatistics *Pstats();
175175

176+
// pop the memory pool
177+
CMemoryPool *
178+
Pmp() const
179+
{
180+
return m_mp;
181+
}
182+
176183
// required properties of attached expr/gexpr
177184
CReqdProp *
178185
Prp() const

src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalSequenceProject.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ class CDistributionSpec;
3333
class CLogicalSequenceProject : public CLogicalUnary
3434
{
3535
private:
36+
// type of sequence project
37+
COperator::ESPType m_sptype;
38+
3639
// partition by keys
3740
CDistributionSpec *m_pds;
3841

@@ -58,8 +61,8 @@ class CLogicalSequenceProject : public CLogicalUnary
5861
CLogicalSequenceProject(const CLogicalSequenceProject &) = delete;
5962

6063
// ctor
61-
CLogicalSequenceProject(CMemoryPool *mp, CDistributionSpec *pds,
62-
COrderSpecArray *pdrgpos,
64+
CLogicalSequenceProject(CMemoryPool *mp, COperator::ESPType sptype,
65+
CDistributionSpec *pds, COrderSpecArray *pdrgpos,
6366
CWindowFrameArray *pdrgpwf);
6467

6568
// ctor for pattern
@@ -82,6 +85,13 @@ class CLogicalSequenceProject : public CLogicalUnary
8285
return "CLogicalSequenceProject";
8386
}
8487

88+
// window type
89+
COperator::ESPType
90+
Pspt() const
91+
{
92+
return m_sptype;
93+
}
94+
8595
// distribution spec
8696
CDistributionSpec *
8797
Pds() const
@@ -123,7 +133,8 @@ class CLogicalSequenceProject : public CLogicalUnary
123133
BOOL must_exist) override;
124134

125135
// return true if we can pull projections up past this operator from its given child
126-
BOOL FCanPullProjectionsUp(ULONG //child_index
136+
BOOL
137+
FCanPullProjectionsUp(ULONG //child_index
127138
) const override
128139
{
129140
return false;
@@ -180,6 +191,9 @@ class CLogicalSequenceProject : public CLogicalUnary
180191
// print
181192
IOstream &OsPrint(IOstream &os) const override;
182193

194+
static IOstream &OsPrintWindowType(IOstream &os,
195+
COperator::ESPType wintype);
196+
183197
// remove outer references from Order By/ Partition By clauses, and return a new operator
184198
CLogicalSequenceProject *PopRemoveLocalOuterRefs(
185199
CMemoryPool *mp, CExpressionHandle &exprhdl);

src/backend/gporca/libgpopt/include/gpopt/operators/COperator.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,12 +266,25 @@ class COperator : public CRefCount, public DbgPrintMixin<COperator>
266266

267267
EopLogicalIndexOnlyGet,
268268
EopLogicalDynamicIndexOnlyGet,
269+
270+
EopLogicalWindowFunc,
269271
EopSentinel
270272
};
271273

274+
// sequence project type
275+
enum ESPType
276+
{
277+
EsptypeGlobalTwoStep, // global group by sequence project
278+
EsptypeGlobalOneStep, // global group by sequence project
279+
EsptypeLocal, // local group by sequence project
280+
281+
EsptypeSentinel
282+
};
283+
272284
// aggregate type
273285
enum EGbAggType
274286
{
287+
// todo(jiaqizho): change to onestep, twostep(global), twostep(local)
275288
EgbaggtypeGlobal, // global group by aggregate
276289
EgbaggtypeLocal, // local group by aggregate
277290
EgbaggtypeIntermediate, // intermediate group by aggregate

src/backend/gporca/libgpopt/include/gpopt/operators/CPhysicalSequenceProject.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ class CDistributionSpec;
3232
class CPhysicalSequenceProject : public CPhysical
3333
{
3434
private:
35+
// window type
36+
ESPType m_sptype;
37+
3538
// partition by keys
3639
CDistributionSpec *m_pds;
3740

@@ -57,8 +60,8 @@ class CPhysicalSequenceProject : public CPhysical
5760
CPhysicalSequenceProject(const CPhysicalSequenceProject &) = delete;
5861

5962
// ctor
60-
CPhysicalSequenceProject(CMemoryPool *mp, CDistributionSpec *pds,
61-
COrderSpecArray *pdrgpos,
63+
CPhysicalSequenceProject(CMemoryPool *mp, ESPType m_sptype,
64+
CDistributionSpec *pds, COrderSpecArray *pdrgpos,
6265
CWindowFrameArray *pdrgpwf);
6366

6467
// dtor
@@ -78,6 +81,13 @@ class CPhysicalSequenceProject : public CPhysical
7881
return "CPhysicalSequenceProject";
7982
}
8083

84+
// window type
85+
COperator::ESPType
86+
Pspt() const
87+
{
88+
return m_sptype;
89+
}
90+
8191
// partition by keys
8292
CDistributionSpec *
8393
Pds() const

src/backend/gporca/libgpopt/include/gpopt/xforms/CXform.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
#include "naucrates/traceflags/traceflags.h"
2626

2727
// Macro for enabling and disabling xforms
28-
#define GPOPT_DISABLE_XFORM_TF(x) EopttraceDisableXformBase + static_cast<int>(x)
28+
#define GPOPT_DISABLE_XFORM_TF(x) \
29+
EopttraceDisableXformBase + static_cast<int>(x)
2930
#define GPOPT_ENABLE_XFORM(x) GPOS_UNSET_TRACE(GPOPT_DISABLE_XFORM_TF(x))
3031
#define GPOPT_DISABLE_XFORM(x) GPOS_SET_TRACE(GPOPT_DISABLE_XFORM_TF(x))
3132
#define GPOPT_FENABLED_XFORM(x) !GPOS_FTRACE(GPOPT_DISABLE_XFORM_TF(x))
@@ -236,6 +237,7 @@ class CXform : public CRefCount, public DbgPrintMixin<CXform>
236237
ExfLimit2IndexOnlyGet,
237238
ExfFullOuterJoin2HashJoin,
238239
ExfFullJoinCommutativity,
240+
ExfSplitWindowFunc,
239241
ExfInvalid,
240242
ExfSentinel = ExfInvalid
241243
};
@@ -297,7 +299,8 @@ class CXform : public CRefCount, public DbgPrintMixin<CXform>
297299
}
298300

299301
// check compatibility with another xform
300-
virtual BOOL FCompatible(CXform::EXformId)
302+
virtual BOOL
303+
FCompatible(CXform::EXformId)
301304
{
302305
return true;
303306
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*
20+
* @filename:
21+
* CXformSplitWindowFunc.h
22+
*
23+
* @doc:
24+
* Split a window function into pair of local and global window function
25+
*
26+
*-------------------------------------------------------------------------
27+
*/
28+
#ifndef GPOPT_CXformSplitWindowFunc_H
29+
#define GPOPT_CXformSplitWindowFunc_H
30+
31+
#include "gpos/base.h"
32+
33+
#include "gpopt/xforms/CXformExploration.h"
34+
35+
namespace gpopt
36+
{
37+
using namespace gpos;
38+
39+
//---------------------------------------------------------------------------
40+
// @class:
41+
// CXformSplitWindowFunc
42+
//
43+
// @doc:
44+
// Split a window function into pair of local and global window function
45+
//
46+
//---------------------------------------------------------------------------
47+
class CXformSplitWindowFunc : public CXformExploration
48+
{
49+
private:
50+
// generate a project lists for the local and global window function
51+
// from the original window function
52+
static CExpression *PexprWindowFunc(
53+
CMemoryPool *mp, // memory pool
54+
CExpression *
55+
pexprProjListOrig, // project list of the original global window function
56+
CExpression *
57+
ppexprProjListLocal, // project list of the new local window function
58+
CExpression *
59+
ppexprProjListGlobal // project list of the new global window function
60+
);
61+
62+
static void PopulateLocalGlobalProjectList(
63+
CMemoryPool *mp, CExpression *pexprProjList,
64+
CExpression **ppexprProjListLocal, CExpression **ppexprProjListGlobal);
65+
66+
static bool CheckFilterAndProjectList(CExpression *pexprSelect,
67+
CExpression *pexprProjList);
68+
69+
static void PopulateSelect(CMemoryPool *mp, CExpression *pexprSelect,
70+
COperator **ppSelectCopy);
71+
72+
public:
73+
CXformSplitWindowFunc(const CXformSplitWindowFunc &) = delete;
74+
75+
// ctor
76+
explicit CXformSplitWindowFunc(CMemoryPool *mp);
77+
78+
// dtor
79+
~CXformSplitWindowFunc() override = default;
80+
81+
// ident accessors
82+
EXformId
83+
Exfid() const override
84+
{
85+
return ExfSplitWindowFunc;
86+
}
87+
88+
// return a string for xform name
89+
const CHAR *
90+
SzId() const override
91+
{
92+
return "CXformSplitWindowFunc";
93+
}
94+
95+
// Compatibility function for splitting limit
96+
BOOL
97+
FCompatible(CXform::EXformId exfid) override
98+
{
99+
return (CXform::ExfSplitWindowFunc != exfid);
100+
}
101+
102+
// compute xform promise for a given expression handle
103+
EXformPromise Exfp(CExpressionHandle &exprhdl) const override;
104+
105+
// actual transform
106+
void Transform(CXformContext *pxfctxt, CXformResult *pxfres,
107+
CExpression *pexpr) const override;
108+
109+
}; // class CXformSplitWindowFunc
110+
111+
} // namespace gpopt
112+
113+
#endif // !GPOPT_CXformSplitWindowFunc_H
114+
115+
// EOF

0 commit comments

Comments
 (0)