Skip to content

Commit 0af1749

Browse files
committed
Refactor analyze for PAX
Refactor analyze logic for PAX like AOCO.
1 parent acec354 commit 0af1749

File tree

6 files changed

+147
-32
lines changed

6 files changed

+147
-32
lines changed

contrib/pax_storage/src/cpp/access/pax_access_handle.cc

Lines changed: 123 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "storage/paxc_smgr.h"
5050
#include "storage/wal/pax_wal.h"
5151
#include "storage/wal/paxc_wal.h"
52+
#include "storage/pax_itemptr.h"
5253

5354
#define NOT_IMPLEMENTED_YET \
5455
ereport(ERROR, \
@@ -64,6 +65,111 @@
6465

6566
// access methods that are implemented in C++
6667
namespace pax {
68+
struct AnalyzeBlockItem {
69+
int block;
70+
int row_count;
71+
int64 start_sample_block;
72+
int64 end_sample_block;
73+
};
74+
75+
// Walks every micro partition that MicroPartitionIterator yields for `rel`
// with `snapshot` and records its id and tuple count.
//
// On return `*totalrows` holds the sum of all recorded tuple counts and the
// returned items are ordered by ascending micro partition id. The sample
// range members of the items are not assigned here.
static std::vector<AnalyzeBlockItem> extract_micro_partitions(Relation rel, Snapshot snapshot, int64 *totalrows) {
  std::vector<AnalyzeBlockItem> partitions;
  int64 tuple_sum = 0;

  auto mp_iter = pax::MicroPartitionIterator::New(rel, snapshot);
  while (mp_iter->HasNext()) {
    auto meta = mp_iter->Next();

    AnalyzeBlockItem entry;
    entry.block = meta.GetMicroPartitionId();
    entry.row_count = meta.GetTupleCount();
    // Every micro partition handed out by the iterator is expected to hold
    // at least one tuple.
    Assert(entry.row_count > 0);

    tuple_sum += entry.row_count;
    partitions.push_back(entry);
  }
  mp_iter->Release();
  *totalrows = tuple_sum;

  // Order by micro partition id so later lookups can advance monotonically.
  const auto by_block_id = [](const AnalyzeBlockItem &lhs,
                              const AnalyzeBlockItem &rhs) {
    return lhs.block < rhs.block;
  };
  std::sort(partitions.begin(), partitions.end(), by_block_id);
  return partitions;
}
98+
99+
static std::vector<AnalyzeBlockItem>
100+
extract_sample_items(Relation rel, Snapshot snapshot, int64 *totalrows) {
101+
std::vector<AnalyzeBlockItem> analyze_items;
102+
analyze_items = extract_micro_partitions(rel, snapshot, totalrows);
103+
104+
int64 row_index = 0;
105+
for (size_t i = 0; i < analyze_items.size(); i++) {
106+
auto &item = analyze_items[i];
107+
item.start_sample_block = row_index;
108+
item.end_sample_block = row_index + item.row_count;
109+
row_index = item.end_sample_block;
110+
}
111+
112+
return analyze_items;
113+
}
114+
115+
// Samples rows of a PAX relation for ANALYZE.
//
// The relation-wide row indexes [0, ntuples) are laid out over the micro
// partitions by extract_sample_items(). A RowSampler picks row indexes, each
// picked index is translated to (micro partition id, offset) and fetched via
// PaxIndexScanDesc::FetchTuple.
//
// Parameters:
//   onerel         relation being analyzed
//   snapshot       snapshot used to enumerate micro partitions and fetch rows
//   rows           output array; receives copies of the fetched tuples
//   targrows       requested sample size, passed to RowSampler_Init
//   totalrows      out: ntuples minus the estimated dead rows
//   totaldeadrows  out: dead-row estimate, scaled from the sampled dead ratio
//
// Returns the number of tuples stored in `rows`.
static int pax_acquire_sample_rows(Relation onerel, Snapshot snapshot,
                                   HeapTuple *rows, int targrows,
                                   double *totalrows, double *totaldeadrows) {
  int64 ntuples = 0;
  std::vector<AnalyzeBlockItem> analyze_items =
      extract_sample_items(onerel, snapshot, &ntuples);

  TupleTableSlot *slot = cbdb::MakeSingleTupleTableSlot(
      RelationGetDescr(onerel), table_slot_callbacks(onerel));

  // start sample rows
  RowSamplerData rs;
  size_t analyze_item_index = 0;
  int numrows = 0;
  double deadrows = 0;

  PaxIndexScanDesc desc(onerel);
  RowSampler_Init(&rs, ntuples, targrows, random());
  while (RowSampler_HasMore(&rs)) {
    int64 sample_row = RowSampler_Next(&rs);
    cbdb::VacuumDelayPoint();

    // Seek to the analyze item that covers sample_row. The index only moves
    // forward, which relies on the sampler returning ascending row indexes
    // (NOTE(review): confirm against utils/sampling.h).
    while (analyze_item_index < analyze_items.size() &&
           sample_row >= analyze_items[analyze_item_index].end_sample_block) {
      analyze_item_index++;
    }
    if (analyze_item_index == analyze_items.size()) {
      break;
    }

    const auto &item = analyze_items[analyze_item_index];
    Assert(sample_row >= item.start_sample_block &&
           sample_row < item.end_sample_block);
    Assert(sample_row - item.start_sample_block < item.row_count);

    int offset = static_cast<int>(sample_row - item.start_sample_block);

    ItemPointerData ctid = pax::MakeCTID(item.block, offset);

    bool ok = desc.FetchTuple(&ctid, snapshot, slot, nullptr, nullptr);
    if (ok) {
      // The sampler is expected to hand out at most targrows rows, so the
      // caller-provided array must still have room.
      Assert(numrows < targrows);
      rows[numrows++] = cbdb::ExecCopyHeapTuple(slot);
    } else {
      // A row that cannot be fetched is counted as dead.
      deadrows += 1;
    }
    cbdb::ExecClearTuple(slot);
  }

  // Scale the dead ratio observed in the sample up to the whole relation.
  // When nothing was sampled (for example an empty relation) rs.m is 0 and
  // the unguarded division would yield NaN for both outputs.
  if (rs.m > 0) {
    *totaldeadrows = deadrows / rs.m * static_cast<double>(ntuples);
  } else {
    *totaldeadrows = 0;
  }
  *totalrows = static_cast<double>(ntuples) - *totaldeadrows;

  desc.Release();
  cbdb::ExecDropSingleTupleTableSlot(slot);

  return numrows;
}
67173

68174
TableScanDesc CCPaxAccessMethod::ScanBegin(Relation relation, Snapshot snapshot,
69175
int nkeys, struct ScanKeyData *key,
@@ -285,33 +391,34 @@ TM_Result CCPaxAccessMethod::TupleUpdate(Relation relation, ItemPointer otid,
285391
pg_unreachable();
286392
}
287393

288-
bool CCPaxAccessMethod::ScanAnalyzeNextBlock(TableScanDesc scan,
289-
BlockNumber blockno,
290-
BufferAccessStrategy bstrategy) {
394+
int CCPaxAccessMethod::AcquireSampleRows(Relation onerel, int elevel, HeapTuple *rows,
395+
int targrows, double *totalrows, double *totaldeadrows) {
396+
auto snapshot = GetCatalogSnapshot(InvalidOid);
291397
CBDB_TRY();
292398
{
293-
auto desc = PaxScanDesc::ToDesc(scan);
294-
return desc->ScanAnalyzeNextBlock(blockno, bstrategy);
399+
return pax_acquire_sample_rows(onerel, snapshot, rows, targrows,
400+
totalrows, totaldeadrows);
295401
}
296402
CBDB_CATCH_DEFAULT();
297-
CBDB_FINALLY({});
298403
CBDB_END_TRY();
299404
pg_unreachable();
300405
}
301406

407+
// Block-based ANALYZE callback; not supported for PAX relations.
//
// Unconditionally raises ERROR with ERRCODE_FEATURE_NOT_SUPPORTED; none of
// the arguments are read and no value is returned.
// NOTE(review): presumably ANALYZE reaches PAX through AcquireSampleRows
// instead of this callback - confirm against the caller in analyze.c.
bool CCPaxAccessMethod::ScanAnalyzeNextBlock(TableScanDesc scan,
408+
BlockNumber blockno,
409+
BufferAccessStrategy bstrategy) {
410+
ereport(ERROR,
411+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
412+
errmsg("analyze next block is not supported on pax relations")));
413+
}
414+
302415
bool CCPaxAccessMethod::ScanAnalyzeNextTuple(TableScanDesc scan,
303416
TransactionId oldest_xmin,
304417
double *liverows, double *deadrows,
305418
TupleTableSlot *slot) {
306-
CBDB_TRY();
307-
{
308-
auto desc = PaxScanDesc::ToDesc(scan);
309-
return desc->ScanAnalyzeNextTuple(oldest_xmin, liverows, deadrows, slot);
310-
}
311-
CBDB_CATCH_DEFAULT();
312-
CBDB_FINALLY({});
313-
CBDB_END_TRY();
314-
pg_unreachable();
419+
ereport(ERROR,
420+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
421+
errmsg("analyze next tuple is not supported on pax relations")));
315422
}
316423

317424
bool CCPaxAccessMethod::ScanBitmapNextBlock(TableScanDesc scan,
@@ -770,6 +877,7 @@ static const TableAmRoutine kPaxColumnMethods = {
770877
.relation_vacuum = paxc::PaxAccessMethod::RelationVacuum,
771878
.scan_analyze_next_block = pax::CCPaxAccessMethod::ScanAnalyzeNextBlock,
772879
.scan_analyze_next_tuple = pax::CCPaxAccessMethod::ScanAnalyzeNextTuple,
880+
.relation_acquire_sample_rows = pax::CCPaxAccessMethod::AcquireSampleRows,
773881
.index_build_range_scan = paxc::PaxAccessMethod::IndexBuildRangeScan,
774882
.index_validate_scan = paxc::PaxAccessMethod::IndexValidateScan,
775883

contrib/pax_storage/src/cpp/access/pax_access_handle.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ class CCPaxAccessMethod final {
171171

172172
static void RelationNontransactionalTruncate(Relation rel);
173173

174+
static int AcquireSampleRows(Relation onerel, int elevel, HeapTuple *rows,
175+
int targrows, double *totalrows, double *totaldeadrows);
174176
static bool ScanAnalyzeNextBlock(TableScanDesc scan, BlockNumber blockno,
175177
BufferAccessStrategy bstrategy);
176178
static bool ScanAnalyzeNextTuple(TableScanDesc scan,

contrib/pax_storage/src/cpp/comm/cbdb_api.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ extern "C" {
7979
#include "commands/defrem.h"
8080
#include "commands/progress.h"
8181
#include "commands/tablecmds.h"
82+
#include "commands/vacuum.h"
8283
#include "funcapi.h"
8384
#include "miscadmin.h"
8485
#include "nodes/bitmapset.h"
@@ -135,6 +136,7 @@ extern "C" {
135136
#include "utils/memutils.h"
136137
#include "utils/numeric.h"
137138
#include "utils/relcache.h"
139+
#include "utils/sampling.h"
138140
#include "utils/snapshot.h"
139141
#include "utils/spccache.h"
140142
#include "utils/syscache.h"

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,12 @@ TupleTableSlot *cbdb::MakeSingleTupleTableSlot(
611611
CBDB_WRAP_END;
612612
}
613613

614+
// Forwards to the global ::ExecCopySlotHeapTuple for `slot` and returns its
// result, with the call bracketed by CBDB_WRAP_START / CBDB_WRAP_END.
// NOTE(review): the wrap macros presumably convert errors raised by the
// wrapped call into C++ exceptions - confirm at the macro definition.
HeapTuple cbdb::ExecCopyHeapTuple(TupleTableSlot *slot) {
615+
CBDB_WRAP_START;
616+
{ return ::ExecCopySlotHeapTuple(slot); }
617+
CBDB_WRAP_END;
618+
}
619+
614620
void cbdb::SlotGetAllAttrs(TupleTableSlot *slot) {
615621
CBDB_WRAP_START;
616622
{ ::slot_getallattrs(slot); }
@@ -628,3 +634,9 @@ void cbdb::ExecStoreVirtualTuple(TupleTableSlot *slot) {
628634
{ ::ExecStoreVirtualTuple(slot); }
629635
CBDB_WRAP_END;
630636
}
637+
638+
// Calls vacuum_delay_point() with the call bracketed by CBDB_WRAP_START /
// CBDB_WRAP_END; takes no arguments and returns nothing.
// NOTE(review): the wrap macros presumably convert errors raised by the
// wrapped call into C++ exceptions - confirm at the macro definition.
void cbdb::VacuumDelayPoint() {
639+
CBDB_WRAP_START;
640+
{ vacuum_delay_point(); }
641+
CBDB_WRAP_END;
642+
}

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,13 +285,16 @@ bool NeedWAL(Relation rel);
285285
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot);
286286
TupleTableSlot *MakeSingleTupleTableSlot(TupleDesc tupdesc,
287287
const TupleTableSlotOps *tts_ops);
288+
HeapTuple ExecCopyHeapTuple(TupleTableSlot *slot);
289+
288290

289291
void SlotGetAllAttrs(TupleTableSlot *slot);
290292

291293
void ExecClearTuple(TupleTableSlot *slot);
292294

293295
void ExecStoreVirtualTuple(TupleTableSlot *slot);
294296

297+
void VacuumDelayPoint();
295298
} // namespace cbdb
296299

297300
// clang-format off

src/backend/commands/analyze.c

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,24 +1728,12 @@ acquire_sample_rows(Relation onerel, int elevel,
17281728
* the relation should not be an AO/CO table.
17291729
*/
17301730
Assert(!RelationIsAppendOptimized(onerel));
1731-
if (RelationIsPax(onerel))
1732-
{
1733-
/* PAX use non-fixed block layout */
1734-
BlockNumber pages;
1735-
double tuples;
1736-
double allvisfrac;
1737-
int32 attr_widths;
1738-
1739-
table_relation_estimate_size(onerel, &attr_widths, &pages,
1740-
&tuples, &allvisfrac);
1741-
1742-
if (tuples > UINT_MAX)
1743-
tuples = UINT_MAX;
1731+
/*
1732+
* PAX uses table_relation_acquire_sample_rows() as well.
1733+
*/
1734+
Assert(!RelationIsPax(onerel));
17441735

1745-
totalblocks = (BlockNumber)tuples;
1746-
}
1747-
else
1748-
totalblocks = RelationGetNumberOfBlocks(onerel);
1736+
totalblocks = RelationGetNumberOfBlocks(onerel);
17491737

17501738
/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
17511739
OldestXmin = GetOldestNonRemovableTransactionId(onerel);

0 commit comments

Comments
 (0)