-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Index backfill optimization to only read columns present in index definition #29928
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b39fb98
8be0002
b40157a
fb424ac
a8937a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3340,6 +3340,65 @@ YbApplyMergeSortKeys(YbScanDesc ybScan, Scan *pg_scan_plan) | |
| HandleYBStatus(YBCPgDmlSetMergeSortKeys(stmt, sort_info->numCols, yb_sort_keys)); | ||
| } | ||
|
|
||
| /* | ||
| * Helper function to allocate and initialize basic scan descriptor fields. | ||
| * This is shared between ybcBeginScan and ybc_heap_beginscan_for_index_build. | ||
| * | ||
| * relation, keys and nkeys are stored as-is into rs_rd, rs_key and rs_nkeys; | ||
| * the key array itself is not copied. Every other field is left zeroed by | ||
| * palloc0, so callers fill in the rest (snapshot, flags, index, handle, ...) | ||
| * themselves. | ||
| * | ||
| * Returns the newly allocated descriptor. | ||
| */ | ||
| static YbScanDesc | ||
| ybcAllocScanDesc(Relation relation, int nkeys, ScanKey keys) | ||
| { | ||
| YbScanDesc ybScan = (YbScanDesc) palloc0(sizeof(YbScanDescData)); | ||
| TableScanDesc tsdesc = (TableScanDesc) ybScan; /* same allocation as ybScan; assumes YbScanDescData starts with the TableScanDesc fields - TODO confirm */ | ||
|
|
||
| tsdesc->rs_rd = relation; | ||
| tsdesc->rs_key = keys; | ||
| tsdesc->rs_nkeys = nkeys; | ||
|
|
||
| return ybScan; | ||
| } | ||
|
|
||
| /* | ||
| * Helper function to extract scan keys and check for unsatisfiable conditions. | ||
| * Returns true if scan keys are valid and scan should continue, | ||
| * false if scan should quit early (sets quit_scan = true). | ||
| * This is shared between ybcBeginScan and ybc_heap_beginscan_for_index_build. | ||
| * | ||
| * ybScan receives the flattened keys; the unsatisfiable check is then run on | ||
| * ybScan->nkeys / ybScan->keys rather than on the raw nkeys / keys arguments. | ||
| * On the true path quit_scan is not written here, so callers that need it | ||
| * explicitly false must set it themselves. | ||
| */ | ||
| static bool | ||
| ybcExtractAndCheckScanKeysValid(YbScanDesc ybScan, int nkeys, ScanKey keys) | ||
| { | ||
| /* Flatten keys and store the results in ybScan */ | ||
| ybExtractScanKeys(keys, nkeys, ybScan); | ||
|
|
||
| if (YbIsUnsatisfiableCondition(ybScan->nkeys, ybScan->keys)) | ||
| { | ||
| ybScan->quit_scan = true; | ||
| return false; /* caller is expected to return the descriptor without further setup */ | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| /* | ||
| * Helper function to bind scan keys and hash keys. | ||
| * Returns true if binding succeeded, false if scan should quit. | ||
| * On failure, frees scan_plan bitmapsets and sets quit_scan = true. | ||
| * This is shared between ybcBeginScan and ybc_heap_beginscan_for_index_build. | ||
| * | ||
| * pg_scan_plan and is_for_precheck are forwarded unchanged to YbBindScanKeys. | ||
| * YbBindHashKeys is only attempted when YbBindScanKeys returned true. | ||
| * | ||
| * The freed scan_plan fields (hash_key, primary_key, sk_cols) are not reset | ||
| * to NULL, so after a false return the caller must not free them again. | ||
| * On success the bitmapsets are left untouched and remain the caller's to free. | ||
| */ | ||
| static bool | ||
| ybcBindKeysValid(YbScanDesc ybScan, YbScanPlanData *scan_plan, | ||
| Scan *pg_scan_plan, bool is_for_precheck) | ||
| { | ||
| if (!YbBindScanKeys(ybScan, scan_plan, pg_scan_plan, is_for_precheck) || | ||
| !YbBindHashKeys(ybScan)) | ||
| { | ||
| ybScan->quit_scan = true; | ||
| bms_free(scan_plan->hash_key); | ||
| bms_free(scan_plan->primary_key); | ||
| bms_free(scan_plan->sk_cols); | ||
| return false; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| /* | ||
| * Begin a scan for | ||
| * SELECT <Targets> FROM <relation> USING <index> WHERE <Binds> | ||
|
|
@@ -3379,21 +3438,11 @@ ybcBeginScan(Relation relation, | |
| bool fetch_ybctids_only) | ||
| { | ||
| /* Set up Yugabyte scan description */ | ||
| YbScanDesc ybScan = (YbScanDesc) palloc0(sizeof(YbScanDescData)); | ||
| TableScanDesc tsdesc = (TableScanDesc) ybScan; | ||
| YbScanDesc ybScan = ybcAllocScanDesc(relation, nkeys, keys); | ||
|
|
||
| tsdesc->rs_rd = relation; | ||
| tsdesc->rs_key = keys; | ||
| tsdesc->rs_nkeys = nkeys; | ||
|
|
||
| /* Flatten keys and store the results in ybScan. */ | ||
| ybExtractScanKeys(keys, nkeys, ybScan); | ||
|
|
||
| if (YbIsUnsatisfiableCondition(ybScan->nkeys, ybScan->keys)) | ||
| { | ||
| ybScan->quit_scan = true; | ||
| if (!ybcExtractAndCheckScanKeysValid(ybScan, nkeys, keys)) | ||
| return ybScan; | ||
| } | ||
|
|
||
| ybScan->exec_params = exec_params; | ||
| ybScan->index = index; | ||
| ybScan->quit_scan = false; | ||
|
|
@@ -3408,16 +3457,9 @@ ybcBeginScan(Relation relation, | |
| ybScan->handle = YbNewSelect(relation, &ybScan->prepare_params); | ||
|
|
||
| /* Set up binds */ | ||
| if (!YbBindScanKeys(ybScan, &scan_plan, pg_scan_plan, | ||
| false /* is_for_precheck */ ) || | ||
| !YbBindHashKeys(ybScan)) | ||
| { | ||
| ybScan->quit_scan = true; | ||
| bms_free(scan_plan.hash_key); | ||
| bms_free(scan_plan.primary_key); | ||
| bms_free(scan_plan.sk_cols); | ||
| if (!ybcBindKeysValid(ybScan, &scan_plan, pg_scan_plan, | ||
| false /* is_for_precheck */)) | ||
| return ybScan; | ||
| } | ||
|
|
||
| /* | ||
| * Set up targets. There are two separate cases: | ||
|
|
@@ -3820,6 +3862,132 @@ ybc_heap_endscan(TableScanDesc tsdesc) | |
| ybc_free_ybscan(ybdesc); | ||
| } | ||
|
|
||
|
|
||
| /* | ||
| * ybc_heap_beginscan_for_index_build | ||
| * Begin a heap scan specifically for index build/backfill operations. | ||
| * | ||
| * Unlike the regular heap scan which fetches all columns, this function | ||
| * only requests the columns needed for the index: | ||
| * - Columns referenced in ii_IndexAttrNumbers (index key and non-key columns) | ||
| * - Columns referenced in ii_Expressions (expression index columns) | ||
| * - Columns referenced in ii_Predicate (partial index predicate) | ||
| * - ybctid (always needed for index entry construction) | ||
| * | ||
| * This optimization significantly reduces the amount of data read from | ||
| * DocDB during concurrent index creation (backfill), especially for tables | ||
| * with many columns where only a few are indexed. | ||
| * | ||
| * Parameters: | ||
| * relation - table being scanned; also supplies the tuple descriptor | ||
| * snapshot - stored in rs_snapshot | ||
| * nkeys, key - scan keys, flattened and bound like in ybcBeginScan | ||
| * flags - stored in rs_flags | ||
| * indexInfo - source of the required column set; dereferenced | ||
| * unconditionally, so it must not be NULL | ||
| * | ||
| * Returns the scan descriptor. If the keys are unsatisfiable or binding | ||
| * fails, the descriptor is returned early with quit_scan set and without | ||
| * any targets added. | ||
| * | ||
| * Regular columns that are out of range for the tuple descriptor or marked | ||
| * dropped are skipped silently rather than reported. | ||
| * | ||
| * NOTE(review): the target loop sends every attnum <= 0 to | ||
| * YbDmlAppendTargetSystem. If ii_Expressions or ii_Predicate can contain a | ||
| * whole-row reference, pull_varattnos presumably records it as attribute 0, | ||
| * which would land in that branch - confirm this is unreachable or handled. | ||
| */ | ||
| TableScanDesc | ||
| ybc_heap_beginscan_for_index_build(Relation relation, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this duplicates code from ybcBeginScan
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 to Jason's comment. It would be good to call into
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I refactored both functions to use some shared logic helpers in a8937a1. Let me know what you think. |
||
| Snapshot snapshot, | ||
| int nkeys, | ||
| ScanKey key, | ||
| uint32 flags, | ||
| IndexInfo *indexInfo) | ||
| { | ||
| YbScanDesc ybScan; | ||
| YbScanPlanData scan_plan; | ||
| TupleDesc tupdesc; | ||
| Bitmapset *required_attrs = NULL; | ||
| int i; | ||
| int idx; | ||
|
|
||
| /* Allocate and initialize scan descriptor */ | ||
| ybScan = ybcAllocScanDesc(relation, nkeys, key); | ||
| TableScanDesc tsdesc = (TableScanDesc) ybScan; /* NOTE(review): declared after a statement; move up with the other locals if the build warns on declaration-after-statement */ | ||
|
|
||
| tsdesc->rs_snapshot = snapshot; | ||
| tsdesc->rs_flags = flags; | ||
|
|
||
| if (!ybcExtractAndCheckScanKeysValid(ybScan, nkeys, key)) | ||
| return tsdesc; /* quit_scan was already set by the helper */ | ||
|
|
||
| ybScan->index = NULL; | ||
| ybScan->quit_scan = false; | ||
| ybScan->prepare_params.fetch_ybctids_only = false; | ||
|
|
||
| /* Set up the scan plan */ | ||
| ybcSetupScanPlan(false /* xs_want_itup */, ybScan, &scan_plan); | ||
| ybcSetupScanKeys(ybScan, &scan_plan); | ||
|
|
||
| ybScan->handle = YbNewSelect(relation, &ybScan->prepare_params); | ||
|
|
||
| /* Bind scan keys */ | ||
| if (!ybcBindKeysValid(ybScan, &scan_plan, NULL /* pg_scan_plan */, | ||
| false /* is_for_precheck */)) | ||
| return tsdesc; /* helper already freed the scan_plan bitmapsets */ | ||
|
|
||
| /* | ||
| * Build the set of required attribute numbers based on IndexInfo. | ||
| * Use FirstLowInvalidHeapAttributeNumber as the offset for system columns. | ||
| */ | ||
| tupdesc = RelationGetDescr(relation); | ||
|
|
||
| /* Always need ybctid for index entry construction */ | ||
| required_attrs = bms_add_member(required_attrs, | ||
| YBTupleIdAttributeNumber - | ||
| FirstLowInvalidHeapAttributeNumber); | ||
|
|
||
| /* Add columns directly referenced in the index */ | ||
| for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) | ||
| { | ||
| AttrNumber attnum = indexInfo->ii_IndexAttrNumbers[i]; | ||
|
|
||
| /* | ||
| * attnum == 0 means this is an expression index column, | ||
| * which will be handled by extracting vars from ii_Expressions. | ||
| */ | ||
| if (attnum > 0) | ||
| required_attrs = bms_add_member(required_attrs, | ||
| attnum - | ||
| FirstLowInvalidHeapAttributeNumber); | ||
| } | ||
|
|
||
| /* | ||
| * Add columns referenced in index expressions. | ||
| * Use varno=1 since this is always scanning the base relation. | ||
| */ | ||
| if (indexInfo->ii_Expressions != NIL) | ||
| pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &required_attrs); | ||
|
|
||
| /* | ||
| * Add columns referenced in partial index predicate. | ||
| */ | ||
| if (indexInfo->ii_Predicate != NIL) | ||
| pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &required_attrs); | ||
|
|
||
| /* | ||
| * Now set up targets based on required_attrs. | ||
| * Iterate through the bitmapset and add each required column. | ||
| */ | ||
| idx = -1; | ||
| while ((idx = bms_next_member(required_attrs, idx)) >= 0) | ||
| { | ||
| AttrNumber attnum = idx + FirstLowInvalidHeapAttributeNumber; /* undo the bitmapset offset */ | ||
|
|
||
| if (attnum > 0) | ||
| { | ||
| /* Regular column - verify it exists and is not dropped */ | ||
| if (attnum <= tupdesc->natts && | ||
| !TupleDescAttr(tupdesc, attnum - 1)->attisdropped) | ||
| YbDmlAppendTargetRegular(tupdesc, attnum, ybScan->handle); | ||
| } | ||
| else /* attnum <= 0 */ | ||
| { | ||
| /* System column (like ybctid) */ | ||
| YbDmlAppendTargetSystem(attnum, ybScan->handle); | ||
| } | ||
| } | ||
|
|
||
| bms_free(required_attrs); | ||
| bms_free(scan_plan.hash_key); | ||
| bms_free(scan_plan.primary_key); | ||
| bms_free(scan_plan.sk_cols); | ||
|
|
||
| return tsdesc; | ||
| } | ||
|
|
||
| /* --------------------------------------------------------------------------------------------- */ | ||
|
|
||
| /* | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -547,6 +547,13 @@ extern bool yb_enable_base_scans_cost_model; | |
| */ | ||
| extern bool yb_enable_update_reltuples_after_create_index; | ||
|
|
||
| /* | ||
| * Enables index backfill column projection optimization. | ||
| * If true, index build/backfill only reads columns needed for the index, | ||
| * rather than all columns from the base table. | ||
| * | ||
| * NOTE(review): per the review thread this is meant to be backed by a GUC | ||
| * that defaults to true - confirm against the GUC definition. | ||
| */ | ||
| extern bool yb_enable_index_backfill_column_projection; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This flag can be defined as a GUC. The GUC can also be set to true by default for now.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added in b40157a |
||
|
|
||
| /* | ||
| * Total timeout for waiting for backends to have up-to-date catalog version. | ||
| */ | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This logic was copied from
tableam.h#table_beginscan_strat