Skip to content

Commit dee122f

Browse files
Copilot and ybtsdst authored
docs: document FE index creation flow for storage-compute separation (存算分离) architecture (#1)
* Initial plan

* docs: add FE index creation flow documentation for cloud (存算分离) architecture

  - Add README.md in cloud/alter explaining the complete FE processing flow for CREATE INDEX in storage-compute separation mode, including:
    - Key class descriptions
    - Two execution paths (light index change vs full schema change job)
    - Core differences between cloud and local modes
    - Complete Mermaid sequence diagram
  - Add class-level Javadoc to CloudSchemaChangeJobV2 describing the MetaService interaction lifecycle for index creation
  - Add Javadoc to key override methods in CloudSchemaChangeJobV2
  - Add class-level Javadoc to CloudSchemaChangeHandler
  - Fix logger in CloudSchemaChangeJobV2 to use CloudSchemaChangeJobV2.class

  Co-authored-by: ybtsdst <3139535+ybtsdst@users.noreply.github.com>

* revert: restore original logger in CloudSchemaChangeJobV2 (keep Javadoc only)

  Co-authored-by: ybtsdst <3139535+ybtsdst@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: ybtsdst <3139535+ybtsdst@users.noreply.github.com>
1 parent c432733 commit dee122f

File tree

3 files changed

+454
-0
lines changed

3 files changed

+454
-0
lines changed

fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,37 @@
5454
import java.util.Set;
5555
import java.util.stream.Collectors;
5656

57+
/**
58+
* Schema Change Job implementation for the storage-compute separation (cloud) architecture.
59+
*
60+
* <p>In storage-compute separation mode, tablet metadata is managed by the MetaService rather
61+
* than directly on BE local disk. This class overrides the key lifecycle methods of
62+
* {@link SchemaChangeJobV2} to interact with the MetaService via RPC calls through
63+
* {@link CloudInternalCatalog} instead of sending tasks directly to BEs.
64+
*
65+
* <p>The index creation flow in storage-compute separation mode:
66+
* <ol>
67+
* <li>{@link #createShadowIndexReplica()} - calls MetaService to prepare and create shadow
68+
* tablets for each partition via {@code prepareMaterializedIndex} and
69+
* {@code sendCreateTabletsRpc}.</li>
70+
* <li>BE executes ALTER tasks to physically rewrite data files with the new index.</li>
71+
* <li>{@link #commitShadowIndex()} - calls MetaService {@code commitMaterializedIndex}
72+
* to atomically promote the shadow index to a visible index.</li>
73+
* <li>{@link #postProcessOriginIndex()} - calls MetaService {@code dropMaterializedIndex}
74+
* to drop the old index and free cloud storage space.</li>
75+
* </ol>
76+
*
77+
* <p>On cancellation, {@link #onCancel()} calls MetaService to remove the shadow index
78+
* and clean up any partially created SchemaChangeJob records.
79+
*/
5780
public class CloudSchemaChangeJobV2 extends SchemaChangeJobV2 {
5881
private static final Logger LOG = LogManager.getLogger(SchemaChangeJobV2.class);
5982

83+
/**
84+
* Creates a new CloudSchemaChangeJobV2 and binds it to the current compute group (cloud cluster).
85+
* The compute group name is captured from {@link ConnectContext} at creation time and used later
86+
* by {@link #ensureCloudClusterExist(List)} to verify that the cluster is still available.
87+
*/
6088
public CloudSchemaChangeJobV2(String rawSql, long jobId, long dbId, long tableId,
6189
String tableName, long timeoutMs) {
6290
super(rawSql, jobId, dbId, tableId, tableName, timeoutMs);
@@ -80,6 +108,9 @@ private CloudSchemaChangeJobV2() {}
80108

81109
@Override
82110
protected void commitShadowIndex() throws AlterCancelException {
111+
// In storage-compute separation mode, the shadow index promotion is done by notifying
112+
// MetaService via commitMaterializedIndex RPC. MetaService atomically switches the
113+
// shadow index to a visible (committed) state so that subsequent queries can use it.
83114
List<Long> shadowIdxList =
84115
indexIdMap.keySet().stream().collect(Collectors.toList());
85116
try {
@@ -100,6 +131,10 @@ protected void onCancel() {
100131
return;
101132
}
102133

134+
// In storage-compute separation mode, cancellation requires two steps:
135+
// 1. Drop the shadow index tablets from MetaService (dropMaterializedIndex RPC).
136+
// 2. Remove each SchemaChangeJob record from MetaService for every
137+
// (partition, originTablet, shadowTablet) combination (removeSchemaChangeJob RPC).
103138
List<Long> shadowIdxList = indexIdMap.keySet().stream().collect(Collectors.toList());
104139
dropIndex(shadowIdxList);
105140

@@ -141,10 +176,16 @@ protected void postProcessOriginIndex() {
141176
return;
142177
}
143178

179+
// After the shadow index has been committed, drop the original index from MetaService
180+
// to free up cloud storage space occupied by the old index data.
144181
List<Long> originIdxList = indexIdMap.values().stream().collect(Collectors.toList());
145182
dropIndex(originIdxList);
146183
}
147184

185+
/**
186+
* Drops the given index list from MetaService with retry logic.
187+
* Used for both cancellation (dropping shadow indexes) and post-processing (dropping origin indexes).
188+
*/
148189
private void dropIndex(List<Long> idxList) {
149190
int tryTimes = 1;
150191
while (true) {
@@ -164,6 +205,21 @@ private void dropIndex(List<Long> idxList) {
164205
dbId, tableId, jobId, idxList);
165206
}
166207

208+
/**
209+
* Creates shadow index replicas in storage-compute separation mode.
210+
*
211+
* <p>Unlike the local mode which directly creates tablet replicas on BE nodes,
212+
* this method:
213+
* <ol>
214+
* <li>Calls {@code prepareMaterializedIndex} RPC to reserve the shadow index slot
215+
* in MetaService with an expiration timestamp.</li>
216+
* <li>Builds {@code TabletMetaCloudPB} for each shadow tablet in each partition and
217+
* sends them to MetaService via {@code sendCreateTabletsRpc} to persist the
218+
* tablet metadata in cloud storage.</li>
219+
* <li>Adds the shadow indexes to the FE in-memory catalog so that BE nodes can
220+
* discover them when processing the ALTER tasks.</li>
221+
* </ol>
222+
*/
167223
@Override
168224
protected void createShadowIndexReplica() throws AlterCancelException {
169225
Database db = Env.getCurrentInternalCatalog()

fe/fe-core/src/main/java/org/apache/doris/cloud/alter/CloudSchemaChangeHandler.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,24 @@
5050
import java.util.Set;
5151
import java.util.stream.Collectors;
5252

53+
/**
54+
* Schema Change handler for the storage-compute separation (cloud) architecture.
55+
*
56+
* <p>This class extends {@link SchemaChangeHandler} and overrides methods that need
57+
* to interact with cloud infrastructure (MetaService) rather than managing tablet
58+
* replicas directly on BE local disks.
59+
*
60+
* <p>In storage-compute separation mode, tablet metadata (including index schema) is
61+
* managed by the MetaService. When creating an index (e.g. {@code ALTER TABLE ... ADD INDEX}),
62+
* the handler delegates to {@link CloudSchemaChangeJobV2} which communicates with
63+
* MetaService via {@link org.apache.doris.cloud.datasource.CloudInternalCatalog} RPCs.
64+
*
65+
* <p>This handler is activated when {@code Config.isCloudMode()} returns {@code true}.
66+
* It is instantiated by {@link org.apache.doris.alter.Alter} at startup.
67+
*
68+
* @see CloudSchemaChangeJobV2
69+
* @see org.apache.doris.alter.SchemaChangeHandler
70+
*/
5371
public class CloudSchemaChangeHandler extends SchemaChangeHandler {
5472
private static final Logger LOG = LogManager.getLogger(CloudSchemaChangeHandler.class);
5573

0 commit comments

Comments
 (0)