Skip to content

Commit eb21fb6

Browse files
authored
Merge pull request #1182 from timeplus-io/backport/storageview-perf-and-clone
StorageView isStreamingQuery cache, view-chain race fix, and join changelog fix for nested aggregation
2 parents 7bc440d + 483b326 commit eb21fb6

12 files changed

Lines changed: 330 additions & 44 deletions

src/Interpreters/Context.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4512,10 +4512,10 @@ cluster::TimerService & Context::getTimerService() const
45124512
return *shared->global_system_timer;
45134513
}
45144514

4515-
Context::DataStreamSemanticCache & Context::getDataStreamSemanticCache() const
4515+
Context::QueryAnalysisCache & Context::getQueryAnalysisCache() const
45164516
{
4517-
assert(hasQueryContext());
4518-
return getQueryContext()->data_stream_semantic_cache;
4517+
chassert(hasQueryContext());
4518+
return getQueryContext()->query_analysis_cache;
45194519
}
45204520

45214521
/// proton: ends.

src/Interpreters/Context.h

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -515,8 +515,17 @@ class ContextData
515515
/// proton: starts. Parameters for time predicates of main table
516516
TimeParam time_param;
517517

518-
using DataStreamSemanticCache = std::unordered_map<std::string, Streaming::DataStreamSemanticEx>;
519-
mutable DataStreamSemanticCache data_stream_semantic_cache;
518+
/// Query-scoped cache for repeated analysis results (data-stream semantic, is-streaming, …).
519+
/// Avoids O(N^2) analyzer recursion on N-deep view chains (issue #11203) and duplicate
520+
/// InterpreterSelectWithUnionQuery builds during getSampleBlock. Per-query lifetime, no
521+
/// invalidation needed. Keyed by query-string or storage-identity depending on caller.
522+
struct QueryAnalysisCacheEntry
523+
{
524+
std::optional<bool> is_streaming;
525+
std::optional<Streaming::DataStreamSemanticEx> data_stream_semantic;
526+
};
527+
using QueryAnalysisCache = std::unordered_map<std::string, QueryAnalysisCacheEntry>;
528+
mutable QueryAnalysisCache query_analysis_cache;
520529
/// proton: end.
521530

522531
/// Use copy constructor or createGlobal() instead
@@ -799,8 +808,8 @@ class Context: public ContextData, public std::enable_shared_from_this<Context>
799808

800809
const std::set<RequiredColumnTuple> & requiredColumns() const { return required_columns; }
801810
void addRequiredColumns(RequiredColumnTuple && column_tuple) { required_columns.insert(std::move(column_tuple)); }
802-
/// Get data stream semantic for subquery
803-
DataStreamSemanticCache & getDataStreamSemanticCache() const;
811+
/// Query-scoped analysis cache (data-stream semantic, is-streaming, …). Issue #11203.
812+
QueryAnalysisCache & getQueryAnalysisCache() const;
804813
StoragePtr getTableFunctionResults(const String & key) const;
805814
void setTableFunctionResults(const String & key, const StoragePtr & table);
806815
/// proton: ends
@@ -1134,6 +1143,10 @@ class Context: public ContextData, public std::enable_shared_from_this<Context>
11341143

11351144
bool isQueryFromMaterializedView() const { return is_query_from_materialized_view; }
11361145
void setQueryFromMaterializedView(bool is_query_from_materialized_view_) { is_query_from_materialized_view = is_query_from_materialized_view_; }
1146+
1147+
/// True when `query_mode == "table"` — the query is a bounded/historical read.
1148+
/// `ProxyStream` pushes this setting when resolving `table(...)` table functions.
1149+
bool isHistoricalQueryMode() const { return getSettingsRef().query_mode.value == "table"; }
11371150
/// proton: ends.
11381151

11391152
ActionLocksManagerPtr getActionLocksManager();

src/Interpreters/InterpreterSelectWithUnionQuery.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -274,12 +274,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
274274
/// Using query string because query_ptr changes for every internal SELECT
275275
auto key = queryToString(query_ptr_);
276276

277-
auto & data_stream_semantic_cache = context_->getDataStreamSemanticCache();
277+
auto & analysis_cache = context_->getQueryAnalysisCache();
278278
if (output_data_stream_semantic)
279279
{
280-
auto semantic_iter = data_stream_semantic_cache.find(key);
281-
if (semantic_iter != data_stream_semantic_cache.end())
282-
*output_data_stream_semantic = semantic_iter->second;
280+
auto it = analysis_cache.find(key);
281+
if (it != analysis_cache.end() && it->second.data_stream_semantic.has_value())
282+
*output_data_stream_semantic = *it->second.data_stream_semantic;
283283
}
284284

285285
auto & cache = context_->getSampleBlockCache();
@@ -301,7 +301,7 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
301301
InterpreterSelectWithUnionQuery interpreter(query_ptr_, context_, std::move(select_options));
302302

303303
auto data_stream_semantic = interpreter.getDataStreamSemantic();
304-
data_stream_semantic_cache[key] = data_stream_semantic;
304+
analysis_cache[key].data_stream_semantic = data_stream_semantic;
305305
if (output_data_stream_semantic)
306306
*output_data_stream_semantic = data_stream_semantic;
307307

src/Interpreters/Streaming/CalculateDataStreamSemantic.cpp

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,27 @@ namespace Streaming
99
{
1010
namespace
1111
{
12+
enum class JoinChangelogMode
13+
{
14+
None,
15+
Try,
16+
Force,
17+
};
18+
1219
DataStreamSemanticEx calculateDataStreamSemanticForJoin(
1320
DataStreamSemanticEx left_input_data_stream_semantic,
1421
DataStreamSemanticEx right_input_data_stream_semantic,
1522
std::pair<JoinKind, JoinStrictness> kind_and_strictness,
1623
SelectQueryInfo & query_info,
17-
bool allow_emit_changelog_join_result)
24+
JoinChangelogMode join_changelog_mode)
1825
{
1926
assert(left_input_data_stream_semantic.streaming);
2027

21-
/// Speical handling
28+
/// Special handling
2229
/// 1) for <stream> join <table>, the right inputs don't support changelog semantic
2330
if (!right_input_data_stream_semantic.streaming)
2431
{
25-
/// Left stream semantic (Keep the original semantic)
32+
/// Left stream semantic
2633
query_info.left_input_tracking_changes = isChangelogDataStream(left_input_data_stream_semantic);
2734

2835
/// Right stream semantic (Append or VersionedKV or ChangelogKV)
@@ -36,14 +43,16 @@ DataStreamSemanticEx calculateDataStreamSemanticForJoin(
3643
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "The filled join data doesn't support changelog processing");
3744
}
3845

39-
if (query_info.force_emit_changelog && !isChangelogDataStream(left_input_data_stream_semantic))
46+
/// Try/Force emitting changelog
47+
if (join_changelog_mode != JoinChangelogMode::None && !isChangelogDataStream(left_input_data_stream_semantic))
4048
{
4149
if (canTrackChangesFromInput(left_input_data_stream_semantic))
4250
{
4351
query_info.left_input_tracking_changes = true;
4452
return DataStreamSemantic::Changelog;
4553
}
46-
else
54+
55+
if (join_changelog_mode == JoinChangelogMode::Force)
4756
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for emit changelog from append stream join table result");
4857
}
4958

@@ -72,7 +81,20 @@ DataStreamSemanticEx calculateDataStreamSemanticForJoin(
7281
kind_and_strictness.first,
7382
kind_and_strictness.second);
7483

75-
return DataStreamSemantic::Append;
84+
/// Try/Force emitting changelog
85+
if (join_changelog_mode != JoinChangelogMode::None && !isChangelogDataStream(left_input_data_stream_semantic))
86+
{
87+
if (canTrackChangesFromInput(left_input_data_stream_semantic))
88+
{
89+
query_info.left_input_tracking_changes = true;
90+
return DataStreamSemantic::Changelog;
91+
}
92+
93+
if (join_changelog_mode == JoinChangelogMode::Force)
94+
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for emit changelog from asof/any/cross join results");
95+
}
96+
97+
return left_input_data_stream_semantic;
7698
}
7799

78100
/// Left stream semantic
@@ -99,19 +121,21 @@ DataStreamSemanticEx calculateDataStreamSemanticForJoin(
99121
right_input_data_stream_semantic = DataStreamSemantic::Append;
100122
}
101123

102-
if (isJoinResultChangelog(left_input_data_stream_semantic, right_input_data_stream_semantic))
124+
bool join_result_is_changelog = isJoinResultChangelog(left_input_data_stream_semantic, right_input_data_stream_semantic);
125+
126+
/// Try/Force emitting changelog
127+
if (join_changelog_mode != JoinChangelogMode::None)
103128
{
104-
if (allow_emit_changelog_join_result)
129+
if (join_result_is_changelog)
105130
return DataStreamSemantic::Changelog;
106-
else
107-
/// NOTE: If the current layer doesn't need joined result emit changelog,
108-
/// we shall emit append-only stream with mutable semantic(MutableStream).
109-
/// In this way, the outer layer can still track changes if needed.
110-
/// For example: select count() from (select * from kv join kv2 on kv.key = kv2.key)
111-
return DataStreamSemanticEx{StorageSemantic::NativeKV};
131+
132+
if (join_changelog_mode == JoinChangelogMode::Force)
133+
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for emit changelog from non-changelog join results");
112134
}
113-
else
114-
return DataStreamSemantic::Append;
135+
136+
/// When not emitting changelog, preserve NativeKV semantic so the outer layer can still track changes if needed.
137+
/// e.g. select count() from (select * from kv join kv2 on kv.key = kv2.key)
138+
return join_result_is_changelog ? DataStreamSemanticEx{StorageSemantic::NativeKV} : DataStreamSemantic::Append;
115139
}
116140
}
117141

@@ -146,7 +170,12 @@ DataStreamSemanticPair calculateDataStreamSemantic(
146170
DataStreamSemanticPair semantic_pair;
147171
/// By default, the joined result stream semantic is always append-only unless the current layer has aggregates
148172
/// or the query forces emitting a changelog
149-
bool allow_emit_changelog_join_result = current_select_has_aggregates || query_info.force_emit_changelog;
173+
JoinChangelogMode join_changelog_mode = JoinChangelogMode::None;
174+
if (current_select_has_aggregates)
175+
join_changelog_mode = JoinChangelogMode::Try;
176+
else if (query_info.force_emit_changelog)
177+
join_changelog_mode = JoinChangelogMode::Force;
178+
150179
/// First, look at what the current layer does
151180

152181
/// When the current layer has join or aggregates, we calculate the output data semantic locally and its inputs data stream semantic.
@@ -161,7 +190,7 @@ DataStreamSemanticPair calculateDataStreamSemantic(
161190
*right_input_data_stream_semantic,
162191
*kind_and_strictness,
163192
query_info,
164-
allow_emit_changelog_join_result);
193+
join_changelog_mode);
165194
}
166195
else
167196
{
@@ -189,10 +218,7 @@ DataStreamSemanticPair calculateDataStreamSemantic(
189218
*right_input_data_stream_semantic,
190219
*kind_and_strictness,
191220
query_info,
192-
allow_emit_changelog_join_result);
193-
194-
if (query_info.force_emit_changelog && !Streaming::isChangelogDataStream(semantic_pair.effective_input_data_stream_semantic))
195-
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for emit changelog from non-changelog join results");
221+
join_changelog_mode);
196222

197223
semantic_pair.output_data_stream_semantic = semantic_pair.effective_input_data_stream_semantic;
198224
}

src/Storages/StorageView.cpp

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -322,19 +322,74 @@ bool StorageView::isReady() const
322322

323323
bool StorageView::isStreamingQuery(ContextPtr query_context) const
324324
{
325-
auto select = getInMemoryMetadataPtr()->getSelectQuery().inner_query;
325+
/// proton: starts. Issue #11203 — query-scoped memoization so N-deep view chains don't
326+
/// force quadratic analyzer recursion during a single top-level CREATE VIEW / getSampleBlock.
327+
328+
/// Historical `table(view)` path: InterpreterSelectQuery::isStreamingQuery() force-returns
329+
/// false when query_mode == "table" regardless of storage. Short-circuit here so we skip
330+
/// both the interpreter rebuild and the cache probe on that path.
331+
if (query_context->isHistoricalQueryMode())
332+
return false;
333+
334+
const auto metadata_ptr = getInMemoryMetadataPtr();
335+
336+
/// Parameterized views: same storage object can analyze to different queries across
337+
/// invocations, so do not cache and keep the original behavior.
338+
if (is_parameterized_view)
339+
{
340+
auto local_ctx = Context::createCopy(query_context);
341+
local_ctx->setCollectRequiredColumns(false);
342+
auto select = metadata_ptr->getSelectQuery().inner_query->clone();
343+
return InterpreterSelectWithUnionQuery(
344+
select, local_ctx, SelectQueryOptions().createParameterizedView().noModify().analyze())
345+
.isStreamingQuery();
346+
}
347+
348+
const bool have_query_ctx = query_context->hasQueryContext();
349+
std::string cache_key;
350+
if (have_query_ctx)
351+
{
352+
const auto storage_id = getStorageID();
353+
/// Key = view identity + metadata revision proxy + effective query mode.
354+
/// StorageID carries database/table as a fallback when uuid is nil; the metadata
355+
/// pointer acts as a cheap revision token (within one query it is stable, and across
356+
/// metadata updates a fresh pointer forces a recompute even if the cache were long-lived —
357+
/// though this cache's lifetime is per-query, so that is belt-and-suspenders).
358+
cache_key = fmt::format(
359+
"{}.{}|{}|{}|{}",
360+
storage_id.database_name,
361+
storage_id.table_name,
362+
toString(storage_id.uuid),
363+
fmt::ptr(metadata_ptr.get()),
364+
query_context->getSettingsRef().query_mode.value);
365+
366+
auto & cache = query_context->getQueryAnalysisCache();
367+
if (auto it = cache.find(cache_key); it != cache.end() && it->second.is_streaming.has_value())
368+
return *it->second.is_streaming;
369+
}
370+
326371
auto local_ctx = Context::createCopy(query_context);
327372
local_ctx->setCollectRequiredColumns(false);
328373

329-
if (is_parameterized_view)
330-
return InterpreterSelectWithUnionQuery(select, local_ctx, SelectQueryOptions().createParameterizedView().noModify().analyze()).isStreamingQuery();
331-
else
332-
return InterpreterSelectWithUnionQuery(select, local_ctx, SelectQueryOptions().noModify().analyze()).isStreamingQuery();
374+
/// Analyzer mutates query_ptr in place, so concurrent analyses on the view's shared
375+
/// inner_query race and tear the AST; clone before handing it off.
376+
auto select = metadata_ptr->getSelectQuery().inner_query->clone();
377+
const bool result = InterpreterSelectWithUnionQuery(
378+
select, local_ctx, SelectQueryOptions().noModify().analyze())
379+
.isStreamingQuery();
380+
381+
if (have_query_ctx)
382+
{
383+
auto & cache = query_context->getQueryAnalysisCache();
384+
cache[std::move(cache_key)].is_streaming = result;
385+
}
386+
return result;
387+
/// proton: ends.
333388
}
334389

335390
bool StorageView::hasStreamingGlobalAggregation() const
336391
{
337-
auto select = getInMemoryMetadataPtr()->getSelectQuery().inner_query;
392+
auto select = getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone();
338393
auto ctx = Context::createCopy(local_context);
339394
ctx->setCollectRequiredColumns(false);
340395

@@ -346,20 +401,23 @@ bool StorageView::hasStreamingGlobalAggregation() const
346401

347402
Streaming::DataStreamSemanticEx StorageView::dataStreamSemantic() const
348403
{
404+
std::lock_guard lock(data_stream_semantic_mutex);
349405
if (data_stream_semantic_resolved)
350406
return data_stream_semantic;
351407

352-
auto select = getInMemoryMetadataPtr()->getSelectQuery().inner_query;
408+
auto select = getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone();
353409
auto ctx = Context::createCopy(local_context);
354410
ctx->setCollectRequiredColumns(false);
355411

356412
if (is_parameterized_view)
357-
data_stream_semantic = InterpreterSelectWithUnionQuery(select, ctx, SelectQueryOptions().createParameterizedView().noModify().analyze()).getDataStreamSemantic();
413+
data_stream_semantic
414+
= InterpreterSelectWithUnionQuery(select, ctx, SelectQueryOptions().createParameterizedView().noModify().analyze())
415+
.getDataStreamSemantic();
358416
else
359-
data_stream_semantic = InterpreterSelectWithUnionQuery(select, ctx, SelectQueryOptions().noModify().analyze()).getDataStreamSemantic();
417+
data_stream_semantic
418+
= InterpreterSelectWithUnionQuery(select, ctx, SelectQueryOptions().noModify().analyze()).getDataStreamSemantic();
360419

361420
data_stream_semantic_resolved = true;
362-
363421
return data_stream_semantic;
364422
}
365423

src/Storages/StorageView.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include <mutex>
4+
35
#include <Parsers/ASTSelectQuery.h>
46
#include <Parsers/IAST_fwd.h>
57
#include <Storages/IStorage.h>
@@ -60,6 +62,7 @@ class StorageView final : public shared_ptr_helper<StorageView>, public IStorage
6062

6163
private:
6264
ContextMutablePtr local_context;
65+
mutable std::mutex data_stream_semantic_mutex;
6366
mutable bool data_stream_semantic_resolved = false;
6467
std::atomic_flag started;
6568
/// proton: ends.

tests/queries_ported/0_stateless/99053_global_aggr_over_global_aggr_bugs.reference

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@
2222
2
2323
3
2424
4
25+
1 1
26+
1 -1
27+
2 1

0 commit comments

Comments
 (0)