Skip to content

Commit 8b21fb9

Browse files
authored
Merge pull request ClickHouse#107583 from groeneai/fix-timeseries-rename-digest-mismatch
Support renaming a TimeSeries table and fix Digest does not match crash in a Replicated database
2 parents 625515a + 9c10e82 commit 8b21fb9

5 files changed

Lines changed: 171 additions & 2 deletions

File tree

src/Databases/DatabaseAtomic.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <Interpreters/ExternalDictionariesLoader.h>
1414
#include <Interpreters/Context.h>
1515
#include <Storages/StorageMaterializedView.h>
16+
#include <Storages/StorageTimeSeries.h>
1617
#include <base/isSharedPtrUnique.h>
1718
#include <Common/PoolId.h>
1819
#include <Common/ZooKeeper/ZooKeeperCommon.h>
@@ -306,6 +307,9 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_
306307
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table_.get()))
307308
if (mv->hasInnerTable())
308309
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot move MaterializedView with inner table to other database");
310+
if (const auto * ts = dynamic_cast<const StorageTimeSeries *>(table_.get()))
311+
if (ts->hasInnerTables())
312+
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot move TimeSeries table with inner tables to other database");
309313
};
310314

311315
String table_data_path;

src/Databases/DatabaseOrdinary.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,11 @@ bool DatabaseOrdinary::shouldLazyLoad(const ASTCreateQuery & query, LoadingStric
427427
|| query.isParameterizedView() || query.is_window_view)
428428
return false;
429429

430+
/// A lazy proxy would hide the TimeSeries type from the cross-database rename guard, so its
431+
/// inner tables could be orphaned by a cross-database move. Load it eagerly, as for views.
432+
if (query.is_time_series_table)
433+
return false;
434+
430435
/// Already handled by `StorageTableFunctionProxy`.
431436
if (query.as_table_function)
432437
return false;

src/Storages/StorageTimeSeries.cpp

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88
#include <Interpreters/DatabaseCatalog.h>
99
#include <Interpreters/InterpreterCreateQuery.h>
1010
#include <Interpreters/InterpreterDropQuery.h>
11+
#include <Interpreters/InterpreterRenameQuery.h>
1112
#include <Parsers/ASTDropQuery.h>
1213
#include <Parsers/ASTCreateQuery.h>
1314
#include <Parsers/ASTInsertQuery.h>
15+
#include <Parsers/ASTRenameQuery.h>
1416
#include <Backups/BackupEntriesCollector.h>
1517
#include <Backups/IBackup.h>
1618
#include <Backups/RestorerFromBackup.h>
@@ -39,6 +41,7 @@ namespace ErrorCodes
3941
extern const int LOGICAL_ERROR;
4042
extern const int NOT_IMPLEMENTED;
4143
extern const int SUPPORT_IS_DISABLED;
44+
extern const int TABLE_ALREADY_EXISTS;
4245
extern const int UNEXPECTED_TABLE_ENGINE;
4346
extern const int UNKNOWN_TABLE;
4447
}
@@ -482,9 +485,52 @@ void StorageTimeSeries::alter(const AlterCommands & params, ContextPtr local_con
482485
}
483486

484487

485-
void StorageTimeSeries::renameInMemory(const StorageID & /* new_table_id */)
488+
void StorageTimeSeries::renameInMemory(const StorageID & new_table_id)
486489
{
487-
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Renaming is not supported by storage {} yet", getName());
490+
auto old_table_id = getStorageID();
491+
492+
/// In an Atomic/Replicated database both ids carry a UUID; inner tables are addressed by the
493+
/// outer table's UUID (the `.inner_id.<kind>.<uuid>` name), which is preserved by the rename, so
494+
/// only the outer table id changes. In an Ordinary database the inner table names embed the old
495+
/// outer table name, so each inner table has to be renamed too (same as StorageMaterializedView).
496+
bool from_atomic_to_atomic_database = old_table_id.hasUUID() && new_table_id.hasUUID();
497+
498+
if (!from_atomic_to_atomic_database && hasInnerTables())
499+
{
500+
/// Collect every inner table rename first so that all destination names can be checked
501+
/// before any rename is executed. The inner renames are not transactional, so renaming
502+
/// them one by one would leave the table half-renamed (some inner tables moved, the rest
503+
/// not) if a later destination name happened to be occupied.
504+
std::vector<std::pair<StorageID, String>> inner_renames;
505+
for (auto target_kind : getTargetKinds())
506+
{
507+
if (!isInnerTable(target_kind))
508+
continue;
509+
510+
auto inner_table = tryGetTargetTable(target_kind, getContext());
511+
if (!inner_table)
512+
continue;
513+
514+
auto inner_table_id = inner_table->getStorageID();
515+
auto new_inner_table_name = getTimeSeriesInnerTableName(target_kind, new_table_id);
516+
517+
if (DatabaseCatalog::instance().isTableExist(StorageID{new_table_id.database_name, new_inner_table_name}, getContext()))
518+
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {} already exists",
519+
StorageID{new_table_id.database_name, new_inner_table_name}.getNameForLogs());
520+
521+
inner_renames.emplace_back(std::move(inner_table_id), std::move(new_inner_table_name));
522+
}
523+
524+
for (const auto & [inner_table_id, new_inner_table_name] : inner_renames)
525+
{
526+
auto rename = make_intrusive<ASTRenameQuery>();
527+
rename->addElement(inner_table_id.database_name, inner_table_id.table_name,
528+
new_table_id.database_name, new_inner_table_name);
529+
InterpreterRenameQuery(rename, getContext()).execute();
530+
}
531+
}
532+
533+
IStorage::renameInMemory(new_table_id);
488534
}
489535

490536

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
0
2+
1
3+
0
4+
1
5+
1
6+
1
7+
1
8+
rejected
9+
1
10+
1
11+
1
12+
1
13+
rejected
14+
3
15+
0
16+
1
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env bash
2+
# Tags: zookeeper, no-parallel, no-replicated-database
3+
# Tag no-parallel: enables a REGULAR failpoint that affects the whole server process.
4+
# Tag no-replicated-database: the test creates its own Replicated database and the
5+
# failpoint is enabled only on one server node.
6+
7+
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
8+
# shellcheck source=../shell_config.sh
9+
. "$CURDIR"/../shell_config.sh
10+
11+
DB="${CLICKHOUSE_DATABASE}_repl"
12+
ZK_PATH="/test/timeseries_rename_digest/${CLICKHOUSE_TEST_UNIQUE_NAME}"
13+
14+
${CLICKHOUSE_CLIENT} -q "
15+
CREATE DATABASE ${DB}
16+
ENGINE = Replicated('${ZK_PATH}', 's1', 'r1')
17+
"
18+
19+
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --allow_experimental_time_series_table=1 \
20+
-q "CREATE TABLE ${DB}.ts ENGINE = TimeSeries"
21+
22+
# Force the metadata digest assertion to always run (skip the 1/16 probability gate), so any
23+
# divergence between the in-memory tables map and ZooKeeper is caught immediately. In release
24+
# builds assertDigestWithProbability is compiled out, so this is a no-op there.
25+
${CLICKHOUSE_CLIENT} -q "SYSTEM ENABLE FAILPOINT database_replicated_force_metadata_digest_check"
26+
27+
# RENAME TABLE must succeed and the table must keep working. Before the fix it was rejected from
28+
# renameInMemory() only after DatabaseAtomic::renameTable() had detached the table and committed
29+
# the ZooKeeper transaction, leaving it counted in tables_metadata_digest but missing locally, so
30+
# the next forced digest assertion aborted the server with "Digest does not match".
31+
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "RENAME TABLE ${DB}.ts TO ${DB}.ts2"
32+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${DB}.ts"
33+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${DB}.ts2"
34+
35+
# RENAME DATABASE must succeed too (it renames every contained table in place).
36+
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "RENAME DATABASE ${DB} TO ${DB}_renamed"
37+
${CLICKHOUSE_CLIENT} -q "EXISTS DATABASE ${DB}"
38+
${CLICKHOUSE_CLIENT} -q "EXISTS DATABASE ${DB}_renamed"
39+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${DB}_renamed.ts2"
40+
41+
# CREATE OR REPLACE creates a temporary table and renames it onto the target name; that internal
42+
# rename is the path the original report tripped over. With the target absent it must create the
43+
# table cleanly.
44+
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --allow_experimental_time_series_table=1 \
45+
-q "CREATE OR REPLACE TABLE ${DB}_renamed.cor ENGINE = TimeSeries"
46+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${DB}_renamed.cor"
47+
48+
# With the forced digest check still on, an unrelated DDL must SUCCEED, proving the digest stayed
49+
# consistent through every rename above. Run it without grep so a crash, lost connection, or any
50+
# other failure fails the test instead of being masked.
51+
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none \
52+
-q "CREATE TABLE ${DB}_renamed.probe (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id"
53+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${DB}_renamed.probe"
54+
55+
${CLICKHOUSE_CLIENT} -q "SYSTEM DISABLE FAILPOINT database_replicated_force_metadata_digest_check"
56+
57+
${CLICKHOUSE_CLIENT} -q "DROP DATABASE ${DB}_renamed SYNC" 2>/dev/null || true
58+
59+
# In a non-UUID (Ordinary) database the inner tables embed the outer table name, so renaming a
60+
# TimeSeries table renames each inner table too. Those inner renames are not transactional, so if
61+
# one destination name is already taken the rename must be rejected up front, before any inner
62+
# table is moved -- otherwise the table would be left half-renamed with an orphaned inner table.
63+
ORD="${CLICKHOUSE_DATABASE}_ord"
64+
${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS ${ORD} SYNC"
65+
${CLICKHOUSE_CLIENT} --send_logs_level=fatal --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE ${ORD} ENGINE = Ordinary"
66+
${CLICKHOUSE_CLIENT} --allow_experimental_time_series_table=1 -q "CREATE TABLE ${ORD}.ts ENGINE = TimeSeries"
67+
# Occupy one of the destination inner-table names so the rename cannot complete.
68+
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ${ORD}.\`.inner.tags.ts2\` (x UInt8) ENGINE = MergeTree ORDER BY x"
69+
# The rename is rejected before any inner table is moved.
70+
${CLICKHOUSE_CLIENT} --send_logs_level=fatal -q "RENAME TABLE ${ORD}.ts TO ${ORD}.ts2" 2>&1 | grep -q -F "TABLE_ALREADY_EXISTS" && echo "rejected"
71+
# Every source inner table is still present (none orphaned), so the table keeps working.
72+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${ORD}.\`.inner.samples.ts\`"
73+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${ORD}.\`.inner.tags.ts\`"
74+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${ORD}.\`.inner.metrics.ts\`"
75+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${ORD}.ts"
76+
77+
${CLICKHOUSE_CLIENT} --send_logs_level=fatal -q "DROP DATABASE ${ORD} SYNC" 2>/dev/null || true
78+
79+
# With lazy_load_tables=1 a reloaded TimeSeries table is materialized as a StorageTableProxy. The
80+
# proxy must not hide the TimeSeries type from the cross-database move guard -- otherwise a
81+
# cross-database RENAME would move only the outer table and orphan its inner tables in the old
82+
# database. The outer TimeSeries table is loaded eagerly so the guard sees it.
83+
LZ="${CLICKHOUSE_DATABASE}_lazy"
84+
LZ2="${CLICKHOUSE_DATABASE}_lazy2"
85+
${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS ${LZ} SYNC; DROP DATABASE IF EXISTS ${LZ2} SYNC"
86+
${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${LZ} ENGINE = Atomic SETTINGS lazy_load_tables = 1"
87+
${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${LZ2} ENGINE = Atomic"
88+
${CLICKHOUSE_CLIENT} --allow_experimental_time_series_table=1 -q "CREATE TABLE ${LZ}.ts ENGINE = TimeSeries"
89+
# Reattach so the table is recreated as a lazy proxy.
90+
${CLICKHOUSE_CLIENT} -q "DETACH DATABASE ${LZ}; ATTACH DATABASE ${LZ}"
91+
# The cross-database move is rejected before any inner table is moved.
92+
${CLICKHOUSE_CLIENT} --send_logs_level=fatal -q "RENAME TABLE ${LZ}.ts TO ${LZ2}.ts" 2>&1 | grep -q -F "Cannot move TimeSeries table with inner tables" && echo "rejected"
93+
# Every inner table stayed in the source database (none orphaned in the target).
94+
${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.tables WHERE database = '${LZ}' AND name LIKE '.inner_id.%'"
95+
${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.tables WHERE database = '${LZ2}' AND name LIKE '.inner_id.%'"
96+
${CLICKHOUSE_CLIENT} -q "EXISTS TABLE ${LZ}.ts"
97+
${CLICKHOUSE_CLIENT} --send_logs_level=fatal -q "DROP DATABASE ${LZ} SYNC" 2>/dev/null || true
98+
${CLICKHOUSE_CLIENT} --send_logs_level=fatal -q "DROP DATABASE ${LZ2} SYNC" 2>/dev/null || true

0 commit comments

Comments
 (0)