Skip to content

Commit 17b1316

Browse files
committed
Route Postgres metadata reads through passthrough
1 parent 1dbb2d9 commit 17b1316

6 files changed

Lines changed: 240 additions & 46 deletions

File tree

src/include/metadata_manager/postgres_metadata_manager.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,27 @@ class PostgresMetadataManager : public DuckLakeMetadataManager {
3232
string GetColumnTypeInternal(const LogicalType &type) override;
3333
shared_ptr<DuckLakeInlinedData> TransformInlinedData(QueryResult &result,
3434
const vector<LogicalType> &expected_types) override;
35+
void InitializeDuckLake(bool has_explicit_schema, DuckLakeEncryption encryption) override;
3536

3637
unique_ptr<QueryResult> Execute(DuckLakeSnapshot snapshot, string &query) override;
38+
unique_ptr<QueryResult> Execute(string &query) override;
3739

3840
unique_ptr<QueryResult> Query(DuckLakeSnapshot snapshot, string &query) override;
41+
unique_ptr<QueryResult> Query(string &query) override;
3942

4043
protected:
4144
string GetLatestSnapshotQuery() const override;
45+
string CastValueToTarget(const Value &val, const LogicalType &type) override;
46+
string CastStatsToTarget(const string &stats, const LogicalType &type) override;
47+
string GenerateConstantFilter(const ConstantFilter &constant_filter, const LogicalType &type,
48+
unordered_set<string> &referenced_stats) override;
4249

4350
private:
4451
unique_ptr<QueryResult> ExecuteQuery(DuckLakeSnapshot snapshot, string &query, string command);
52+
unique_ptr<QueryResult> ExecuteQuery(string &query, string command);
53+
string GetPostgresIndexStatements();
54+
string GetPostgresStatsType(const LogicalType &type);
55+
bool CanCastStatsForValueComparison(const LogicalType &type);
4556
};
4657

4758
} // namespace duckdb

src/include/storage/ducklake_metadata_manager.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,10 @@ class DuckLakeMetadataManager {
134134
virtual DuckLakeMetadata LoadDuckLake();
135135

136136
virtual unique_ptr<QueryResult> Execute(DuckLakeSnapshot snapshot, string &query);
137+
virtual unique_ptr<QueryResult> Execute(string &query);
137138

138139
virtual unique_ptr<QueryResult> Query(DuckLakeSnapshot snapshot, string &query);
140+
virtual unique_ptr<QueryResult> Query(string &query);
139141
//! Get the catalog information for a specific snapshot
140142
virtual DuckLakeCatalogInfo GetCatalogForSnapshot(DuckLakeSnapshot snapshot);
141143
virtual vector<DuckLakeGlobalStatsInfo> GetGlobalTableStats(DuckLakeSnapshot snapshot);

src/metadata_manager/postgres_metadata_manager.cpp

Lines changed: 124 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "metadata_manager/postgres_metadata_manager.hpp"
22
#include "common/ducklake_util.hpp"
33
#include "duckdb/main/database.hpp"
4+
#include "duckdb/planner/filter/constant_filter.hpp"
45
#include "storage/ducklake_catalog.hpp"
56
#include "storage/ducklake_transaction.hpp"
67

@@ -77,6 +78,113 @@ string PostgresMetadataManager::GetColumnTypeInternal(const LogicalType &column_
7778
}
7879
}
7980

81+
string PostgresMetadataManager::GetPostgresIndexStatements() {
82+
return R"(
83+
CREATE INDEX IF NOT EXISTS ducklake_data_file_table_snapshot_idx ON {METADATA_CATALOG}.ducklake_data_file(table_id, begin_snapshot, end_snapshot);
84+
CREATE INDEX IF NOT EXISTS ducklake_delete_file_table_snapshot_idx ON {METADATA_CATALOG}.ducklake_delete_file(table_id, begin_snapshot, end_snapshot);
85+
CREATE INDEX IF NOT EXISTS ducklake_file_column_stats_table_column_idx ON {METADATA_CATALOG}.ducklake_file_column_stats(table_id, column_id);
86+
CREATE INDEX IF NOT EXISTS ducklake_schema_versions_table_schema_version_idx ON {METADATA_CATALOG}.ducklake_schema_versions(table_id, schema_version);
87+
CREATE INDEX IF NOT EXISTS ducklake_column_table_snapshot_idx ON {METADATA_CATALOG}.ducklake_column(table_id, begin_snapshot, end_snapshot);
88+
CREATE INDEX IF NOT EXISTS ducklake_table_column_stats_table_column_idx ON {METADATA_CATALOG}.ducklake_table_column_stats(table_id, column_id);
89+
)";
90+
}
91+
92+
void PostgresMetadataManager::InitializeDuckLake(bool has_explicit_schema, DuckLakeEncryption encryption) {
93+
DuckLakeMetadataManager::InitializeDuckLake(has_explicit_schema, encryption);
94+
auto index_query = GetPostgresIndexStatements();
95+
auto result = Execute(index_query);
96+
if (result->HasError()) {
97+
result->GetErrorObject().Throw("Failed to initialize DuckLake Postgres metadata indexes: ");
98+
}
99+
}
100+
101+
string PostgresMetadataManager::GetPostgresStatsType(const LogicalType &type) {
102+
switch (type.id()) {
103+
case LogicalTypeId::BOOLEAN:
104+
return "BOOLEAN";
105+
case LogicalTypeId::TINYINT:
106+
case LogicalTypeId::SMALLINT:
107+
return "SMALLINT";
108+
case LogicalTypeId::INTEGER:
109+
case LogicalTypeId::UTINYINT:
110+
case LogicalTypeId::USMALLINT:
111+
return "INTEGER";
112+
case LogicalTypeId::BIGINT:
113+
case LogicalTypeId::UINTEGER:
114+
return "BIGINT";
115+
case LogicalTypeId::UBIGINT:
116+
case LogicalTypeId::HUGEINT:
117+
case LogicalTypeId::UHUGEINT:
118+
return "NUMERIC";
119+
case LogicalTypeId::FLOAT:
120+
return "REAL";
121+
case LogicalTypeId::DOUBLE:
122+
return "DOUBLE PRECISION";
123+
default:
124+
return type.ToString();
125+
}
126+
}
127+
128+
bool PostgresMetadataManager::CanCastStatsForValueComparison(const LogicalType &type) {
129+
return type.IsNumeric() || type.id() == LogicalTypeId::BOOLEAN;
130+
}
131+
132+
string PostgresMetadataManager::CastValueToTarget(const Value &val, const LogicalType &type) {
133+
bool value_is_finite = true;
134+
if (val.type().id() == LogicalTypeId::FLOAT || val.type().id() == LogicalTypeId::DOUBLE) {
135+
value_is_finite = Value::IsFinite(val.GetValue<double>());
136+
}
137+
if (type.IsNumeric() && value_is_finite) {
138+
return val.ToString();
139+
}
140+
auto literal = DuckLakeUtil::SQLLiteralToString(val.ToString());
141+
if (RequiresValueComparison(type) && CanCastStatsForValueComparison(type)) {
142+
return literal + "::" + GetPostgresStatsType(type);
143+
}
144+
return literal;
145+
}
146+
147+
string PostgresMetadataManager::CastStatsToTarget(const string &stats, const LogicalType &type) {
148+
if (RequiresValueComparison(type) && CanCastStatsForValueComparison(type)) {
149+
return stats + "::" + GetPostgresStatsType(type);
150+
}
151+
return stats;
152+
}
153+
154+
string PostgresMetadataManager::GenerateConstantFilter(const ConstantFilter &constant_filter, const LogicalType &type,
155+
unordered_set<string> &referenced_stats) {
156+
if (RequiresValueComparison(type) && !CanCastStatsForValueComparison(type)) {
157+
return string();
158+
}
159+
auto constant_str = CastValueToTarget(constant_filter.constant, type);
160+
auto min_value = CastStatsToTarget("min_value", type);
161+
auto max_value = CastStatsToTarget("max_value", type);
162+
switch (constant_filter.comparison_type) {
163+
case ExpressionType::COMPARE_EQUAL:
164+
referenced_stats.insert("min_value");
165+
referenced_stats.insert("max_value");
166+
return StringUtil::Format("%s BETWEEN %s AND %s", constant_str, min_value, max_value);
167+
case ExpressionType::COMPARE_NOTEQUAL:
168+
referenced_stats.insert("min_value");
169+
referenced_stats.insert("max_value");
170+
return StringUtil::Format("NOT (%s = %s AND %s = %s)", min_value, constant_str, max_value, constant_str);
171+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
172+
referenced_stats.insert("max_value");
173+
return StringUtil::Format("%s >= %s", max_value, constant_str);
174+
case ExpressionType::COMPARE_GREATERTHAN:
175+
referenced_stats.insert("max_value");
176+
return StringUtil::Format("%s > %s", max_value, constant_str);
177+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
178+
referenced_stats.insert("min_value");
179+
return StringUtil::Format("%s <= %s", min_value, constant_str);
180+
case ExpressionType::COMPARE_LESSTHAN:
181+
referenced_stats.insert("min_value");
182+
return StringUtil::Format("%s < %s", min_value, constant_str);
183+
default:
184+
return string();
185+
}
186+
}
187+
80188
unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(DuckLakeSnapshot snapshot, string &query,
81189
string command) {
82190
auto &commit_info = transaction.GetCommitInfo();
@@ -89,7 +197,6 @@ unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(DuckLakeSnapshot s
89197
query = StringUtil::Replace(query, "{COMMIT_MESSAGE}", commit_info.commit_message.ToSQLString());
90198
query = StringUtil::Replace(query, "{COMMIT_EXTRA_INFO}", commit_info.commit_extra_info.ToSQLString());
91199

92-
auto &connection = transaction.GetConnection();
93200
auto &ducklake_catalog = transaction.GetCatalog();
94201
auto catalog_identifier = DuckLakeUtil::SQLIdentifierToString(ducklake_catalog.MetadataDatabaseName());
95202
auto catalog_literal = DuckLakeUtil::SQLLiteralToString(ducklake_catalog.MetadataDatabaseName());
@@ -107,16 +214,31 @@ unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(DuckLakeSnapshot s
107214
query = StringUtil::Replace(query, "{METADATA_PATH}", metadata_path);
108215
query = StringUtil::Replace(query, "{DATA_PATH}", data_path);
109216

110-
return connection.Query(StringUtil::Format("CALL %s(%s, %s)", command, catalog_literal, SQLString(query)));
217+
auto passthrough_query = StringUtil::Format("CALL %s(%s, %s)", command, catalog_literal, SQLString(query));
218+
return transaction.Query(passthrough_query);
219+
}
220+
221+
unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(string &query, string command) {
222+
// Snapshot-less metadata queries must not contain snapshot placeholders.
223+
DuckLakeSnapshot snapshot;
224+
return ExecuteQuery(snapshot, query, std::move(command));
111225
}
112226
unique_ptr<QueryResult> PostgresMetadataManager::Execute(DuckLakeSnapshot snapshot, string &query) {
113227
return ExecuteQuery(snapshot, query, "postgres_execute");
114228
}
115229

230+
unique_ptr<QueryResult> PostgresMetadataManager::Execute(string &query) {
231+
return ExecuteQuery(query, "postgres_execute");
232+
}
233+
116234
unique_ptr<QueryResult> PostgresMetadataManager::Query(DuckLakeSnapshot snapshot, string &query) {
117235
return ExecuteQuery(snapshot, query, "postgres_query");
118236
}
119237

238+
unique_ptr<QueryResult> PostgresMetadataManager::Query(string &query) {
239+
return ExecuteQuery(query, "postgres_query");
240+
}
241+
120242
string PostgresMetadataManager::GetLatestSnapshotQuery() const {
121243
return R"(
122244
SELECT * FROM postgres_query({METADATA_CATALOG_NAME_LITERAL},

0 commit comments

Comments
 (0)