Skip to content

Commit d04cc41

Browse files
committed
Route Postgres metadata reads through passthrough
1 parent 1dbb2d9 commit d04cc41

6 files changed

Lines changed: 235 additions & 46 deletions

File tree

src/include/metadata_manager/postgres_metadata_manager.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,27 @@ class PostgresMetadataManager : public DuckLakeMetadataManager {
3232
string GetColumnTypeInternal(const LogicalType &type) override;
3333
shared_ptr<DuckLakeInlinedData> TransformInlinedData(QueryResult &result,
3434
const vector<LogicalType> &expected_types) override;
35+
string GetCreateTableStatements() override;
3536

3637
unique_ptr<QueryResult> Execute(DuckLakeSnapshot snapshot, string &query) override;
38+
unique_ptr<QueryResult> Execute(string &query) override;
3739

3840
unique_ptr<QueryResult> Query(DuckLakeSnapshot snapshot, string &query) override;
41+
unique_ptr<QueryResult> Query(string &query) override;
3942

4043
protected:
4144
string GetLatestSnapshotQuery() const override;
45+
string CastValueToTarget(const Value &val, const LogicalType &type) override;
46+
string CastStatsToTarget(const string &stats, const LogicalType &type) override;
47+
string GenerateConstantFilter(const ConstantFilter &constant_filter, const LogicalType &type,
48+
unordered_set<string> &referenced_stats) override;
4249

4350
private:
4451
unique_ptr<QueryResult> ExecuteQuery(DuckLakeSnapshot snapshot, string &query, string command);
52+
unique_ptr<QueryResult> ExecuteQuery(string &query, string command);
53+
string GetPostgresIndexStatements();
54+
string GetPostgresStatsType(const LogicalType &type);
55+
bool CanCastStatsForValueComparison(const LogicalType &type);
4556
};
4657

4758
} // namespace duckdb

src/include/storage/ducklake_metadata_manager.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,10 @@ class DuckLakeMetadataManager {
134134
virtual DuckLakeMetadata LoadDuckLake();
135135

136136
virtual unique_ptr<QueryResult> Execute(DuckLakeSnapshot snapshot, string &query);
137+
virtual unique_ptr<QueryResult> Execute(string &query);
137138

138139
virtual unique_ptr<QueryResult> Query(DuckLakeSnapshot snapshot, string &query);
140+
virtual unique_ptr<QueryResult> Query(string &query);
139141
//! Get the catalog information for a specific snapshot
140142
virtual DuckLakeCatalogInfo GetCatalogForSnapshot(DuckLakeSnapshot snapshot);
141143
virtual vector<DuckLakeGlobalStatsInfo> GetGlobalTableStats(DuckLakeSnapshot snapshot);

src/metadata_manager/postgres_metadata_manager.cpp

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "metadata_manager/postgres_metadata_manager.hpp"
22
#include "common/ducklake_util.hpp"
33
#include "duckdb/main/database.hpp"
4+
#include "duckdb/planner/filter/constant_filter.hpp"
45
#include "storage/ducklake_catalog.hpp"
56
#include "storage/ducklake_transaction.hpp"
67

@@ -77,6 +78,108 @@ string PostgresMetadataManager::GetColumnTypeInternal(const LogicalType &column_
7778
}
7879
}
7980

81+
string PostgresMetadataManager::GetCreateTableStatements() {
82+
return DuckLakeMetadataManager::GetCreateTableStatements() + GetPostgresIndexStatements();
83+
}
84+
85+
string PostgresMetadataManager::GetPostgresIndexStatements() {
86+
return R"(
87+
CREATE INDEX IF NOT EXISTS ducklake_data_file_table_snapshot_idx ON {METADATA_CATALOG}.ducklake_data_file(table_id, begin_snapshot, end_snapshot);
88+
CREATE INDEX IF NOT EXISTS ducklake_delete_file_table_snapshot_idx ON {METADATA_CATALOG}.ducklake_delete_file(table_id, begin_snapshot, end_snapshot);
89+
CREATE INDEX IF NOT EXISTS ducklake_file_column_stats_table_column_idx ON {METADATA_CATALOG}.ducklake_file_column_stats(table_id, column_id);
90+
CREATE INDEX IF NOT EXISTS ducklake_schema_versions_table_schema_version_idx ON {METADATA_CATALOG}.ducklake_schema_versions(table_id, schema_version);
91+
CREATE INDEX IF NOT EXISTS ducklake_column_table_snapshot_idx ON {METADATA_CATALOG}.ducklake_column(table_id, begin_snapshot, end_snapshot);
92+
CREATE INDEX IF NOT EXISTS ducklake_table_column_stats_table_column_idx ON {METADATA_CATALOG}.ducklake_table_column_stats(table_id, column_id);
93+
)";
94+
}
95+
96+
string PostgresMetadataManager::GetPostgresStatsType(const LogicalType &type) {
97+
switch (type.id()) {
98+
case LogicalTypeId::BOOLEAN:
99+
return "BOOLEAN";
100+
case LogicalTypeId::TINYINT:
101+
case LogicalTypeId::SMALLINT:
102+
return "SMALLINT";
103+
case LogicalTypeId::INTEGER:
104+
case LogicalTypeId::UTINYINT:
105+
case LogicalTypeId::USMALLINT:
106+
return "INTEGER";
107+
case LogicalTypeId::BIGINT:
108+
case LogicalTypeId::UINTEGER:
109+
return "BIGINT";
110+
case LogicalTypeId::UBIGINT:
111+
case LogicalTypeId::HUGEINT:
112+
case LogicalTypeId::UHUGEINT:
113+
return "NUMERIC";
114+
case LogicalTypeId::FLOAT:
115+
return "REAL";
116+
case LogicalTypeId::DOUBLE:
117+
return "DOUBLE PRECISION";
118+
default:
119+
return type.ToString();
120+
}
121+
}
122+
123+
bool PostgresMetadataManager::CanCastStatsForValueComparison(const LogicalType &type) {
124+
return type.IsNumeric() || type.id() == LogicalTypeId::BOOLEAN;
125+
}
126+
127+
string PostgresMetadataManager::CastValueToTarget(const Value &val, const LogicalType &type) {
128+
bool value_is_finite = true;
129+
if (val.type().id() == LogicalTypeId::FLOAT || val.type().id() == LogicalTypeId::DOUBLE) {
130+
value_is_finite = Value::IsFinite(val.GetValue<double>());
131+
}
132+
if (type.IsNumeric() && value_is_finite) {
133+
return val.ToString();
134+
}
135+
auto literal = DuckLakeUtil::SQLLiteralToString(val.ToString());
136+
if (RequiresValueComparison(type) && CanCastStatsForValueComparison(type)) {
137+
return literal + "::" + GetPostgresStatsType(type);
138+
}
139+
return literal;
140+
}
141+
142+
string PostgresMetadataManager::CastStatsToTarget(const string &stats, const LogicalType &type) {
143+
if (RequiresValueComparison(type) && CanCastStatsForValueComparison(type)) {
144+
return stats + "::" + GetPostgresStatsType(type);
145+
}
146+
return stats;
147+
}
148+
149+
string PostgresMetadataManager::GenerateConstantFilter(const ConstantFilter &constant_filter, const LogicalType &type,
150+
unordered_set<string> &referenced_stats) {
151+
if (RequiresValueComparison(type) && !CanCastStatsForValueComparison(type)) {
152+
return string();
153+
}
154+
auto constant_str = CastValueToTarget(constant_filter.constant, type);
155+
auto min_value = CastStatsToTarget("min_value", type);
156+
auto max_value = CastStatsToTarget("max_value", type);
157+
switch (constant_filter.comparison_type) {
158+
case ExpressionType::COMPARE_EQUAL:
159+
referenced_stats.insert("min_value");
160+
referenced_stats.insert("max_value");
161+
return StringUtil::Format("%s BETWEEN %s AND %s", constant_str, min_value, max_value);
162+
case ExpressionType::COMPARE_NOTEQUAL:
163+
referenced_stats.insert("min_value");
164+
referenced_stats.insert("max_value");
165+
return StringUtil::Format("NOT (%s = %s AND %s = %s)", min_value, constant_str, max_value, constant_str);
166+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
167+
referenced_stats.insert("max_value");
168+
return StringUtil::Format("%s >= %s", max_value, constant_str);
169+
case ExpressionType::COMPARE_GREATERTHAN:
170+
referenced_stats.insert("max_value");
171+
return StringUtil::Format("%s > %s", max_value, constant_str);
172+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
173+
referenced_stats.insert("min_value");
174+
return StringUtil::Format("%s <= %s", min_value, constant_str);
175+
case ExpressionType::COMPARE_LESSTHAN:
176+
referenced_stats.insert("min_value");
177+
return StringUtil::Format("%s < %s", min_value, constant_str);
178+
default:
179+
return string();
180+
}
181+
}
182+
80183
unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(DuckLakeSnapshot snapshot, string &query,
81184
string command) {
82185
auto &commit_info = transaction.GetCommitInfo();
@@ -89,7 +192,6 @@ unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(DuckLakeSnapshot s
89192
query = StringUtil::Replace(query, "{COMMIT_MESSAGE}", commit_info.commit_message.ToSQLString());
90193
query = StringUtil::Replace(query, "{COMMIT_EXTRA_INFO}", commit_info.commit_extra_info.ToSQLString());
91194

92-
auto &connection = transaction.GetConnection();
93195
auto &ducklake_catalog = transaction.GetCatalog();
94196
auto catalog_identifier = DuckLakeUtil::SQLIdentifierToString(ducklake_catalog.MetadataDatabaseName());
95197
auto catalog_literal = DuckLakeUtil::SQLLiteralToString(ducklake_catalog.MetadataDatabaseName());
@@ -107,16 +209,31 @@ unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(DuckLakeSnapshot s
107209
query = StringUtil::Replace(query, "{METADATA_PATH}", metadata_path);
108210
query = StringUtil::Replace(query, "{DATA_PATH}", data_path);
109211

110-
return connection.Query(StringUtil::Format("CALL %s(%s, %s)", command, catalog_literal, SQLString(query)));
212+
auto passthrough_query = StringUtil::Format("CALL %s(%s, %s)", command, catalog_literal, SQLString(query));
213+
return transaction.Query(passthrough_query);
214+
}
215+
216+
unique_ptr<QueryResult> PostgresMetadataManager::ExecuteQuery(string &query, string command) {
217+
// Snapshot-less metadata queries must not contain snapshot placeholders.
218+
DuckLakeSnapshot snapshot;
219+
return ExecuteQuery(snapshot, query, std::move(command));
111220
}
112221
unique_ptr<QueryResult> PostgresMetadataManager::Execute(DuckLakeSnapshot snapshot, string &query) {
113222
return ExecuteQuery(snapshot, query, "postgres_execute");
114223
}
115224

225+
unique_ptr<QueryResult> PostgresMetadataManager::Execute(string &query) {
226+
return ExecuteQuery(query, "postgres_execute");
227+
}
228+
116229
unique_ptr<QueryResult> PostgresMetadataManager::Query(DuckLakeSnapshot snapshot, string &query) {
117230
return ExecuteQuery(snapshot, query, "postgres_query");
118231
}
119232

233+
unique_ptr<QueryResult> PostgresMetadataManager::Query(string &query) {
234+
return ExecuteQuery(query, "postgres_query");
235+
}
236+
120237
string PostgresMetadataManager::GetLatestSnapshotQuery() const {
121238
return R"(
122239
SELECT * FROM postgres_query({METADATA_CATALOG_NAME_LITERAL},

0 commit comments

Comments
 (0)