From f016e2c79b4a6b9a6b98517873990c14245bfeff Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 2 Mar 2026 15:57:56 +0300 Subject: [PATCH 01/23] Add iceberg_catalog FDW and refactor REST catalog for multi-server support Introduce the iceberg_catalog foreign data wrapper as a configuration framework for Iceberg catalogs. This allows defining named catalog servers via CREATE SERVER, removing the limitation of a single global REST catalog configured through GUCs. Key changes: - Add iceberg_catalog_validator for server option validation - Add RestCatalogConnectionInfo struct to unify connection parameters - Add GetRestCatalogConnectionFromGUCs/FromServer resolution functions - Refactor all REST catalog functions to accept RestCatalogConnectionInfo - Refactor token cache from single global to per-server hash table - Add extension upgrade SQL (3.2--3.3) with FDW, validator, and pre-created postgres/object_store servers Signed-off-by: sfc-gh-npuka --- .../pg_lake/rest_catalog/rest_catalog.h | 36 +- pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql | 32 ++ .../src/rest_catalog/rest_catalog.c | 400 +++++++++++++++--- pg_lake_iceberg/src/test/rest_catalog.c | 4 +- 4 files changed, 403 insertions(+), 69 deletions(-) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index a6634062..f3b345da 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -34,6 +34,24 @@ extern char *RestCatalogScope; extern int RestCatalogAuthType; extern bool RestCatalogEnableVendedCredentials; +/* + * Holds per-server REST catalog connection settings. Can be populated from + * GUCs (for backward-compatible catalog='rest') or from a ForeignServer + * created via CREATE SERVER ... FOREIGN DATA WRAPPER iceberg_catalog. 
+ */ +typedef struct RestCatalogConnectionInfo +{ + char *serverName; /* server name for cache keying, NULL for + * GUC-based */ + char *host; + char *oauthHostPath; + char *clientId; + char *clientSecret; + char *scope; + int authType; + bool enableVendedCredentials; +} RestCatalogConnectionInfo; + #define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens" #define REST_CATALOG_NAMESPACE_NAME "%s/api/catalog/v1/%s/namespaces/%s" @@ -77,20 +95,28 @@ typedef struct RestCatalogRequest #define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens" #define GET_REST_CATALOG_METADATA_LOCATION "%s/api/catalog/v1/%s/namespaces/%s/tables/%s" -extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceName); +/* Connection info resolution */ +extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionFromGUCs(void); +extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionFromServer(const char *serverName); +extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionForRelation(Oid relationId); + +/* FDW name for iceberg_catalog servers */ +#define ICEBERG_CATALOG_FDW_NAME "iceberg_catalog" + +extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT void StartStageRestCatalogIcebergTableCreate(Oid relationId); extern PGDLLEXPORT char *FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSchema * dataFileSchema, List *partitionSpecs); -extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(const char *catalogName, const char *namespaceName); +extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT char *GetRestCatalogName(Oid relationId); extern PGDLLEXPORT char *GetRestCatalogNamespace(Oid relationId); extern PGDLLEXPORT char 
*GetRestCatalogTableName(Oid relationId); extern PGDLLEXPORT bool IsReadOnlyRestCatalogIcebergTable(Oid relationId); -extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(const char *restCatalogName, const char *namespaceName, +extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(RestCatalogConnectionInfo * conn, const char *restCatalogName, const char *namespaceName, const char *relationName); extern PGDLLEXPORT char *GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId); extern PGDLLEXPORT void ReportHTTPError(HttpResult httpResult, int level); -extern PGDLLEXPORT List *PostHeadersWithAuth(void); -extern PGDLLEXPORT List *DeleteHeadersWithAuth(void); +extern PGDLLEXPORT List *PostHeadersWithAuth(RestCatalogConnectionInfo * conn); +extern PGDLLEXPORT List *DeleteHeadersWithAuth(RestCatalogConnectionInfo * conn); extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers); extern PGDLLEXPORT RestCatalogRequest * GetAddSnapshotCatalogRequest(IcebergSnapshot * newSnapshot, Oid relationId); extern PGDLLEXPORT RestCatalogRequest * GetAddSchemaCatalogRequest(Oid relationId, DataFileSchema * dataFileSchema); diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql index 11b0f619..aba9d22d 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql @@ -15,3 +15,35 @@ CREATE OR REPLACE VIEW pg_catalog.iceberg_tables AS FROM lake_iceberg.tables WHERE metadata_location IS NOT NULL; +/* + * iceberg_catalog foreign data wrapper: allows defining named catalog + * configurations via CREATE SERVER so that users are not limited to a + * single global REST catalog configured through GUC settings. 
+ * + * Example: + * CREATE SERVER my_polaris TYPE 'rest' + * FOREIGN DATA WRAPPER iceberg_catalog + * OPTIONS (rest_endpoint 'http://polaris:8181', + * rest_auth_type 'default', + * client_id '...', + * client_secret '...'); + * + * CREATE TABLE t (a int) USING iceberg WITH (catalog = 'my_polaris'); + */ +CREATE FUNCTION lake_iceberg.iceberg_catalog_validator(text[], oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER iceberg_catalog + NO HANDLER + VALIDATOR lake_iceberg.iceberg_catalog_validator; + +/* Pre-created catalog servers for backward compatibility */ +CREATE SERVER postgres + TYPE 'postgres' + FOREIGN DATA WRAPPER iceberg_catalog; + +CREATE SERVER object_store + TYPE 'object_store' + FOREIGN DATA WRAPPER iceberg_catalog; diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 8cf2b2ae..186bb66e 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -20,9 +20,13 @@ #include "postgres.h" #include "miscadmin.h" +#include "access/reloptions.h" +#include "catalog/pg_foreign_server.h" #include "common/base64.h" #include "commands/dbcommands.h" +#include "commands/defrem.h" #include "foreign/foreign.h" +#include "fmgr.h" #include "lib/stringinfo.h" #include "utils/builtins.h" #include "utils/jsonb.h" @@ -40,6 +44,7 @@ #include "pg_lake/object_store_catalog/object_store_catalog.h" #include "pg_lake/parsetree/options.h" #include "pg_lake/rest_catalog/rest_catalog.h" +#include "pg_lake/util/catalog_type.h" #include "pg_lake/util/url_encode.h" #include "pg_lake/util/rel_utils.h" @@ -54,17 +59,26 @@ int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_DEFAULT; bool RestCatalogEnableVendedCredentials = true; /* -* Should always be accessed via GetRestCatalogAccessToken() -*/ -static char *RestCatalogAccessToken = NULL; -static TimestampTz RestCatalogAccessTokenExpiry = 0; + * Per-server token cache. 
Keyed by server name (for server-based catalogs) + * or "GUC" (for GUC-based backward-compatible catalog='rest'). + */ +#define TOKEN_CACHE_KEY_LEN NAMEDATALEN + +typedef struct RestCatalogTokenCacheEntry +{ + char key[TOKEN_CACHE_KEY_LEN]; + char *accessToken; + TimestampTz accessTokenExpiry; +} RestCatalogTokenCacheEntry; -static char *GetRestCatalogAccessToken(bool forceRefreshToken); -static void FetchRestCatalogAccessToken(char **accessToken, int *expiresIn); -static void CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName); +static HTAB *RestCatalogTokenCache = NULL; + +static char *GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshToken); +static void FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken, int *expiresIn); +static void CreateNamespaceOnRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName); static char *EncodeBasicAuth(const char *clientId, const char *clientSecret); static char *JsonbGetStringByPath(const char *jsonb_text, int nkeys,...); -static List *GetHeadersWithAuth(void); +static List *GetHeadersWithAuth(RestCatalogConnectionInfo * conn); static char *AppendIcebergPartitionSpecForRestCatalog(List *partitionSpecs); static void UpdateAuthorizationHeader(List *headers, const char *token); @@ -79,6 +93,213 @@ typedef enum RestCatalogRequestRetryAction REST_CATALOG_RETRY_REFRESH_AUTH /* 419 Token Expired */ } RestCatalogRequestRetryAction; +PG_FUNCTION_INFO_V1(iceberg_catalog_validator); + +/* + * Valid options for iceberg_catalog servers. 
+ */ +static const char *iceberg_catalog_server_options[] = { + "rest_endpoint", + "scope", + "rest_auth_type", + "oauth_endpoint", + "enable_vended_credentials", + "location_prefix", + "catalog_name", + "client_id", + "client_secret", + NULL +}; + + +static bool +is_valid_iceberg_catalog_option(const char *keyword) +{ + for (int i = 0; iceberg_catalog_server_options[i] != NULL; i++) + { + if (strcmp(keyword, iceberg_catalog_server_options[i]) == 0) + return true; + } + return false; +} + + +/* + * iceberg_catalog_validator validates options for the iceberg_catalog FDW. + * Only server-level options are supported. + */ +Datum +iceberg_catalog_validator(PG_FUNCTION_ARGS) +{ + List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); + Oid catalog = PG_GETARG_OID(1); + ListCell *cell; + + /* + * PostgreSQL calls the validator for CREATE FOREIGN DATA WRAPPER itself + * (with ForeignDataWrapperRelationId), not just for CREATE SERVER. Allow + * empty option lists for non-server contexts so extension creation + * succeeds, but still reject if someone passes options where they don't + * belong. 
+ */ + if (catalog != ForeignServerRelationId) + { + if (list_length(options_list) > 0) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("iceberg_catalog options are only valid for SERVER objects"))); + PG_RETURN_VOID(); + } + + foreach(cell, options_list) + { + DefElem *def = (DefElem *) lfirst(cell); + + if (!is_valid_iceberg_catalog_option(def->defname)) + { + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("invalid option \"%s\" for iceberg_catalog server", def->defname), + errhint("Valid options are: rest_endpoint, rest_auth_type, " + "oauth_endpoint, scope, enable_vended_credentials, " + "location_prefix, catalog_name, client_id, client_secret."))); + } + + if (strcmp(def->defname, "rest_auth_type") == 0) + { + char *authType = defGetString(def); + + if (strcmp(authType, "default") != 0 && strcmp(authType, "horizon") != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid rest_auth_type option: \"%s\"", authType), + errhint("Valid values are \"default\" and \"horizon\"."))); + } + else if (strcmp(def->defname, "enable_vended_credentials") == 0) + { + (void) defGetBoolean(def); + } + } + + PG_RETURN_VOID(); +} + + +/* + * GetRestCatalogConnectionFromGUCs returns a RestCatalogConnectionInfo + * populated from the current GUC variables. Used for backward-compatible + * catalog='rest' tables. 
+ */ +RestCatalogConnectionInfo * +GetRestCatalogConnectionFromGUCs(void) +{ + RestCatalogConnectionInfo *conn = palloc0(sizeof(RestCatalogConnectionInfo)); + + conn->serverName = NULL; + conn->host = RestCatalogHost; + conn->oauthHostPath = RestCatalogOauthHostPath; + conn->clientId = RestCatalogClientId; + conn->clientSecret = RestCatalogClientSecret; + conn->scope = RestCatalogScope; + conn->authType = RestCatalogAuthType; + conn->enableVendedCredentials = RestCatalogEnableVendedCredentials; + + return conn; +} + + +/* + * GetRestCatalogConnectionFromServer returns a RestCatalogConnectionInfo + * populated from the options of the named iceberg_catalog ForeignServer, + * including client_id and client_secret; no USER MAPPING is consulted. + */ +RestCatalogConnectionInfo * +GetRestCatalogConnectionFromServer(const char *serverName) +{ + ForeignServer *server = GetForeignServerByName(serverName, false); + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("server \"%s\" does not use the iceberg_catalog foreign data wrapper", + serverName))); + + RestCatalogConnectionInfo *conn = palloc0(sizeof(RestCatalogConnectionInfo)); + + conn->serverName = pstrdup(serverName); + + /* Set defaults matching the GUC defaults */ + conn->host = NULL; + conn->oauthHostPath = ""; + conn->clientId = NULL; + conn->clientSecret = NULL; + conn->scope = "PRINCIPAL_ROLE:ALL"; + conn->authType = REST_CATALOG_AUTH_TYPE_DEFAULT; + conn->enableVendedCredentials = true; + + ListCell *lc; + + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "rest_endpoint") == 0) + conn->host = defGetString(def); + else if (strcmp(def->defname, "client_id") == 0) + conn->clientId = defGetString(def); + else if (strcmp(def->defname, "client_secret") == 0) + conn->clientSecret = defGetString(def); + else if (strcmp(def->defname, 
"scope") == 0) + conn->scope = defGetString(def); + else if (strcmp(def->defname, "rest_auth_type") == 0) + { + char *authType = defGetString(def); + + conn->authType = (strcmp(authType, "horizon") == 0) + ? REST_CATALOG_AUTH_TYPE_HORIZON + : REST_CATALOG_AUTH_TYPE_DEFAULT; + } + else if (strcmp(def->defname, "oauth_endpoint") == 0) + conn->oauthHostPath = defGetString(def); + else if (strcmp(def->defname, "enable_vended_credentials") == 0) + conn->enableVendedCredentials = defGetBoolean(def); + } + + if (conn->host == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_OPTION_NAME_NOT_FOUND), + errmsg("\"rest_endpoint\" option is required for iceberg_catalog server \"%s\"", + serverName))); + + return conn; +} + + +/* + * GetRestCatalogConnectionForRelation returns the REST catalog connection + * info for the given relation. If the table uses catalog='rest', the + * connection is built from GUCs. Otherwise, the catalog option is treated + * as a server name and the connection is built from its options. + */ +RestCatalogConnectionInfo * +GetRestCatalogConnectionForRelation(Oid relationId) +{ + ForeignTable *foreignTable = GetForeignTable(relationId); + char *catalog = GetStringOption(foreignTable->options, "catalog", false); + + if (catalog == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("catalog option is not set for relation %u", relationId))); + + if (pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(catalog)) == 0) + return GetRestCatalogConnectionFromGUCs(); + + return GetRestCatalogConnectionFromServer(catalog); +} + + /* * StartStageRestCatalogIcebergTableCreate stages the creation of an iceberg table * in the rest catalog. On any failure, an error is raised. 
If the table exists, @@ -120,12 +341,14 @@ StartStageRestCatalogIcebergTableCreate(Oid relationId) const char *catalogName = GetRestCatalogName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); + RestCatalogConnectionInfo *conn = GetRestCatalogConnectionForRelation(relationId); + char *postUrl = - psprintf(REST_CATALOG_TABLES, RestCatalogHost, + psprintf(REST_CATALOG_TABLES, conn->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - List *headers = PostHeadersWithAuth(); + List *headers = PostHeadersWithAuth(conn); - if (RestCatalogEnableVendedCredentials) + if (conn->enableVendedCredentials) { char *vendedCreds = pstrdup("X-Iceberg-Access-Delegation: vended-credentials"); @@ -256,7 +479,7 @@ FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSche * allowed locations as part of the namespace. */ void -RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceName) +RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName) { /* * First, we need to check if the namespace already exists in Rest Catalog @@ -264,9 +487,9 @@ RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceNam */ char *getUrl = psprintf(REST_CATALOG_NAMESPACE_NAME, - RestCatalogHost, URLEncodePath(catalogName), + conn->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth()); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth(conn)); switch (httpResult.status) { @@ -281,7 +504,7 @@ RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceNam /* * Does not exists, we'll create it. 
*/ - CreateNamespaceOnRestCatalog(catalogName, namespaceName); + CreateNamespaceOnRestCatalog(conn, catalogName, namespaceName); break; } @@ -346,7 +569,7 @@ RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceNam * namespace exists when creating a table in the given namespace. */ void -ErrorIfRestNamespaceDoesNotExist(const char *catalogName, const char *namespaceName) +ErrorIfRestNamespaceDoesNotExist(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName) { /* * First, we need to check if the namespace already exists in Rest Catalog @@ -354,9 +577,9 @@ ErrorIfRestNamespaceDoesNotExist(const char *catalogName, const char *namespaceN */ char *getUrl = psprintf(REST_CATALOG_NAMESPACE_NAME, - RestCatalogHost, URLEncodePath(catalogName), + conn->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth()); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth(conn)); /* namespace not found */ @@ -389,7 +612,9 @@ GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId) const char *relationName = GetRestCatalogTableName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); - return GetMetadataLocationFromRestCatalog(restCatalogName, namespaceName, relationName); + RestCatalogConnectionInfo *conn = GetRestCatalogConnectionForRelation(relationId); + + return GetMetadataLocationFromRestCatalog(conn, restCatalogName, namespaceName, relationName); } @@ -397,13 +622,13 @@ GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId) * Gets the metadata location for a relation from the external catalog. 
*/ char * -GetMetadataLocationFromRestCatalog(const char *restCatalogName, const char *namespaceName, const char *relationName) +GetMetadataLocationFromRestCatalog(RestCatalogConnectionInfo * conn, const char *restCatalogName, const char *namespaceName, const char *relationName) { char *getUrl = psprintf(REST_CATALOG_TABLE, - RestCatalogHost, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); + conn->host, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); - List *headers = GetHeadersWithAuth(); + List *headers = GetHeadersWithAuth(conn); HttpResult hr = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, headers); if (hr.status != 200) @@ -425,7 +650,7 @@ GetMetadataLocationFromRestCatalog(const char *restCatalogName, const char *name * an error is raised. */ static void -CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName) +CreateNamespaceOnRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName) { /* POST create */ StringInfoData body; @@ -449,10 +674,10 @@ CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName) appendStringInfoChar(&body, '}'); /* close body */ char *postUrl = - psprintf(REST_CATALOG_NAMESPACE, RestCatalogHost, + psprintf(REST_CATALOG_NAMESPACE, conn->host, URLEncodePath(catalogName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body.data, PostHeadersWithAuth()); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body.data, PostHeadersWithAuth(conn)); if (httpResult.status != 200) { @@ -464,11 +689,11 @@ CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName) * Creates the headers for a POST request with authentication. 
*/ List * -PostHeadersWithAuth(void) +PostHeadersWithAuth(RestCatalogConnectionInfo * conn) { bool forceRefreshToken = false; - return list_make3(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(forceRefreshToken)), + return list_make3(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(conn, forceRefreshToken)), pstrdup("Accept: application/json"), pstrdup("Content-Type: application/json")); } @@ -479,11 +704,11 @@ PostHeadersWithAuth(void) * Creates the headers for a DELETE request with authentication. */ List * -DeleteHeadersWithAuth(void) +DeleteHeadersWithAuth(RestCatalogConnectionInfo * conn) { bool forceRefreshToken = false; - return list_make1(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(forceRefreshToken))); + return list_make1(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(conn, forceRefreshToken))); } @@ -492,11 +717,11 @@ DeleteHeadersWithAuth(void) * Creates the headers for a GET request with authentication. */ static List * -GetHeadersWithAuth(void) +GetHeadersWithAuth(RestCatalogConnectionInfo * conn) { bool forceRefreshToken = false; - return list_make2(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(forceRefreshToken)), + return list_make2(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(conn, forceRefreshToken)), pstrdup("Accept: application/json")); } @@ -538,12 +763,63 @@ ReportHTTPError(HttpResult httpResult, int level) /* -* Gets an access token from rest catalog using client credentials that are -* configured via GUC variables. Caches the token until it is about to expire. + * Build a cache key for the per-server token cache. Uses server name for + * server-based catalogs, or "GUC" for GUC-based backward-compatible mode. + */ +static void +BuildTokenCacheKey(char *key, const RestCatalogConnectionInfo *conn) +{ + strlcpy(key, + conn->serverName ? 
conn->serverName : "GUC", + TOKEN_CACHE_KEY_LEN); +} + + +/* + * Initialize the per-server token cache if needed. Keys are C strings, hence HASH_STRINGS. + */ +static void +InitTokenCacheIfNeeded(void) +{ + if (RestCatalogTokenCache != NULL) + return; + + HASHCTL ctl; + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = TOKEN_CACHE_KEY_LEN; + ctl.entrysize = sizeof(RestCatalogTokenCacheEntry); + ctl.hcxt = TopMemoryContext; + + RestCatalogTokenCache = hash_create("REST Catalog Token Cache", + 8, &ctl, + HASH_ELEM | HASH_STRINGS | HASH_CONTEXT); +} + + +/* +* Gets an access token from rest catalog. Caches the token per server +* (keyed by server name, or "GUC" when GUC-based) until it is about to expire. */ static char * -GetRestCatalogAccessToken(bool forceRefreshToken) +GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshToken) { + InitTokenCacheIfNeeded(); + + char cacheKey[TOKEN_CACHE_KEY_LEN]; + + BuildTokenCacheKey(cacheKey, conn); + + bool found = false; + RestCatalogTokenCacheEntry *entry = + hash_search(RestCatalogTokenCache, cacheKey, HASH_ENTER, &found); + + if (!found) + { + entry->accessToken = NULL; + entry->accessTokenExpiry = 0; + } + /* * Calling initial time or token will expire in 1 minute, fetch a new * token. 
@@ -551,74 +827,72 @@ GetRestCatalogAccessToken(bool forceRefreshToken) TimestampTz now = GetCurrentTimestamp(); const int MINUTE_IN_MSECS = 60 * 1000; - if (forceRefreshToken || RestCatalogAccessTokenExpiry == 0 || - !TimestampDifferenceExceeds(now, RestCatalogAccessTokenExpiry, MINUTE_IN_MSECS)) + if (forceRefreshToken || entry->accessToken == NULL || + !TimestampDifferenceExceeds(now, entry->accessTokenExpiry, MINUTE_IN_MSECS)) { - if (RestCatalogAccessToken) + if (entry->accessToken) { - pfree(RestCatalogAccessToken); - RestCatalogAccessToken = NULL; + pfree(entry->accessToken); + entry->accessToken = NULL; } char *accessToken = NULL; int expiresIn = 0; - FetchRestCatalogAccessToken(&accessToken, &expiresIn); + FetchRestCatalogAccessToken(conn, &accessToken, &expiresIn); - RestCatalogAccessToken = MemoryContextStrdup(TopMemoryContext, accessToken); - RestCatalogAccessTokenExpiry = now + (int64_t) expiresIn * 1000000; /* expiresIn is in - * seconds */ + entry->accessToken = MemoryContextStrdup(TopMemoryContext, accessToken); + entry->accessTokenExpiry = now + (int64_t) expiresIn * 1000000; /* expiresIn is in + * seconds */ } - Assert(RestCatalogAccessToken != NULL); + Assert(entry->accessToken != NULL); - return RestCatalogAccessToken; + return entry->accessToken; } /* -* Fetches an access token from rest catalog using client credentials that are -* configured via GUC variables. +* Fetches an access token from rest catalog using the given connection info. 
*/ static void -FetchRestCatalogAccessToken(char **accessToken, int *expiresIn) +FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken, int *expiresIn) { - if (!RestCatalogHost || !*RestCatalogHost) - ereport(ERROR, (errmsg("pg_lake_iceberg.rest_catalog_host should be set"))); - if (!RestCatalogClientSecret || !*RestCatalogClientSecret) - ereport(ERROR, (errmsg("pg_lake_iceberg.rest_catalog_client_secret should be set"))); + if (!conn->host || !*conn->host) + ereport(ERROR, (errmsg("REST catalog host is not configured"))); + if (!conn->clientSecret || !*conn->clientSecret) + ereport(ERROR, (errmsg("REST catalog client_secret is not configured"))); - char *accessTokenUrl = RestCatalogOauthHostPath; + char *accessTokenUrl = conn->oauthHostPath; /* - * if pg_lake_iceberg.rest_catalog_oauth_host_path is not set, use - * Polaris' default oauth token endpoint + * if oauthHostPath is not set, use Polaris' default oauth token endpoint */ - if (*accessTokenUrl == '\0') - accessTokenUrl = psprintf(REST_CATALOG_AUTH_TOKEN_PATH, RestCatalogHost); + if (!accessTokenUrl || *accessTokenUrl == '\0') + accessTokenUrl = psprintf(REST_CATALOG_AUTH_TOKEN_PATH, conn->host); /* Form-encoded body */ StringInfoData body; initStringInfo(&body); appendStringInfo(&body, "grant_type=client_credentials&scope=%s", - URLEncodePath(RestCatalogScope)); + URLEncodePath(conn->scope)); /* Headers */ List *headers = NIL; - if (RestCatalogAuthType == REST_CATALOG_AUTH_TYPE_HORIZON) + if (conn->authType == REST_CATALOG_AUTH_TYPE_HORIZON) { /* Put secret in body (ignore client ID) */ - appendStringInfo(&body, "&client_secret=%s", URLEncodePath(RestCatalogClientSecret)); + appendStringInfo(&body, "&client_secret=%s", URLEncodePath(conn->clientSecret)); } else { - if (!RestCatalogClientId || !*RestCatalogClientId) - ereport(ERROR, (errmsg("pg_lake_iceberg.rest_catalog_client_id should be set"))); + if (!conn->clientId || !*conn->clientId) + ereport(ERROR, (errmsg("REST catalog 
client_id is not configured"))); /* Build Authorization: Basic */ - char *encodedAuth = EncodeBasicAuth(RestCatalogClientId, RestCatalogClientSecret); + char *encodedAuth = EncodeBasicAuth(conn->clientId, conn->clientSecret); char *authHeader = psprintf("Authorization: Basic %s", encodedAuth); headers = lappend(headers, authHeader); diff --git a/pg_lake_iceberg/src/test/rest_catalog.c b/pg_lake_iceberg/src/test/rest_catalog.c index 791ceb2a..7ad8ffa3 100644 --- a/pg_lake_iceberg/src/test/rest_catalog.c +++ b/pg_lake_iceberg/src/test/rest_catalog.c @@ -37,6 +37,8 @@ register_namespace_to_rest_catalog(PG_FUNCTION_ARGS) char *catalogName = text_to_cstring(PG_GETARG_TEXT_P(0)); char *namespaceName = text_to_cstring(PG_GETARG_TEXT_P(1)); - RegisterNamespaceToRestCatalog(catalogName, namespaceName); + RestCatalogConnectionInfo *conn = GetRestCatalogConnectionFromGUCs(); + + RegisterNamespaceToRestCatalog(conn, catalogName, namespaceName); PG_RETURN_VOID(); } From d80b5741f5c7f0fb7215828c601dd42448ed06be Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 2 Mar 2026 15:58:04 +0300 Subject: [PATCH 02/23] Recognize server-based REST catalogs in catalog type detection Update IsServerBasedRestCatalog and HasRestCatalogTableOption to check whether a catalog option value refers to an iceberg_catalog foreign server. Servers without an explicit TYPE default to rest. 
Signed-off-by: sfc-gh-npuka --- .../include/pg_lake/util/catalog_type.h | 1 + pg_lake_engine/src/utils/catalog_type.c | 54 +++++++++++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/pg_lake_engine/include/pg_lake/util/catalog_type.h b/pg_lake_engine/include/pg_lake/util/catalog_type.h index 2db12898..e33cda0d 100644 --- a/pg_lake_engine/include/pg_lake/util/catalog_type.h +++ b/pg_lake_engine/include/pg_lake/util/catalog_type.h @@ -61,3 +61,4 @@ extern PGDLLEXPORT IcebergCatalogType GetIcebergCatalogType(Oid relationId); extern PGDLLEXPORT bool HasRestCatalogTableOption(List *options); extern PGDLLEXPORT bool HasObjectStoreCatalogTableOption(List *options); extern PGDLLEXPORT bool HasReadOnlyOption(List *options); +extern PGDLLEXPORT bool IsServerBasedRestCatalog(List *options); diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index cbf678ac..18bb31b9 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -68,15 +68,22 @@ GetIcebergCatalogType(Oid relationId) /* - * HasRestCatalogTableOption returns true if the options contain - * catalog='rest'. + * HasRestCatalogTableOption returns true if the catalog option indicates a + * REST catalog: either the literal value 'rest' or the name of an + * iceberg_catalog foreign server whose TYPE is 'rest' or unset. */ bool HasRestCatalogTableOption(List *options) { char *catalog = GetStringOption(options, "catalog", false); - return catalog ? pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(catalog)) == 0 : false; + if (catalog == NULL) + return false; + + if (pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(catalog)) == 0) + return true; + + return IsServerBasedRestCatalog(options); } @@ -104,3 +111,44 @@ HasReadOnlyOption(List *options) return readOnly ? 
pg_strncasecmp(readOnly, "true", strlen("true")) == 0 : false; } + + +/* + * IsServerBasedRestCatalog returns true if the catalog option names a + * ForeignServer of the iceberg_catalog FDW whose TYPE is 'rest' or unset. + * Returns false if the catalog value is exactly a known literal ('rest', + * 'object_store', 'postgres') or if no matching server is found. + */ +bool +IsServerBasedRestCatalog(List *options) +{ + char *catalog = GetStringOption(options, "catalog", false); + + if (catalog == NULL) + return false; + + /* Skip known literal catalog names (exact match, so "rest_prod" is still looked up) */ + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0 || + pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0 || + pg_strcasecmp(catalog, POSTGRES_CATALOG_NAME) == 0) + return false; + + /* Try to look up a server with this name */ + bool missingOK = true; + ForeignServer *server = GetForeignServerByName(catalog, missingOK); + + if (server == NULL) + return false; + + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + if (strcmp(fdw->fdwname, "iceberg_catalog") != 0) + return false; + + /* Check server TYPE if set */ + if (server->servertype != NULL && *server->servertype != '\0') + return pg_strcasecmp(server->servertype, "rest") == 0; + + /* No TYPE specified, assume rest */ + return true; +} From 354c8e64b604bf62141921bee27136b5a1fafa0d Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 2 Mar 2026 15:58:10 +0300 Subject: [PATCH 03/23] Support server-based catalogs in option validation and table creation Update pg_lake_iceberg_validator to accept server names as valid catalog option values (in addition to the existing postgres/object_store/rest literals). Update ProcessCreateIcebergTableFromForeignTableStmt to resolve REST catalog connection info from the named server when the catalog option refers to an iceberg_catalog server. 
Signed-off-by: sfc-gh-npuka --- pg_lake_table/src/ddl/create_table.c | 22 +++++++++++++++++++--- pg_lake_table/src/fdw/option.c | 19 +++++++++++++++---- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 510cd7d4..3aebf5f0 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -734,10 +734,18 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (hasRestCatalogOption && hasExternalCatalogReadOnlyOption) { - ErrorIfRestNamespaceDoesNotExist(catalogName, catalogNamespace); + char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); + RestCatalogConnectionInfo *conn; + + if (pg_strncasecmp(catalogOptionValue, REST_CATALOG_NAME, strlen(catalogOptionValue)) == 0) + conn = GetRestCatalogConnectionFromGUCs(); + else + conn = GetRestCatalogConnectionFromServer(catalogOptionValue); + + ErrorIfRestNamespaceDoesNotExist(conn, catalogName, catalogNamespace); metadataLocation = - GetMetadataLocationFromRestCatalog(catalogName, catalogNamespace, catalogTableName); + GetMetadataLocationFromRestCatalog(conn, catalogName, catalogNamespace, catalogTableName); } else if (hasObjectStoreCatalogOption && hasExternalCatalogReadOnlyOption) { @@ -942,7 +950,15 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) * database name. We normally encode that in GetRestCatalogName() * etc., but here we need to do it early before the table is created. 
*/ - RegisterNamespaceToRestCatalog(get_database_name(MyDatabaseId), + char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); + RestCatalogConnectionInfo *conn; + + if (pg_strncasecmp(catalogOptionValue, REST_CATALOG_NAME, strlen(catalogOptionValue)) == 0) + conn = GetRestCatalogConnectionFromGUCs(); + else + conn = GetRestCatalogConnectionFromServer(catalogOptionValue); + + RegisterNamespaceToRestCatalog(conn, get_database_name(MyDatabaseId), get_namespace_name(namespaceId)); } diff --git a/pg_lake_table/src/fdw/option.c b/pg_lake_table/src/fdw/option.c index fca5c637..f4478dbc 100644 --- a/pg_lake_table/src/fdw/option.c +++ b/pg_lake_table/src/fdw/option.c @@ -33,6 +33,7 @@ #include "catalog/pg_foreign_table.h" #include "commands/defrem.h" #include "commands/extension.h" +#include "foreign/foreign.h" #include "pg_lake/iceberg/catalog.h" #include "pg_lake/partitioning/partition_by_parser.h" #include "pg_lake/permissions/roles.h" @@ -790,10 +791,20 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) else if (pg_strncasecmp(icebergCatalogName, POSTGRES_CATALOG_NAME, strlen(icebergCatalogName)) == 0) icebergCatalogType = POSTGRES_CATALOG; else - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid catalog option: %s", icebergCatalogName), - errdetail("Only " REST_CATALOG_NAME " and " POSTGRES_CATALOG_NAME " are supported for now."))); + { + /* + * Check if the catalog value refers to an iceberg_catalog + * server. If so, treat it as a REST catalog. 
+ */ + if (IsServerBasedRestCatalog(options_list)) + icebergCatalogType = REST_CATALOG_READ_ONLY; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid catalog option: %s", icebergCatalogName), + errdetail("Use \"rest\", \"object_store\", \"postgres\", " + "or the name of an iceberg_catalog server."))); + } } else if (catalog == ForeignTableRelationId && strcmp(def->defname, "read_only") == 0) { From e7e5f58a9f23f18b2adba3904c8b8fb239aa6138 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 2 Mar 2026 15:58:17 +0300 Subject: [PATCH 04/23] Track per-table REST catalog connections in transaction handling Refactor transaction-level REST catalog request tracking to resolve and store a RestCatalogConnectionInfo per table. Batch commit requests are now grouped by REST catalog host, so tables using different catalog servers within the same transaction are committed to the correct endpoints independently. Signed-off-by: sfc-gh-npuka --- .../track_iceberg_metadata_changes.c | 173 ++++++++++-------- 1 file changed, 101 insertions(+), 72 deletions(-) diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 2c3e0655..bfaeec16 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -76,6 +76,9 @@ typedef struct RestCatalogRequestPerTable char *tableRestUrl; char *tableIdentifier; + /* Per-table REST catalog connection info for multi-server support */ + RestCatalogConnectionInfo *conn; + RestCatalogRequest *createTableRequest; RestCatalogRequest *dropTableRequest; @@ -283,7 +286,7 @@ PostAllRestCatalogRequests(void) { HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, requestPerTable->tableRestUrl, - createTableRequest->body, PostHeadersWithAuth()); + createTableRequest->body, PostHeadersWithAuth(requestPerTable->conn)); if (httpResult.status != 200) { @@ 
-299,7 +302,7 @@ PostAllRestCatalogRequests(void) { HttpResult httpResult = SendRequestToRestCatalog(HTTP_DELETE, requestPerTable->tableRestUrl, - NULL, DeleteHeadersWithAuth()); + NULL, DeleteHeadersWithAuth(requestPerTable->conn)); if (httpResult.status != 204) { @@ -320,16 +323,14 @@ PostAllRestCatalogRequests(void) /* * Now that all create table requests have been posted, we can post all - * the other modifications. All table modifications are sent in a single - * HTTP request to ensure atomicity. + * the other modifications. We group modifications by REST catalog server + * (identified by host) so that each server gets its own transaction + * commit request for atomicity. + * + * We do two passes: first collect tables that have modifications, then + * group by server host and send one batch per server. */ - char *catalogName = NULL; - bool hasRestCatalogChanges = false; - StringInfo batchRequestBody = makeStringInfo(); - - appendStringInfo(batchRequestBody, "{"); /* start msg body */ - appendJsonKey(batchRequestBody, "table-changes"); - appendStringInfo(batchRequestBody, "["); /* start array of changes */ + List *tablesWithModifications = NIL; hash_seq_init(&status, RestCatalogRequestsHash); @@ -337,93 +338,102 @@ PostAllRestCatalogRequests(void) { if (!requestPerTable->isValid) { - /* - * Might only happen if an OOM happened during adding this request - * to the hash table. - */ elog(WARNING, "Skipping invalid REST catalog request for relation %u", requestPerTable->relationId); continue; } - /* TODO: can we ever have multiple catalogs? */ - catalogName = requestPerTable->catalogName; - if (requestPerTable->createTableRequest != NULL && requestPerTable->dropTableRequest != NULL) - { - /* - * table is created and dropped in the same transaction, nothing - * post to do for this table to the REST catalog. 
- */ continue; - } - else if (requestPerTable->tableModifyRequests == NIL) - { - /* - * no modifications to send for this table - */ + + if (requestPerTable->tableModifyRequests == NIL) continue; - } - if (hasRestCatalogChanges) - { - appendStringInfoChar(batchRequestBody, ','); /* separate previous - * table change */ - } + tablesWithModifications = lappend(tablesWithModifications, requestPerTable); + } - appendStringInfoChar(batchRequestBody, '{'); /* start per-table json - * object */ - appendJsonKey(batchRequestBody, "identifier"); - appendStringInfo(batchRequestBody, "%s", requestPerTable->tableIdentifier); - appendStringInfoChar(batchRequestBody, ','); - appendStringInfoString(batchRequestBody, "\"requirements\":[],"); - appendStringInfoString(batchRequestBody, " \"updates\":["); + /* + * Group by server host and send one batch per server. For each table, + * find if we already started a batch for its server host, otherwise + * start a new one. + */ + while (list_length(tablesWithModifications) > 0) + { + RestCatalogRequestPerTable *firstTable = + (RestCatalogRequestPerTable *) linitial(tablesWithModifications); - ListCell *requestCell = NULL; + char *batchHost = firstTable->conn->host; + char *catalogName = firstTable->catalogName; + RestCatalogConnectionInfo *batchConn = firstTable->conn; + bool hasChanges = false; + StringInfo batchRequestBody = makeStringInfo(); - foreach(requestCell, requestPerTable->tableModifyRequests) - { - RestCatalogRequest *request = (RestCatalogRequest *) lfirst(requestCell); + appendStringInfoChar(batchRequestBody, '{'); + appendJsonKey(batchRequestBody, "table-changes"); + appendStringInfoChar(batchRequestBody, '['); - appendStringInfoString(batchRequestBody, request->body); + List *remaining = NIL; + ListCell *lc; - bool lastRequest = (requestCell == list_tail(requestPerTable->tableModifyRequests)); + foreach(lc, tablesWithModifications) + { + requestPerTable = (RestCatalogRequestPerTable *) lfirst(lc); - if (!lastRequest) + if 
(strcmp(requestPerTable->conn->host, batchHost) != 0) { - appendStringInfoChar(batchRequestBody, ','); + remaining = lappend(remaining, requestPerTable); + continue; } - if (message_level_is_interesting(DEBUG2)) + if (hasChanges) + appendStringInfoChar(batchRequestBody, ','); + + appendStringInfoChar(batchRequestBody, '{'); + appendJsonKey(batchRequestBody, "identifier"); + appendStringInfo(batchRequestBody, "%s", requestPerTable->tableIdentifier); + appendStringInfoChar(batchRequestBody, ','); + appendStringInfoString(batchRequestBody, "\"requirements\":[],"); + appendStringInfoString(batchRequestBody, " \"updates\":["); + + ListCell *requestCell = NULL; + + foreach(requestCell, requestPerTable->tableModifyRequests) { - elog(DEBUG2, "REST Catalog Request Body size reached: %d bytes", - batchRequestBody->len); - } - } + RestCatalogRequest *request = (RestCatalogRequest *) lfirst(requestCell); - appendStringInfoChar(batchRequestBody, ']'); /* close updates array */ - appendStringInfoChar(batchRequestBody, '}'); /* close per-table json - * object */ + appendStringInfoString(batchRequestBody, request->body); - /* - * We have at least one change to send for this table - */ - hasRestCatalogChanges = true; - } + if (requestCell != list_tail(requestPerTable->tableModifyRequests)) + appendStringInfoChar(batchRequestBody, ','); - if (hasRestCatalogChanges) - { - appendStringInfoChar(batchRequestBody, ']'); /* close table-changes */ - appendStringInfoChar(batchRequestBody, '}'); /* close json body */ + if (message_level_is_interesting(DEBUG2)) + { + elog(DEBUG2, "REST Catalog Request Body size reached: %d bytes", + batchRequestBody->len); + } + } - char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, RestCatalogHost, catalogName); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, PostHeadersWithAuth()); + appendStringInfoChar(batchRequestBody, ']'); + appendStringInfoChar(batchRequestBody, '}'); + hasChanges = true; + } - if 
(httpResult.status != 204) + if (hasChanges) { - ReportHTTPError(httpResult, WARNING); + appendStringInfoChar(batchRequestBody, ']'); + appendStringInfoChar(batchRequestBody, '}'); + + char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, batchConn->host, catalogName); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, PostHeadersWithAuth(batchConn)); + + if (httpResult.status != 204) + { + ReportHTTPError(httpResult, WARNING); + } } + + tablesWithModifications = remaining; } /* @@ -612,6 +622,25 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT memset(requestPerTable, 0, sizeof(RestCatalogRequestPerTable)); requestPerTable->relationId = relationId; + /* Resolve per-table REST catalog connection info */ + RestCatalogConnectionInfo *conn = GetRestCatalogConnectionForRelation(relationId); + RestCatalogConnectionInfo *persistConn = + MemoryContextAlloc(TopTransactionContext, sizeof(RestCatalogConnectionInfo)); + + memcpy(persistConn, conn, sizeof(RestCatalogConnectionInfo)); + if (conn->serverName) + persistConn->serverName = MemoryContextStrdup(TopTransactionContext, conn->serverName); + persistConn->host = MemoryContextStrdup(TopTransactionContext, conn->host); + if (conn->oauthHostPath) + persistConn->oauthHostPath = MemoryContextStrdup(TopTransactionContext, conn->oauthHostPath); + if (conn->clientId) + persistConn->clientId = MemoryContextStrdup(TopTransactionContext, conn->clientId); + if (conn->clientSecret) + persistConn->clientSecret = MemoryContextStrdup(TopTransactionContext, conn->clientSecret); + if (conn->scope) + persistConn->scope = MemoryContextStrdup(TopTransactionContext, conn->scope); + requestPerTable->conn = persistConn; + requestPerTable->catalogName = MemoryContextStrdup(TopTransactionContext, GetRestCatalogName(relationId)); requestPerTable->catalogNamespace = @@ -628,7 +657,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT 
requestPerTable->tableRestUrl = MemoryContextStrdup(TopTransactionContext, psprintf(REST_CATALOG_TABLE, - RestCatalogHost, + persistConn->host, requestPerTable->urlEncodedCatalogName, requestPerTable->urlEncodedCatalogNamespace, requestPerTable->urlEncodedCatalogTableName)); From 573cc935fe1546370a44dfedad4a245d3313d1dc Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 2 Mar 2026 15:58:23 +0300 Subject: [PATCH 05/23] Add tests for iceberg_catalog server configuration Comprehensive test suite covering: - FDW existence and pre-created postgres/object_store servers - CREATE SERVER with valid and invalid options - Foreign table creation on handler-less FDW (query-time error) - ALTER/DROP SERVER operations - Server-based catalog references in CREATE TABLE - Backward compatibility with catalog='rest'/'postgres'/'object_store' Signed-off-by: sfc-gh-npuka --- .../pytests/test_iceberg_catalog_server.py | 489 ++++++++++++++++++ 1 file changed, 489 insertions(+) create mode 100644 pg_lake_table/tests/pytests/test_iceberg_catalog_server.py diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py new file mode 100644 index 00000000..f049e8f6 --- /dev/null +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -0,0 +1,489 @@ +import pytest +from utils_pytest import * + + +# ── FDW and pre-created servers ──────────────────────────────────────────── + + +def test_iceberg_catalog_fdw_exists(pg_conn, extension): + """The iceberg_catalog FDW should be created by the extension.""" + result = run_query( + "SELECT fdwname FROM pg_foreign_data_wrapper WHERE fdwname = 'iceberg_catalog'", + pg_conn, + ) + assert len(result) == 1 + assert result[0]["fdwname"] == "iceberg_catalog" + + +def test_iceberg_catalog_fdw_has_no_handler(pg_conn, extension): + """iceberg_catalog is configuration-only, so it should have no handler.""" + result = run_query( + "SELECT fdwhandler FROM pg_foreign_data_wrapper 
WHERE fdwname = 'iceberg_catalog'", + pg_conn, + ) + assert result[0]["fdwhandler"] == 0 + + +def test_precreated_postgres_server(pg_conn, extension): + """A 'postgres' server of TYPE 'postgres' should be pre-created.""" + result = run_query( + "SELECT srvname, srvtype FROM pg_foreign_server WHERE srvname = 'postgres'", + pg_conn, + ) + assert len(result) == 1 + assert result[0]["srvname"] == "postgres" + assert result[0]["srvtype"] == "postgres" + + +def test_precreated_object_store_server(pg_conn, extension): + """An 'object_store' server of TYPE 'object_store' should be pre-created.""" + result = run_query( + "SELECT srvname, srvtype FROM pg_foreign_server WHERE srvname = 'object_store'", + pg_conn, + ) + assert len(result) == 1 + assert result[0]["srvname"] == "object_store" + assert result[0]["srvtype"] == "object_store" + + +# ── CREATE SERVER with valid options ─────────────────────────────────────── + + +def test_create_rest_server_with_all_options(superuser_conn, extension): + """All documented options should be accepted for a REST-type server.""" + run_command( + """ + CREATE SERVER test_rest_all_opts TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + rest_endpoint 'http://localhost:8181', + rest_auth_type 'default', + oauth_endpoint 'http://localhost:8181/oauth/tokens', + scope 'PRINCIPAL_ROLE:ALL', + enable_vended_credentials 'true', + location_prefix 's3://bucket/prefix', + catalog_name 'my_catalog', + client_id 'test-id', + client_secret 'test-secret' + ) + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_create_rest_server_minimal(superuser_conn, extension): + """A server with just rest_endpoint should be accepted.""" + run_command( + """ + CREATE SERVER test_rest_minimal TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_create_server_without_type(superuser_conn, extension): + """A server without 
TYPE should be accepted (defaults to rest).""" + run_command( + """ + CREATE SERVER test_no_type + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_create_server_horizon_auth(superuser_conn, extension): + """Horizon auth type should be accepted.""" + run_command( + """ + CREATE SERVER test_horizon TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + rest_endpoint 'https://horizon.example.com', + rest_auth_type 'horizon', + client_secret 'secret' + ) + """, + superuser_conn, + ) + superuser_conn.rollback() + + +# ── CREATE SERVER with invalid options ───────────────────────────────────── + + +def test_reject_unknown_server_option(superuser_conn, extension): + """Unknown options should be rejected by the validator.""" + err = run_command( + """ + CREATE SERVER test_bad_opt TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181', bogus_option 'x') + """, + superuser_conn, + raise_error=False, + ) + assert "invalid option" in str(err) + assert "bogus_option" in str(err) + superuser_conn.rollback() + + +def test_reject_invalid_auth_type(superuser_conn, extension): + """Only 'default' and 'horizon' are valid for rest_auth_type.""" + err = run_command( + """ + CREATE SERVER test_bad_auth TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181', rest_auth_type 'oauth2') + """, + superuser_conn, + raise_error=False, + ) + assert "invalid rest_auth_type" in str(err) + superuser_conn.rollback() + + +def test_reject_invalid_vended_creds(superuser_conn, extension): + """enable_vended_credentials must be a valid boolean.""" + err = run_command( + """ + CREATE SERVER test_bad_bool TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181', enable_vended_credentials 'maybe') + """, + superuser_conn, + raise_error=False, + ) + assert err 
is not None + superuser_conn.rollback() + + +def test_reject_options_on_non_server(superuser_conn, extension): + """Options on the FDW itself should be rejected.""" + err = run_command( + """ + ALTER FOREIGN DATA WRAPPER iceberg_catalog OPTIONS (ADD rest_endpoint 'http://x') + """, + superuser_conn, + raise_error=False, + ) + assert "only valid for SERVER" in str(err) + superuser_conn.rollback() + + +# ── Querying foreign tables on iceberg_catalog should fail ───────────────── + + +def test_cannot_query_foreign_table_on_catalog_server(superuser_conn, extension): + """iceberg_catalog has no handler, so querying a foreign table should fail. + + PostgreSQL allows CREATE FOREIGN TABLE on a handler-less FDW; the error + only surfaces at query time when GetFdwRoutineByServerId() is called. + """ + run_command( + """ + CREATE SERVER test_ft_server TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command( + """ + CREATE FOREIGN TABLE test_ft_table (id int) + SERVER test_ft_server + """, + superuser_conn, + ) + + err = run_command( + "SELECT * FROM test_ft_table", + superuser_conn, + raise_error=False, + ) + assert "has no handler" in str(err) + superuser_conn.rollback() + + +# ── ALTER SERVER ─────────────────────────────────────────────────────────── + + +def test_alter_server_add_option(superuser_conn, extension): + """ALTER SERVER should allow adding new options.""" + run_command( + """ + CREATE SERVER test_alter TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command( + """ + ALTER SERVER test_alter OPTIONS (ADD scope 'PRINCIPAL_ROLE:ADMIN') + """, + superuser_conn, + ) + + result = run_query( + """ + SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'test_alter' + """, + superuser_conn, + ) + opts = result[0]["srvoptions"] + assert "scope=PRINCIPAL_ROLE:ADMIN" in opts +
superuser_conn.rollback() + + +def test_alter_server_set_option(superuser_conn, extension): + """ALTER SERVER should allow changing existing options.""" + run_command( + """ + CREATE SERVER test_alter_set TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command( + """ + ALTER SERVER test_alter_set OPTIONS (SET rest_endpoint 'http://new-host:8181') + """, + superuser_conn, + ) + + result = run_query( + """ + SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'test_alter_set' + """, + superuser_conn, + ) + opts = result[0]["srvoptions"] + assert "rest_endpoint=http://new-host:8181" in opts + superuser_conn.rollback() + + +def test_alter_server_reject_unknown_option(superuser_conn, extension): + """ALTER SERVER should reject unknown options.""" + run_command( + """ + CREATE SERVER test_alter_bad TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + err = run_command( + """ + ALTER SERVER test_alter_bad OPTIONS (ADD unknown_opt 'x') + """, + superuser_conn, + raise_error=False, + ) + assert "invalid option" in str(err) + superuser_conn.rollback() + + +# ── DROP SERVER ──────────────────────────────────────────────────────────── + + +def test_drop_server(superuser_conn, extension): + """DROP SERVER should work for iceberg_catalog servers.""" + run_command( + """ + CREATE SERVER test_drop_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command("DROP SERVER test_drop_srv", superuser_conn) + + result = run_query( + "SELECT count(*) FROM pg_foreign_server WHERE srvname = 'test_drop_srv'", + superuser_conn, + ) + assert result[0]["count"] == 0 + superuser_conn.rollback() + + +# ── Using a server-based catalog in CREATE TABLE ─────────────────────────── + + +def test_create_table_with_server_catalog( + pg_conn, 
superuser_conn, s3, extension, with_default_location +): + """CREATE TABLE ... USING iceberg WITH (catalog = 'SERVER_NAME') should + recognize the catalog option as a server-based REST catalog.""" + run_command( + """ + CREATE SERVER test_srv_catalog TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + rest_endpoint 'http://localhost:8181', + client_id 'id', + client_secret 'secret' + ) + """, + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + """ + CREATE TABLE test_srv_tbl () + USING iceberg + WITH (catalog = 'test_srv_catalog', read_only = 'true', + catalog_namespace = 'ns', catalog_table_name = 'tbl') + """, + pg_conn, + raise_error=False, + ) + # The REST endpoint is fake, so we expect a connection error, NOT an + # "invalid catalog option" error. This proves the server was resolved. + assert err is not None + assert "invalid catalog option" not in str(err) + pg_conn.rollback() + + run_command("DROP SERVER test_srv_catalog CASCADE", superuser_conn) + superuser_conn.commit() + + +def test_invalid_catalog_name_errors(pg_conn, s3, extension, with_default_location): + """A catalog name that is neither a known literal nor a valid server should error.""" + err = run_command( + """ + CREATE TABLE test_bad_cat () + USING iceberg + WITH (catalog = 'nonexistent_server', read_only = 'true') + """, + pg_conn, + raise_error=False, + ) + assert "invalid catalog option" in str(err) + pg_conn.rollback() + + +def test_non_iceberg_catalog_server_rejected( + pg_conn, superuser_conn, s3, extension, with_default_location +): + """A foreign server not under iceberg_catalog FDW should not be accepted + as a catalog value.""" + err = run_command( + """ + CREATE TABLE test_wrong_fdw () + USING iceberg + WITH (catalog = 'pg_lake_iceberg', read_only = 'true') + """, + pg_conn, + raise_error=False, + ) + assert "invalid catalog option" in str(err) + pg_conn.rollback() + + +def test_server_without_type_treated_as_rest( + pg_conn, superuser_conn, s3, extension,
with_default_location +): + """A server without explicit TYPE should default to rest catalog behavior.""" + run_command( + """ + CREATE SERVER test_no_type_srv + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + rest_endpoint 'http://localhost:8181', + client_id 'id', + client_secret 'secret' + ) + """, + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + """ + CREATE TABLE test_no_type_tbl () + USING iceberg + WITH (catalog = 'test_no_type_srv', read_only = 'true', + catalog_namespace = 'ns', catalog_table_name = 'tbl') + """, + pg_conn, + raise_error=False, + ) + # Should be treated as REST, not rejected as invalid catalog + assert "invalid catalog option" not in str(err) + pg_conn.rollback() + + run_command("DROP SERVER test_no_type_srv CASCADE", superuser_conn) + superuser_conn.commit() + + +# ── Backward compatibility ───────────────────────────────────────────────── + + +def test_catalog_rest_literal_still_works( + pg_conn, s3, extension, with_default_location +): + """catalog='rest' (literal) should still work via GUC fallback.""" + err = run_command( + """ + CREATE TABLE test_rest_literal () + USING iceberg + WITH (catalog = 'rest', read_only = 'true', + catalog_namespace = 'ns', catalog_table_name = 'tbl') + """, + pg_conn, + raise_error=False, + ) + # Will fail because REST GUCs aren't configured, but should NOT fail + # with "invalid catalog option" + if err is not None: + assert "invalid catalog option" not in str(err) + pg_conn.rollback() + + +def test_catalog_postgres_literal_still_works( + pg_conn, s3, extension, with_default_location +): + """catalog='postgres' (literal) should still work.""" + run_command( + """ + CREATE TABLE test_pg_literal (id int) + USING iceberg + WITH (catalog = 'postgres') + """, + pg_conn, + ) + pg_conn.rollback() + + +def test_catalog_object_store_literal_still_works( + pg_conn, + superuser_conn, + s3, + extension, + with_default_location, + adjust_object_store_settings, +): + """catalog='object_store' 
(literal) should still work.""" + run_command( + """ + CREATE TABLE test_os_literal (id int) + USING iceberg + WITH (catalog = 'object_store') + """, + pg_conn, + ) + pg_conn.rollback() From 4fd8ce5b7716cce37367c6dbb96ede6e5b6f6e8a Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Thu, 12 Mar 2026 12:27:09 +0300 Subject: [PATCH 06/23] Add ProtectExtensionCatalogServersHandler 1 - Add extension owned rest catalog of type 'rest' 2 - Disallow creation of further 'postgres' or 'object_store' type catalogs unless we are creating_extension 3 - Disallow renaming/dropping extension owned catalogs. 4 - Disallow altering the options of extension-owned postgres and object_store catalogs. Signed-off-by: sfc-gh-npuka --- .../pg_lake/rest_catalog/rest_catalog.h | 4 + pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql | 4 + pg_lake_iceberg/src/init.c | 2 + .../src/rest_catalog/rest_catalog.c | 123 +++++++++++++ .../pytests/test_iceberg_catalog_server.py | 172 ++++++++++++++++++ 5 files changed, 305 insertions(+) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index f3b345da..89c55db7 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -18,6 +18,7 @@ #pragma once #include "postgres.h" +#include "pg_lake/ddl/utility_hook.h" #include "pg_lake/http/http_client.h" #include "pg_lake/util/rel_utils.h" #include "pg_lake/parquet/field.h" @@ -124,3 +125,6 @@ extern PGDLLEXPORT RestCatalogRequest * GetSetCurrentSchemaCatalogRequest(Oid re extern PGDLLEXPORT RestCatalogRequest * GetAddPartitionCatalogRequest(Oid relationId, List *partitionSpec); extern PGDLLEXPORT RestCatalogRequest * GetSetPartitionDefaultIdCatalogRequest(Oid relationId, int specId); extern PGDLLEXPORT RestCatalogRequest * GetRemoveSnapshotCatalogRequest(List *removedSnapshotIds, Oid relationId); + +/* ProcessUtility handler: protects extension-owned 
catalog servers */ +extern PGDLLEXPORT bool ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams, void *arg); diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql index aba9d22d..2e229943 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql @@ -47,3 +47,7 @@ CREATE SERVER postgres CREATE SERVER object_store TYPE 'object_store' FOREIGN DATA WRAPPER iceberg_catalog; + +CREATE SERVER rest + TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog; diff --git a/pg_lake_iceberg/src/init.c b/pg_lake_iceberg/src/init.c index 63972e7b..0c25493f 100644 --- a/pg_lake_iceberg/src/init.c +++ b/pg_lake_iceberg/src/init.c @@ -329,6 +329,8 @@ _PG_init(void) NULL, NULL, NULL); AvroInit(); + + RegisterUtilityStatementHandler(ProtectExtensionCatalogServersHandler, NULL); } diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 186bb66e..be389806 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -25,6 +25,7 @@ #include "common/base64.h" #include "commands/dbcommands.h" #include "commands/defrem.h" +#include "commands/extension.h" #include "foreign/foreign.h" #include "fmgr.h" #include "lib/stringinfo.h" @@ -185,6 +186,128 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) } +/* + * IsIcebergCatalogServer returns true if the named server exists and + * uses the iceberg_catalog FDW. 
+ */ +static bool +IsIcebergCatalogServer(const char *serverName) +{ + ForeignServer *server = GetForeignServerByName(serverName, true); + + if (server == NULL) + return false; + + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + return strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0; +} + + +/* + * ProtectExtensionCatalogServersHandler guards the extension-owned + * iceberg_catalog servers (postgres, object_store, rest) against + * unauthorized DDL. + * + * Rules (outside of CREATE/ALTER EXTENSION): + * - CREATE SERVER with TYPE 'postgres' or 'object_store' is blocked. + * - ALTER SERVER on 'postgres' or 'object_store' is blocked. + * - ALTER SERVER on 'rest' is allowed (users may set options). + * - DROP SERVER on 'postgres', 'object_store', or 'rest' is blocked. + * - ALTER ... RENAME on 'postgres', 'object_store', or 'rest' is blocked. + */ +bool +ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams, + void *arg) +{ + Node *parsetree = processUtilityParams->plannedStmt->utilityStmt; + + if (creating_extension) + return false; + + if (IsA(parsetree, CreateForeignServerStmt)) + { + CreateForeignServerStmt *stmt = (CreateForeignServerStmt *) parsetree; + + if (stmt->fdwname == NULL || + strcmp(stmt->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) + return false; + + if (stmt->servertype != NULL && + (pg_strcasecmp(stmt->servertype, POSTGRES_CATALOG_NAME) == 0 || + pg_strcasecmp(stmt->servertype, OBJECT_STORE_CATALOG_NAME) == 0)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create iceberg_catalog server with TYPE '%s'", + stmt->servertype), + errhint("Use the pre-created \"%s\" or \"%s\" server, " + "or create a server of type 'rest'.", + POSTGRES_CATALOG_NAME, OBJECT_STORE_CATALOG_NAME))); + } + else if (IsA(parsetree, AlterForeignServerStmt)) + { + AlterForeignServerStmt *stmt = (AlterForeignServerStmt *) parsetree; + + if (!IsIcebergCatalogServer(stmt->servername)) + return false; + + if 
(pg_strcasecmp(stmt->servername, POSTGRES_CATALOG_NAME) == 0 || + pg_strcasecmp(stmt->servername, OBJECT_STORE_CATALOG_NAME) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter the extension-owned \"%s\" catalog server", + stmt->servername))); + } + else if (IsA(parsetree, DropStmt)) + { + DropStmt *stmt = (DropStmt *) parsetree; + + if (stmt->removeType != OBJECT_FOREIGN_SERVER) + return false; + + ListCell *lc; + + foreach(lc, stmt->objects) + { + char *serverName = strVal(lfirst(lc)); + + if (!IsIcebergCatalogServer(serverName)) + continue; + + if (pg_strcasecmp(serverName, POSTGRES_CATALOG_NAME) == 0 || + pg_strcasecmp(serverName, OBJECT_STORE_CATALOG_NAME) == 0 || + pg_strcasecmp(serverName, REST_CATALOG_NAME) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot drop the extension-owned \"%s\" catalog server", + serverName))); + } + } + else if (IsA(parsetree, RenameStmt)) + { + RenameStmt *stmt = (RenameStmt *) parsetree; + + if (stmt->renameType != OBJECT_FOREIGN_SERVER) + return false; + + char *serverName = strVal(stmt->object); + + if (!IsIcebergCatalogServer(serverName)) + return false; + + if (pg_strcasecmp(serverName, POSTGRES_CATALOG_NAME) == 0 || + pg_strcasecmp(serverName, OBJECT_STORE_CATALOG_NAME) == 0 || + pg_strcasecmp(serverName, REST_CATALOG_NAME) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rename the extension-owned \"%s\" catalog server", + serverName))); + } + + return false; +} + + /* * GetRestCatalogConnectionFromGUCs returns a RestCatalogConnectionInfo * populated from the current GUC variables. 
Used for backward-compatible diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index f049e8f6..1380c3e5 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -487,3 +487,175 @@ def test_catalog_object_store_literal_still_works( pg_conn, ) pg_conn.rollback() + + +# ── Protection of extension-owned catalog servers ───────────────────────── + + +def test_reject_create_server_type_postgres(superuser_conn, extension): + """Users cannot create a new server with TYPE 'postgres'.""" + err = run_command( + """ + CREATE SERVER my_postgres TYPE 'postgres' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot create iceberg_catalog server with TYPE 'postgres'" in str(err) + superuser_conn.rollback() + + +def test_reject_create_server_type_object_store(superuser_conn, extension): + """Users cannot create a new server with TYPE 'object_store'.""" + err = run_command( + """ + CREATE SERVER my_obj_store TYPE 'object_store' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot create iceberg_catalog server with TYPE 'object_store'" in str(err) + superuser_conn.rollback() + + +def test_reject_alter_postgres_server(superuser_conn, extension): + """ALTER SERVER on the extension-owned 'postgres' server is blocked.""" + err = run_command( + "ALTER SERVER postgres OPTIONS (ADD location_prefix 's3://bucket')", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot alter the extension-owned "postgres" catalog server' in str(err) + superuser_conn.rollback() + + +def test_reject_alter_object_store_server(superuser_conn, extension): + """ALTER SERVER on the extension-owned 'object_store' server is blocked.""" + err = run_command( + "ALTER SERVER 
object_store OPTIONS (ADD location_prefix 's3://bucket')", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot alter the extension-owned "object_store" catalog server' in str(err) + superuser_conn.rollback() + + +def test_allow_alter_rest_server(superuser_conn, extension): + """ALTER SERVER on the extension-owned 'rest' server is allowed.""" + run_command( + "ALTER SERVER rest OPTIONS (ADD rest_endpoint 'http://localhost:8181')", + superuser_conn, + ) + run_command( + "ALTER SERVER rest OPTIONS (DROP rest_endpoint)", + superuser_conn, + ) + superuser_conn.rollback() + + +def test_reject_drop_postgres_server(superuser_conn, extension): + """DROP SERVER on the extension-owned 'postgres' server is blocked.""" + err = run_command( + "DROP SERVER postgres", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot drop the extension-owned "postgres" catalog server' in str(err) + superuser_conn.rollback() + + +def test_reject_drop_object_store_server(superuser_conn, extension): + """DROP SERVER on the extension-owned 'object_store' server is blocked.""" + err = run_command( + "DROP SERVER object_store", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot drop the extension-owned "object_store" catalog server' in str(err) + superuser_conn.rollback() + + +def test_reject_drop_rest_server(superuser_conn, extension): + """DROP SERVER on the extension-owned 'rest' server is blocked.""" + err = run_command( + "DROP SERVER rest", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot drop the extension-owned "rest" catalog server' in str(err) + superuser_conn.rollback() + + +def test_reject_rename_postgres_server(superuser_conn, extension): + """RENAME on the extension-owned 'postgres' server is blocked.""" + err = run_command( + "ALTER SERVER postgres RENAME TO my_postgres", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 
'cannot rename the extension-owned "postgres" catalog server' in str(err) + superuser_conn.rollback() + + +def test_reject_rename_object_store_server(superuser_conn, extension): + """RENAME on the extension-owned 'object_store' server is blocked.""" + err = run_command( + "ALTER SERVER object_store RENAME TO my_obj_store", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot rename the extension-owned "object_store" catalog server' in str(err) + superuser_conn.rollback() + + +def test_reject_rename_rest_server(superuser_conn, extension): + """RENAME on the extension-owned 'rest' server is blocked.""" + err = run_command( + "ALTER SERVER rest RENAME TO my_rest", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot rename the extension-owned "rest" catalog server' in str(err) + superuser_conn.rollback() + + +def test_allow_drop_user_created_server(superuser_conn, extension): + """DROP SERVER on a user-created server should work fine.""" + run_command( + """ + CREATE SERVER user_rest_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + run_command("DROP SERVER user_rest_srv", superuser_conn) + superuser_conn.rollback() + + +def test_allow_rename_user_created_server(superuser_conn, extension): + """RENAME on a user-created server should work fine.""" + run_command( + """ + CREATE SERVER user_rename_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + run_command( + "ALTER SERVER user_rename_srv RENAME TO user_renamed_srv", superuser_conn + ) + superuser_conn.rollback() From f2b393ddc0562c89fc28a66e15d01904f5f05a9d Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Thu, 12 Mar 2026 13:30:23 +0300 Subject: [PATCH 07/23] Address review - Add helper functions: - IsRestCatalogOwnedByExtension ('rest' name) - IsCatalogOwnedByExtension ('postgres', 
'object_store', 'rest') - Rename IsServerBasedRestCatalog to IsRestCatalogOwnedByUsers - Use servername per server token cache - Add tests where we modify tables from different catalogs(rest_endpoints) in the same transaction. - Keep some comments I accidentally removed. Signed-off-by: sfc-gh-npuka --- .../include/pg_lake/util/catalog_type.h | 4 +- pg_lake_engine/src/utils/catalog_type.c | 41 ++++-- .../src/rest_catalog/rest_catalog.c | 23 ++-- pg_lake_table/src/ddl/create_table.c | 4 +- pg_lake_table/src/fdw/option.c | 2 +- .../track_iceberg_metadata_changes.c | 18 ++- .../pytests/test_modify_iceberg_rest_table.py | 129 ++++++++++++++++++ 7 files changed, 190 insertions(+), 31 deletions(-) diff --git a/pg_lake_engine/include/pg_lake/util/catalog_type.h b/pg_lake_engine/include/pg_lake/util/catalog_type.h index e33cda0d..375dcfda 100644 --- a/pg_lake_engine/include/pg_lake/util/catalog_type.h +++ b/pg_lake_engine/include/pg_lake/util/catalog_type.h @@ -61,4 +61,6 @@ extern PGDLLEXPORT IcebergCatalogType GetIcebergCatalogType(Oid relationId); extern PGDLLEXPORT bool HasRestCatalogTableOption(List *options); extern PGDLLEXPORT bool HasObjectStoreCatalogTableOption(List *options); extern PGDLLEXPORT bool HasReadOnlyOption(List *options); -extern PGDLLEXPORT bool IsServerBasedRestCatalog(List *options); +extern PGDLLEXPORT bool IsCatalogOwnedByExtension(const char *catalog); +extern PGDLLEXPORT bool IsRestCatalogOwnedByExtension(const char *catalog); +extern PGDLLEXPORT bool IsRestCatalogOwnedByUsers(List *options); diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index 18bb31b9..5d795ebd 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -80,10 +80,10 @@ HasRestCatalogTableOption(List *options) if (catalog == NULL) return false; - if (pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(catalog)) == 0) + if (IsRestCatalogOwnedByExtension(catalog)) return true; - return 
IsServerBasedRestCatalog(options); + return IsRestCatalogOwnedByUsers(options); } @@ -114,23 +114,44 @@ HasReadOnlyOption(List *options) /* - * IsServerBasedRestCatalog returns true if the catalog option refers to a - * ForeignServer created with the iceberg_catalog FDW whose TYPE is 'rest'. - * Returns false if the catalog value is a known literal ('rest', + * IsRestCatalogOwnedByExtension returns true if the catalog name matches + * the extension-owned 'rest' catalog literal. + */ +bool +IsRestCatalogOwnedByExtension(const char *catalog) +{ + return pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(REST_CATALOG_NAME)) == 0; +} + + +/* + * IsCatalogOwnedByExtension returns true if the catalog name is one of the + * extension-owned literals: 'rest', 'object_store', or 'postgres'. + */ +bool +IsCatalogOwnedByExtension(const char *catalog) +{ + return IsRestCatalogOwnedByExtension(catalog) || + pg_strncasecmp(catalog, OBJECT_STORE_CATALOG_NAME, strlen(OBJECT_STORE_CATALOG_NAME)) == 0 || + pg_strncasecmp(catalog, POSTGRES_CATALOG_NAME, strlen(POSTGRES_CATALOG_NAME)) == 0; +} + + +/* + * IsRestCatalogOwnedByUsers returns true if the catalog option refers to a + * ForeignServer created by the user with the iceberg_catalog FDW whose TYPE is 'rest'. + * Returns false if the catalog is owned by the extension ('rest', * 'object_store', 'postgres') or if no matching server is found. 
*/ bool -IsServerBasedRestCatalog(List *options) +IsRestCatalogOwnedByUsers(List *options) { char *catalog = GetStringOption(options, "catalog", false); if (catalog == NULL) return false; - /* Skip known literal catalog names */ - if (pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(REST_CATALOG_NAME)) == 0 || - pg_strncasecmp(catalog, OBJECT_STORE_CATALOG_NAME, strlen(OBJECT_STORE_CATALOG_NAME)) == 0 || - pg_strncasecmp(catalog, POSTGRES_CATALOG_NAME, strlen(POSTGRES_CATALOG_NAME)) == 0) + if (IsCatalogOwnedByExtension(catalog)) return false; /* Try to look up a server with this name */ diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index be389806..87d73271 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -60,8 +60,7 @@ int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_DEFAULT; bool RestCatalogEnableVendedCredentials = true; /* - * Per-server token cache. Keyed by server name (for server-based catalogs) - * or "GUC" (for GUC-based backward-compatible catalog='rest'). + * Per-server token cache. Keyed by server name. 
*/ #define TOKEN_CACHE_KEY_LEN NAMEDATALEN @@ -274,9 +273,7 @@ ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams if (!IsIcebergCatalogServer(serverName)) continue; - if (pg_strcasecmp(serverName, POSTGRES_CATALOG_NAME) == 0 || - pg_strcasecmp(serverName, OBJECT_STORE_CATALOG_NAME) == 0 || - pg_strcasecmp(serverName, REST_CATALOG_NAME) == 0) + if (IsCatalogOwnedByExtension(serverName)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot drop the extension-owned \"%s\" catalog server", @@ -295,9 +292,7 @@ ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams if (!IsIcebergCatalogServer(serverName)) return false; - if (pg_strcasecmp(serverName, POSTGRES_CATALOG_NAME) == 0 || - pg_strcasecmp(serverName, OBJECT_STORE_CATALOG_NAME) == 0 || - pg_strcasecmp(serverName, REST_CATALOG_NAME) == 0) + if (IsCatalogOwnedByExtension(serverName)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot rename the extension-owned \"%s\" catalog server", @@ -318,7 +313,7 @@ GetRestCatalogConnectionFromGUCs(void) { RestCatalogConnectionInfo *conn = palloc0(sizeof(RestCatalogConnectionInfo)); - conn->serverName = NULL; + conn->serverName = REST_CATALOG_NAME; conn->host = RestCatalogHost; conn->oauthHostPath = RestCatalogOauthHostPath; conn->clientId = RestCatalogClientId; @@ -416,7 +411,7 @@ GetRestCatalogConnectionForRelation(Oid relationId) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("catalog option is not set for relation %u", relationId))); - if (pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(catalog)) == 0) + if (IsRestCatalogOwnedByExtension(catalog)) return GetRestCatalogConnectionFromGUCs(); return GetRestCatalogConnectionFromServer(catalog); @@ -886,15 +881,13 @@ ReportHTTPError(HttpResult httpResult, int level) /* - * Build a cache key for the per-server token cache. Uses server name for - * server-based catalogs, or "GUC" for GUC-based backward-compatible mode. 
+ * Build a cache key for the per-server token cache. */ static void BuildTokenCacheKey(char *key, const RestCatalogConnectionInfo *conn) { - strlcpy(key, - conn->serverName ? conn->serverName : "GUC", - TOKEN_CACHE_KEY_LEN); + Assert(conn->serverName != NULL); + strlcpy(key, conn->serverName, TOKEN_CACHE_KEY_LEN); } diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 3aebf5f0..210b445a 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -737,7 +737,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogConnectionInfo *conn; - if (pg_strncasecmp(catalogOptionValue, REST_CATALOG_NAME, strlen(catalogOptionValue)) == 0) + if (IsRestCatalogOwnedByExtension(catalogOptionValue)) conn = GetRestCatalogConnectionFromGUCs(); else conn = GetRestCatalogConnectionFromServer(catalogOptionValue); @@ -953,7 +953,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogConnectionInfo *conn; - if (pg_strncasecmp(catalogOptionValue, REST_CATALOG_NAME, strlen(catalogOptionValue)) == 0) + if (IsRestCatalogOwnedByExtension(catalogOptionValue)) conn = GetRestCatalogConnectionFromGUCs(); else conn = GetRestCatalogConnectionFromServer(catalogOptionValue); diff --git a/pg_lake_table/src/fdw/option.c b/pg_lake_table/src/fdw/option.c index f4478dbc..195595d0 100644 --- a/pg_lake_table/src/fdw/option.c +++ b/pg_lake_table/src/fdw/option.c @@ -796,7 +796,7 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) * Check if the catalog value refers to an iceberg_catalog * server. If so, treat it as a REST catalog. 
*/ - if (IsServerBasedRestCatalog(options_list)) + if (IsRestCatalogOwnedByUsers(options_list)) icebergCatalogType = REST_CATALOG_READ_ONLY; else ereport(ERROR, diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index bfaeec16..87aee6dc 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -338,6 +338,10 @@ PostAllRestCatalogRequests(void) { if (!requestPerTable->isValid) { + /* + * Might only happen if an OOM happened during adding this request + * to the hash table. + */ elog(WARNING, "Skipping invalid REST catalog request for relation %u", requestPerTable->relationId); continue; @@ -345,10 +349,20 @@ PostAllRestCatalogRequests(void) if (requestPerTable->createTableRequest != NULL && requestPerTable->dropTableRequest != NULL) + { + /* + * table is created and dropped in the same transaction, nothing + * post to do for this table to the REST catalog. 
+ */ continue; - - if (requestPerTable->tableModifyRequests == NIL) + } + else if (requestPerTable->tableModifyRequests == NIL) + { + /* + * no modifications to send for this table + */ continue; + } tablesWithModifications = lappend(tablesWithModifications, requestPerTable); } diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index 5789b2ea..62ad6ee6 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -565,3 +565,132 @@ def get_rest_table_metadata_location(encoded_namespace, encoded_table_name, pg_c status, json_str, headers = res[0] metadata = json.loads(json_str) return metadata["metadata"]["location"] + + +def test_multi_table_different_rest_catalog_hosts_in_single_transaction( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Tables from two REST catalog servers with different hosts are modified + in the same transaction. PostAllRestCatalogRequests groups modifications + by conn->host, so using 'localhost' vs '127.0.0.1' (same Polaris, different + host strings) produces two separate batch commit requests. 
+ """ + if installcheck: + return + + server_a = "multi_host_catalog_a" + server_b = "multi_host_catalog_b" + table_a = "multi_host_tx_a" + table_b = "multi_host_tx_b" + ns = TABLE_NAMESPACE + "_multi_host" + + _create_polaris_catalog_server(superuser_conn, server_a, "localhost") + _create_polaris_catalog_server(superuser_conn, server_b, "127.0.0.1") + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {ns}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {ns}.{table_a} (id bigint, value text) USING iceberg WITH (catalog='{server_a}')", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"CREATE TABLE {ns}.{table_b} (id bigint, value text) USING iceberg WITH (catalog='{server_b}')", + pg_conn, + ) + pg_conn.commit() + + # Insert into both tables (different hosts) within a single transaction + run_command( + f"INSERT INTO {ns}.{table_a} SELECT i, 'a' FROM generate_series(1, 50) i", + pg_conn, + ) + run_command( + f"INSERT INTO {ns}.{table_b} SELECT i, 'b' FROM generate_series(1, 30) i", + pg_conn, + ) + pg_conn.commit() + + results_a = run_query(f"SELECT count(*) FROM {ns}.{table_a}", pg_conn) + assert results_a[0][0] == 50 + + results_b = run_query(f"SELECT count(*) FROM {ns}.{table_b}", pg_conn) + assert results_b[0][0] == 30 + + # Mixed DML across different hosts in a single transaction + run_command( + f"INSERT INTO {ns}.{table_a} SELECT i, 'a2' FROM generate_series(51, 70) i", + pg_conn, + ) + run_command( + f"DELETE FROM {ns}.{table_b} WHERE id <= 10", + pg_conn, + ) + pg_conn.commit() + + results_a = run_query(f"SELECT count(*) FROM {ns}.{table_a}", pg_conn) + assert results_a[0][0] == 70 + + results_b = run_query(f"SELECT count(*) FROM {ns}.{table_b}", pg_conn) + assert results_b[0][0] == 20 + + # UPDATE on both hosts in a single transaction + run_command( + f"UPDATE {ns}.{table_a} SET value = 'updated_a' WHERE id <= 5", + pg_conn, + ) + run_command( + f"UPDATE {ns}.{table_b} SET value = 'updated_b' WHERE id > 20", + 
pg_conn, + ) + pg_conn.commit() + + results_a = run_query( + f"SELECT count(*) FROM {ns}.{table_a} WHERE value = 'updated_a'", pg_conn + ) + assert results_a[0][0] == 5 + + results_b = run_query( + f"SELECT count(*) FROM {ns}.{table_b} WHERE value = 'updated_b'", pg_conn + ) + assert results_b[0][0] == 10 + + # Cleanup + pg_conn.rollback() + run_command(f"DROP SCHEMA {ns} CASCADE", pg_conn) + pg_conn.commit() + run_command(f"DROP SERVER {server_a}", superuser_conn) + run_command(f"DROP SERVER {server_b}", superuser_conn) + superuser_conn.commit() + + +def _create_polaris_catalog_server(conn, server_name, hostname): + """Create an iceberg_catalog server pointing to the Polaris instance via the given hostname.""" + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://{hostname}:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {server_name} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}' + ) + """, + conn, + ) From 3b5228af25315e2bc6d324fb4e821d2f4c089f62 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Thu, 12 Mar 2026 14:16:19 +0300 Subject: [PATCH 08/23] Allow extension owned rest catalog to alter server options This means any type 'rest' server (extension or user-owned) will use GUC options as default, and change any option to the ones set on the server. This eliminates the need of the function IsRestCatalogOwnedByUsers Instead we just have IsRestCatalog function. This also eliminates the need of GetRestCatalogConnectionFromGUCs Instead we just have GetRestCatalogConnectionFromServer - this function uses GUC values as defaults for all type 'rest' servers. 
Signed-off-by: sfc-gh-npuka --- .../include/pg_lake/util/catalog_type.h | 3 +- pg_lake_engine/src/utils/catalog_type.c | 36 +++-------- .../pg_lake/rest_catalog/rest_catalog.h | 10 ++-- .../src/rest_catalog/rest_catalog.c | 60 +++++++------------ pg_lake_iceberg/src/test/rest_catalog.c | 2 +- pg_lake_table/src/ddl/create_table.c | 16 ++--- pg_lake_table/src/fdw/option.c | 26 ++------ 7 files changed, 44 insertions(+), 109 deletions(-) diff --git a/pg_lake_engine/include/pg_lake/util/catalog_type.h b/pg_lake_engine/include/pg_lake/util/catalog_type.h index 375dcfda..0606e413 100644 --- a/pg_lake_engine/include/pg_lake/util/catalog_type.h +++ b/pg_lake_engine/include/pg_lake/util/catalog_type.h @@ -62,5 +62,4 @@ extern PGDLLEXPORT bool HasRestCatalogTableOption(List *options); extern PGDLLEXPORT bool HasObjectStoreCatalogTableOption(List *options); extern PGDLLEXPORT bool HasReadOnlyOption(List *options); extern PGDLLEXPORT bool IsCatalogOwnedByExtension(const char *catalog); -extern PGDLLEXPORT bool IsRestCatalogOwnedByExtension(const char *catalog); -extern PGDLLEXPORT bool IsRestCatalogOwnedByUsers(List *options); +extern PGDLLEXPORT bool IsRestCatalog(const char *catalog); diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index 5d795ebd..930c8576 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -77,13 +77,7 @@ HasRestCatalogTableOption(List *options) { char *catalog = GetStringOption(options, "catalog", false); - if (catalog == NULL) - return false; - - if (IsRestCatalogOwnedByExtension(catalog)) - return true; - - return IsRestCatalogOwnedByUsers(options); + return IsRestCatalog(catalog); } @@ -113,17 +107,6 @@ HasReadOnlyOption(List *options) } -/* - * IsRestCatalogOwnedByExtension returns true if the catalog name matches - * the extension-owned 'rest' catalog literal. 
- */ -bool -IsRestCatalogOwnedByExtension(const char *catalog) -{ - return pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(REST_CATALOG_NAME)) == 0; -} - - /* * IsCatalogOwnedByExtension returns true if the catalog name is one of the * extension-owned literals: 'rest', 'object_store', or 'postgres'. @@ -131,28 +114,25 @@ IsRestCatalogOwnedByExtension(const char *catalog) bool IsCatalogOwnedByExtension(const char *catalog) { - return IsRestCatalogOwnedByExtension(catalog) || + return pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(REST_CATALOG_NAME)) == 0 || pg_strncasecmp(catalog, OBJECT_STORE_CATALOG_NAME, strlen(OBJECT_STORE_CATALOG_NAME)) == 0 || pg_strncasecmp(catalog, POSTGRES_CATALOG_NAME, strlen(POSTGRES_CATALOG_NAME)) == 0; } /* - * IsRestCatalogOwnedByUsers returns true if the catalog option refers to a - * ForeignServer created by the user with the iceberg_catalog FDW whose TYPE is 'rest'. - * Returns false if the catalog is owned by the extension ('rest', - * 'object_store', 'postgres') or if no matching server is found. + * IsRestCatalog returns true if the catalog name identifies a REST catalog. + * This includes the extension-owned 'rest' literal and any user-created + * iceberg_catalog server whose TYPE is 'rest' (or omitted, defaulting to 'rest'). 
*/ bool -IsRestCatalogOwnedByUsers(List *options) +IsRestCatalog(const char *catalog) { - char *catalog = GetStringOption(options, "catalog", false); - if (catalog == NULL) return false; - if (IsCatalogOwnedByExtension(catalog)) - return false; + if (pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(REST_CATALOG_NAME)) == 0) + return true; /* Try to look up a server with this name */ bool missingOK = true; diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 89c55db7..9eb4a5ab 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -36,14 +36,13 @@ extern int RestCatalogAuthType; extern bool RestCatalogEnableVendedCredentials; /* - * Holds per-server REST catalog connection settings. Can be populated from - * GUCs (for backward-compatible catalog='rest') or from a ForeignServer - * created via CREATE SERVER ... FOREIGN DATA WRAPPER iceberg_catalog. + * Holds per-server REST catalog connection settings. Populated from the + * server options of an iceberg_catalog ForeignServer, with GUC fallback + * for any option not explicitly set on the server. 
*/ typedef struct RestCatalogConnectionInfo { - char *serverName; /* server name for cache keying, NULL for - * GUC-based */ + char *serverName; /* server name, used for token cache keying */ char *host; char *oauthHostPath; char *clientId; @@ -97,7 +96,6 @@ typedef struct RestCatalogRequest #define GET_REST_CATALOG_METADATA_LOCATION "%s/api/catalog/v1/%s/namespaces/%s/tables/%s" /* Connection info resolution */ -extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionFromGUCs(void); extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionFromServer(const char *serverName); extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionForRelation(Oid relationId); diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 87d73271..e98bba3b 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -303,37 +303,20 @@ ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams } -/* - * GetRestCatalogConnectionFromGUCs returns a RestCatalogConnectionInfo - * populated from the current GUC variables. Used for backward-compatible - * catalog='rest' tables. - */ -RestCatalogConnectionInfo * -GetRestCatalogConnectionFromGUCs(void) -{ - RestCatalogConnectionInfo *conn = palloc0(sizeof(RestCatalogConnectionInfo)); - - conn->serverName = REST_CATALOG_NAME; - conn->host = RestCatalogHost; - conn->oauthHostPath = RestCatalogOauthHostPath; - conn->clientId = RestCatalogClientId; - conn->clientSecret = RestCatalogClientSecret; - conn->scope = RestCatalogScope; - conn->authType = RestCatalogAuthType; - conn->enableVendedCredentials = RestCatalogEnableVendedCredentials; - - return conn; -} - - /* * GetRestCatalogConnectionFromServer returns a RestCatalogConnectionInfo - * populated from the options of a ForeignServer (non-secret config) and - * its USER MAPPING (credentials) for the current user. 
+ * populated from the options of the named ForeignServer. GUC values are + * used as defaults; any option explicitly set on the server overrides the + * corresponding GUC. This applies to both the extension-owned 'rest' + * server and user-created iceberg_catalog servers. */ RestCatalogConnectionInfo * GetRestCatalogConnectionFromServer(const char *serverName) { + /* Normalize case-insensitive match to the canonical pre-created name */ + if (pg_strcasecmp(serverName, REST_CATALOG_NAME) == 0) + serverName = REST_CATALOG_NAME; + ForeignServer *server = GetForeignServerByName(serverName, false); ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); @@ -347,14 +330,14 @@ GetRestCatalogConnectionFromServer(const char *serverName) conn->serverName = pstrdup(serverName); - /* Set defaults matching the GUC defaults */ - conn->host = NULL; - conn->oauthHostPath = ""; - conn->clientId = NULL; - conn->clientSecret = NULL; - conn->scope = "PRINCIPAL_ROLE:ALL"; - conn->authType = REST_CATALOG_AUTH_TYPE_DEFAULT; - conn->enableVendedCredentials = true; + /* GUC values serve as defaults; server options override below */ + conn->host = RestCatalogHost; + conn->oauthHostPath = RestCatalogOauthHostPath; + conn->clientId = RestCatalogClientId; + conn->clientSecret = RestCatalogClientSecret; + conn->scope = RestCatalogScope; + conn->authType = RestCatalogAuthType; + conn->enableVendedCredentials = RestCatalogEnableVendedCredentials; ListCell *lc; @@ -384,7 +367,7 @@ GetRestCatalogConnectionFromServer(const char *serverName) conn->enableVendedCredentials = defGetBoolean(def); } - if (conn->host == NULL) + if (conn->host == NULL || conn->host[0] == '\0') ereport(ERROR, (errcode(ERRCODE_FDW_OPTION_NAME_NOT_FOUND), errmsg("\"rest_endpoint\" option is required for iceberg_catalog server \"%s\"", @@ -396,9 +379,9 @@ GetRestCatalogConnectionFromServer(const char *serverName) /* * GetRestCatalogConnectionForRelation returns the REST catalog connection - * info for the given 
relation. If the table uses catalog='rest', the - * connection is built from GUCs. Otherwise, the catalog option is treated - * as a server name and the connection is built from its options. + * info for the given relation. The catalog option value is used as the + * server name. For the extension-owned 'rest' server and user-created + * servers alike, server options are read first with GUC fallback. */ RestCatalogConnectionInfo * GetRestCatalogConnectionForRelation(Oid relationId) @@ -411,9 +394,6 @@ GetRestCatalogConnectionForRelation(Oid relationId) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("catalog option is not set for relation %u", relationId))); - if (IsRestCatalogOwnedByExtension(catalog)) - return GetRestCatalogConnectionFromGUCs(); - return GetRestCatalogConnectionFromServer(catalog); } diff --git a/pg_lake_iceberg/src/test/rest_catalog.c b/pg_lake_iceberg/src/test/rest_catalog.c index 7ad8ffa3..939d93de 100644 --- a/pg_lake_iceberg/src/test/rest_catalog.c +++ b/pg_lake_iceberg/src/test/rest_catalog.c @@ -37,7 +37,7 @@ register_namespace_to_rest_catalog(PG_FUNCTION_ARGS) char *catalogName = text_to_cstring(PG_GETARG_TEXT_P(0)); char *namespaceName = text_to_cstring(PG_GETARG_TEXT_P(1)); - RestCatalogConnectionInfo *conn = GetRestCatalogConnectionFromGUCs(); + RestCatalogConnectionInfo *conn = GetRestCatalogConnectionFromServer(REST_CATALOG_NAME); RegisterNamespaceToRestCatalog(conn, catalogName, namespaceName); PG_RETURN_VOID(); diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 210b445a..118dc6f5 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -735,12 +735,8 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (hasRestCatalogOption && hasExternalCatalogReadOnlyOption) { char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); - RestCatalogConnectionInfo *conn; - - if 
(IsRestCatalogOwnedByExtension(catalogOptionValue)) - conn = GetRestCatalogConnectionFromGUCs(); - else - conn = GetRestCatalogConnectionFromServer(catalogOptionValue); + RestCatalogConnectionInfo *conn = + GetRestCatalogConnectionFromServer(catalogOptionValue); ErrorIfRestNamespaceDoesNotExist(conn, catalogName, catalogNamespace); @@ -951,12 +947,8 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) * etc., but here we need to do it early before the table is created. */ char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); - RestCatalogConnectionInfo *conn; - - if (IsRestCatalogOwnedByExtension(catalogOptionValue)) - conn = GetRestCatalogConnectionFromGUCs(); - else - conn = GetRestCatalogConnectionFromServer(catalogOptionValue); + RestCatalogConnectionInfo *conn = + GetRestCatalogConnectionFromServer(catalogOptionValue); RegisterNamespaceToRestCatalog(conn, get_database_name(MyDatabaseId), get_namespace_name(namespaceId)); diff --git a/pg_lake_table/src/fdw/option.c b/pg_lake_table/src/fdw/option.c index 195595d0..89264f57 100644 --- a/pg_lake_table/src/fdw/option.c +++ b/pg_lake_table/src/fdw/option.c @@ -765,11 +765,7 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) { char *icebergCatalogName = defGetString(def); - /* - * We only accept "rest" and "postgres" for now. If not provided, - * assume "postgres" by default. Don't allow anything. - */ - if (pg_strncasecmp(icebergCatalogName, REST_CATALOG_NAME, strlen(icebergCatalogName)) == 0) + if (IsRestCatalog(icebergCatalogName)) { /* * at this point, we cannot tell whether it's read only or @@ -780,7 +776,6 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) } else if (pg_strncasecmp(icebergCatalogName, OBJECT_STORE_CATALOG_NAME, strlen(icebergCatalogName)) == 0) { - /* * at this point, we cannot tell whether it's read only or * read write. 
We'll determine that later based on the @@ -791,20 +786,11 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) else if (pg_strncasecmp(icebergCatalogName, POSTGRES_CATALOG_NAME, strlen(icebergCatalogName)) == 0) icebergCatalogType = POSTGRES_CATALOG; else - { - /* - * Check if the catalog value refers to an iceberg_catalog - * server. If so, treat it as a REST catalog. - */ - if (IsRestCatalogOwnedByUsers(options_list)) - icebergCatalogType = REST_CATALOG_READ_ONLY; - else - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid catalog option: %s", icebergCatalogName), - errdetail("Use \"rest\", \"object_store\", \"postgres\", " - "or the name of an iceberg_catalog server."))); - } + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid catalog option: %s", icebergCatalogName), + errdetail("Use \"rest\", \"object_store\", \"postgres\", " + "or the name of an iceberg_catalog server."))); } else if (catalog == ForeignTableRelationId && strcmp(def->defname, "read_only") == 0) { From b3c0ef985de9f4cd5e06dc1cdfccce58d34b296e Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Fri, 13 Mar 2026 14:23:51 +0300 Subject: [PATCH 09/23] Address review part 1 - Block CREATE FOREIGN TABLE on iceberg_catalog servers. The iceberg_catalog FDW has no handler, so foreign tables created on it would fail at query time with "has no handler". The check is added to ErrorUnsupportedCreatePgLakeTableHandler in pg_lake_table, which already runs first for all CREATE FOREIGN TABLE statements. - Block ALTER SERVER ... OWNER TO on extension-owned catalog servers (postgres, object_store, rest). - Move ICEBERG_CATALOG_FDW_NAME from rest_catalog.h to catalog_type.h alongside the other catalog name constants, since it is referenced by both pg_lake_iceberg and pg_lake_table. - Rename rest_auth_type value "default" to "oauth2" to better describe the standard OAuth2 client_credentials grant with Basic auth. "default" value is also kept. 
- Rename ProtectExtensionCatalogServersHandler to BlockDDLOnExtensionCatalogs for clarity. - Move BlockDDLOnExtensionCatalogs registration from pg_lake_iceberg init to pg_lake_table init, where all other ProcessUtility hooks are registered. Signed-off-by: sfc-gh-npuka --- .../include/pg_lake/util/catalog_type.h | 3 + .../pg_lake/rest_catalog/rest_catalog.h | 7 +- pg_lake_iceberg/src/init.c | 7 +- .../src/rest_catalog/rest_catalog.c | 33 ++++-- pg_lake_table/src/ddl/create_table.c | 31 ++++++ pg_lake_table/src/init.c | 2 + .../pytests/test_iceberg_catalog_server.py | 102 +++++++++++++----- 7 files changed, 142 insertions(+), 43 deletions(-) diff --git a/pg_lake_engine/include/pg_lake/util/catalog_type.h b/pg_lake_engine/include/pg_lake/util/catalog_type.h index 0606e413..ab29380f 100644 --- a/pg_lake_engine/include/pg_lake/util/catalog_type.h +++ b/pg_lake_engine/include/pg_lake/util/catalog_type.h @@ -17,6 +17,9 @@ #pragma once +/* FDW name for iceberg_catalog servers */ +#define ICEBERG_CATALOG_FDW_NAME "iceberg_catalog" + /* * The allowed values for IcebergDefaultCatalog, case insensitive. 
*/ diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 9eb4a5ab..31d90ae1 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -24,7 +24,7 @@ #include "pg_lake/parquet/field.h" #include "pg_lake/iceberg/api/snapshot.h" -#define REST_CATALOG_AUTH_TYPE_DEFAULT (0) +#define REST_CATALOG_AUTH_TYPE_OAUTH2 (0) #define REST_CATALOG_AUTH_TYPE_HORIZON (1) extern PGDLLEXPORT char *RestCatalogHost; @@ -99,9 +99,6 @@ typedef struct RestCatalogRequest extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionFromServer(const char *serverName); extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionForRelation(Oid relationId); -/* FDW name for iceberg_catalog servers */ -#define ICEBERG_CATALOG_FDW_NAME "iceberg_catalog" - extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT void StartStageRestCatalogIcebergTableCreate(Oid relationId); extern PGDLLEXPORT char *FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSchema * dataFileSchema, List *partitionSpecs); @@ -125,4 +122,4 @@ extern PGDLLEXPORT RestCatalogRequest * GetSetPartitionDefaultIdCatalogRequest(O extern PGDLLEXPORT RestCatalogRequest * GetRemoveSnapshotCatalogRequest(List *removedSnapshotIds, Oid relationId); /* ProcessUtility handler: protects extension-owned catalog servers */ -extern PGDLLEXPORT bool ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams, void *arg); +extern PGDLLEXPORT bool BlockDDLOnExtensionCatalogs(ProcessUtilityParams *processUtilityParams, void *arg); diff --git a/pg_lake_iceberg/src/init.c b/pg_lake_iceberg/src/init.c index 0c25493f..10f556d5 100644 --- a/pg_lake_iceberg/src/init.c +++ b/pg_lake_iceberg/src/init.c @@ -59,7 +59,8 @@ 
void _PG_init(void); /* pg_lake_iceberg.rest_catalog_auth_type */ static const struct config_enum_entry RestCatalogAuthTypeOptions[] = { - {"default", REST_CATALOG_AUTH_TYPE_DEFAULT, false}, + {"oauth2", REST_CATALOG_AUTH_TYPE_OAUTH2, false}, + {"default", REST_CATALOG_AUTH_TYPE_OAUTH2, false}, {"horizon", REST_CATALOG_AUTH_TYPE_HORIZON, false}, {NULL, 0, false}, }; @@ -256,7 +257,7 @@ _PG_init(void) gettext_noop("Determines the format for the initial OAuth token requests."), NULL, &RestCatalogAuthType, - REST_CATALOG_AUTH_TYPE_DEFAULT, + REST_CATALOG_AUTH_TYPE_OAUTH2, RestCatalogAuthTypeOptions, PGC_SUSET, GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE, @@ -329,8 +330,6 @@ _PG_init(void) NULL, NULL, NULL); AvroInit(); - - RegisterUtilityStatementHandler(ProtectExtensionCatalogServersHandler, NULL); } diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index e98bba3b..fbdbe25d 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -56,7 +56,7 @@ char *RestCatalogOauthHostPath = ""; char *RestCatalogClientId = NULL; char *RestCatalogClientSecret = NULL; char *RestCatalogScope = "PRINCIPAL_ROLE:ALL"; -int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_DEFAULT; +int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_OAUTH2; bool RestCatalogEnableVendedCredentials = true; /* @@ -169,11 +169,13 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) { char *authType = defGetString(def); - if (strcmp(authType, "default") != 0 && strcmp(authType, "horizon") != 0) + if (strcmp(authType, "oauth2") != 0 && + strcmp(authType, "default") != 0 && + strcmp(authType, "horizon") != 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid rest_auth_type option: \"%s\"", authType), - errhint("Valid values are \"default\" and \"horizon\"."))); + errhint("Valid values are \"oauth2\" and \"horizon\"."))); } else if (strcmp(def->defname, 
"enable_vended_credentials") == 0) { @@ -204,7 +206,7 @@ IsIcebergCatalogServer(const char *serverName) /* - * ProtectExtensionCatalogServersHandler guards the extension-owned + * BlockDDLOnExtensionCatalogs guards the extension-owned * iceberg_catalog servers (postgres, object_store, rest) against * unauthorized DDL. * @@ -214,9 +216,10 @@ IsIcebergCatalogServer(const char *serverName) * - ALTER SERVER on 'rest' is allowed (users may set options). * - DROP SERVER on 'postgres', 'object_store', or 'rest' is blocked. * - ALTER ... RENAME on 'postgres', 'object_store', or 'rest' is blocked. + * - ALTER ... OWNER TO on 'postgres', 'object_store', or 'rest' is blocked. */ bool -ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams, +BlockDDLOnExtensionCatalogs(ProcessUtilityParams *processUtilityParams, void *arg) { Node *parsetree = processUtilityParams->plannedStmt->utilityStmt; @@ -298,6 +301,24 @@ ProtectExtensionCatalogServersHandler(ProcessUtilityParams *processUtilityParams errmsg("cannot rename the extension-owned \"%s\" catalog server", serverName))); } + else if (IsA(parsetree, AlterOwnerStmt)) + { + AlterOwnerStmt *stmt = (AlterOwnerStmt *) parsetree; + + if (stmt->objectType != OBJECT_FOREIGN_SERVER) + return false; + + char *serverName = strVal(stmt->object); + + if (!IsIcebergCatalogServer(serverName)) + return false; + + if (IsCatalogOwnedByExtension(serverName)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change owner of the extension-owned \"%s\" catalog server", + serverName))); + } return false; } @@ -359,7 +380,7 @@ GetRestCatalogConnectionFromServer(const char *serverName) conn->authType = (strcmp(authType, "horizon") == 0) ? 
REST_CATALOG_AUTH_TYPE_HORIZON - : REST_CATALOG_AUTH_TYPE_DEFAULT; + : REST_CATALOG_AUTH_TYPE_OAUTH2; } else if (strcmp(def->defname, "oauth_endpoint") == 0) conn->oauthHostPath = defGetString(def); diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 118dc6f5..a8c443fa 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -90,6 +90,7 @@ static bool IsJsonOrCSVBackedTable(PgLakeTableType tableType, List *options); static void ErrorIfUnsupportedColumnTypeForJsonOrCSVTables(List *columnDefList); static void ErrorIfUsingGeometryWithoutSpatialAnalytics(List *columnDefList); static void ErrorIfUnsupportedLakeTable(CreateForeignTableStmt *createStmt); +static void ErrorIfCreateForeignTableOnIcebergCatalog(CreateForeignTableStmt *createStmt); static void ErrorIfWritableTableWithReservedColumnName(List *columnDefList, PgLakeTableType tableType); static void ErrorIfInvalidFilenameColumn(List *columnDefList); static bool IsConflictingColumnNameForReadParquet(const char *columnName); @@ -324,6 +325,9 @@ ErrorIfUsingGeometryWithoutSpatialAnalytics(List *columnDefList) * * We check for unsupported features in the table definition, such as unsupported URLs or unsupported * combinations such as writable tables without column definitions. +* +* Also blocks CREATE FOREIGN TABLE on iceberg_catalog servers, which have no +* handler. Tables should be created via CREATE TABLE ... USING iceberg instead. 
*/ bool ErrorUnsupportedCreatePgLakeTableHandler(ProcessUtilityParams * params, void *arg) @@ -339,6 +343,8 @@ ErrorUnsupportedCreatePgLakeTableHandler(ProcessUtilityParams * params, void *ar CreateForeignTableStmt *createStmt = (CreateForeignTableStmt *) plannedStmt->utilityStmt; + ErrorIfCreateForeignTableOnIcebergCatalog(createStmt); + if (!IsCreateLakeTable(createStmt)) { /* not a lake table */ @@ -351,6 +357,31 @@ ErrorUnsupportedCreatePgLakeTableHandler(ProcessUtilityParams * params, void *ar } +/* + * ErrorIfCreateForeignTableOnIcebergCatalog blocks CREATE FOREIGN TABLE + * when the target server uses the iceberg_catalog FDW, which has no handler. + */ +static void +ErrorIfCreateForeignTableOnIcebergCatalog(CreateForeignTableStmt *createStmt) +{ + ForeignServer *server = + GetForeignServerByName(createStmt->servername, true); + + if (server == NULL) + return; + + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create foreign tables on iceberg_catalog server \"%s\"", + createStmt->servername), + errhint("Use CREATE TABLE ... USING iceberg WITH (catalog = '%s') instead.", + createStmt->servername))); +} + + /* * ErrorIfUnsupportedLakeTable is a helper function for checking unsupported features * in CREATE FOREIGN TABLE statements that are pg_lake tables. 
diff --git a/pg_lake_table/src/init.c b/pg_lake_table/src/init.c index 9e350907..f7d9199b 100644 --- a/pg_lake_table/src/init.c +++ b/pg_lake_table/src/init.c @@ -42,6 +42,7 @@ #include "pg_lake/planner/insert_select.h" #include "pg_lake/planner/query_pushdown.h" #include "pg_lake/util/s3_file_utils.h" +#include "pg_lake/rest_catalog/rest_catalog.h" #include "pg_lake/test/hide_lake_objects.h" #include "pg_lake/transaction/transaction_hooks.h" #include "pg_lake/transaction/track_iceberg_metadata_changes.h" @@ -382,6 +383,7 @@ _PG_init(void) MarkGUCPrefixReserved(PG_LAKE_TABLE); + RegisterUtilityStatementHandler(BlockDDLOnExtensionCatalogs, NULL); RegisterUtilityStatementHandler(ProcessVacuumPgLakeTable, NULL); RegisterUtilityStatementHandler(ProcessCreatePgLakeTable, NULL); RegisterUtilityStatementHandler(ProcessCreateAsSelectPgLakeTable, NULL); diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 1380c3e5..2d182ae8 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -57,7 +57,7 @@ def test_create_rest_server_with_all_options(superuser_conn, extension): FOREIGN DATA WRAPPER iceberg_catalog OPTIONS ( rest_endpoint 'http://localhost:8181', - rest_auth_type 'default', + rest_auth_type 'oauth2', oauth_endpoint 'http://localhost:8181/oauth/tokens', scope 'PRINCIPAL_ROLE:ALL', enable_vended_credentials 'true', @@ -135,12 +135,12 @@ def test_reject_unknown_server_option(superuser_conn, extension): def test_reject_invalid_auth_type(superuser_conn, extension): - """Only 'default' and 'horizon' are valid for rest_auth_type.""" + """Only 'oauth2', 'default', and 'horizon' are valid for rest_auth_type.""" err = run_command( """ CREATE SERVER test_bad_auth TYPE 'rest' FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS (rest_endpoint 'http://localhost:8181', rest_auth_type 'oauth2') + OPTIONS (rest_endpoint 
'http://localhost:8181', rest_auth_type 'basic') """, superuser_conn, raise_error=False, @@ -177,38 +177,23 @@ def test_reject_options_on_non_server(superuser_conn, extension): superuser_conn.rollback() -# ── Creating foreign tables on iceberg_catalog should fail ───────────────── +# ── CREATE FOREIGN TABLE on iceberg_catalog servers is blocked ────────────── -def test_cannot_query_foreign_table_on_catalog_server(superuser_conn, extension): - """iceberg_catalog has no handler, so querying a foreign table should fail. - - PostgreSQL allows CREATE FOREIGN TABLE on a handler-less FDW; the error - only surfaces at query time when GetFdwRoutineByServerId() is called. - """ - run_command( - """ - CREATE SERVER test_ft_server TYPE 'rest' - FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS (rest_endpoint 'http://localhost:8181') - """, - superuser_conn, - ) - - run_command( +def test_reject_create_foreign_table_on_iceberg_catalog_server( + superuser_conn, extension +): + """CREATE FOREIGN TABLE on an iceberg_catalog server is blocked.""" + err = run_command( """ - CREATE FOREIGN TABLE test_ft_table (id int) - SERVER test_ft_server + CREATE FOREIGN TABLE test_ft_pg (id int) + SERVER postgres """, superuser_conn, - ) - - err = run_command( - "SELECT * FROM test_ft_table", - superuser_conn, raise_error=False, ) - assert "has no handler" in str(err) + assert err is not None + assert "cannot create foreign tables on iceberg_catalog server" in str(err) superuser_conn.rollback() @@ -631,6 +616,50 @@ def test_reject_rename_rest_server(superuser_conn, extension): superuser_conn.rollback() +def test_reject_owner_change_postgres_server(superuser_conn, extension): + """ALTER SERVER ... 
OWNER TO on the extension-owned 'postgres' server is blocked.""" + err = run_command( + "ALTER SERVER postgres OWNER TO CURRENT_USER", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert ( + 'cannot change owner of the extension-owned "postgres" catalog server' + in str(err) + ) + superuser_conn.rollback() + + +def test_reject_owner_change_object_store_server(superuser_conn, extension): + """ALTER SERVER ... OWNER TO on the extension-owned 'object_store' server is blocked.""" + err = run_command( + "ALTER SERVER object_store OWNER TO CURRENT_USER", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert ( + 'cannot change owner of the extension-owned "object_store" catalog server' + in str(err) + ) + superuser_conn.rollback() + + +def test_reject_owner_change_rest_server(superuser_conn, extension): + """ALTER SERVER ... OWNER TO on the extension-owned 'rest' server is blocked.""" + err = run_command( + "ALTER SERVER rest OWNER TO CURRENT_USER", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert 'cannot change owner of the extension-owned "rest" catalog server' in str( + err + ) + superuser_conn.rollback() + + def test_allow_drop_user_created_server(superuser_conn, extension): """DROP SERVER on a user-created server should work fine.""" run_command( @@ -659,3 +688,20 @@ def test_allow_rename_user_created_server(superuser_conn, extension): "ALTER SERVER user_rename_srv RENAME TO user_renamed_srv", superuser_conn ) superuser_conn.rollback() + + +def test_allow_owner_change_user_created_server(superuser_conn, extension): + """ALTER SERVER ... 
OWNER TO on a user-created server should work fine.""" + run_command( + """ + CREATE SERVER user_owner_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + run_command( + "ALTER SERVER user_owner_srv OWNER TO CURRENT_USER", + superuser_conn, + ) + superuser_conn.rollback() From 0171e02749951fa59893d3203b241f2100df3d1f Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Fri, 13 Mar 2026 22:49:31 +0300 Subject: [PATCH 10/23] Address review part 2 Token cache improvements: - Allocate the token cache hash table and token strings in a dedicated RestTokenCacheCtx memory context (under TopMemoryContext) instead of directly in TopMemoryContext, keeping the cache memory isolated. - On a 419 (token expired) retry, invalidate only the affected server's cached token instead of clearing the entire cache. The server name is passed through SendRequestToRestCatalog and stored in a file-scoped static so the retry callback can target the right entry. Transaction commit batching fix: - Group batches by (host, catalogName) instead of host alone. The transaction commit URL includes the catalog prefix, so two servers pointing to the same host but with different catalog_name values need separate commits. Previously, all tables on the same host were batched together using only the first table's catalogName. Other improvements: - Replace the ereport(ERROR) FDW name check in GetRestCatalogConnectionFromServer with an Assert, since the catalog option validator already ensures only iceberg_catalog servers are accepted. - Add errhint to credential/host error messages in FetchRestCatalogAccessToken, pointing users to both the server option and the corresponding GUC. - Add test_precreated_rest_server test. 
Signed-off-by: sfc-gh-npuka --- .../pg_lake/rest_catalog/rest_catalog.h | 2 +- .../src/rest_catalog/rest_catalog.c | 65 ++++++++++++++----- .../track_iceberg_metadata_changes.c | 22 +++++-- .../pytests/test_iceberg_catalog_server.py | 11 ++++ 4 files changed, 74 insertions(+), 26 deletions(-) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 31d90ae1..ea085ab8 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -113,7 +113,7 @@ extern PGDLLEXPORT char *GetMetadataLocationForRestCatalogForIcebergTable(Oid re extern PGDLLEXPORT void ReportHTTPError(HttpResult httpResult, int level); extern PGDLLEXPORT List *PostHeadersWithAuth(RestCatalogConnectionInfo * conn); extern PGDLLEXPORT List *DeleteHeadersWithAuth(RestCatalogConnectionInfo * conn); -extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers); +extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers, const char *serverName); extern PGDLLEXPORT RestCatalogRequest * GetAddSnapshotCatalogRequest(IcebergSnapshot * newSnapshot, Oid relationId); extern PGDLLEXPORT RestCatalogRequest * GetAddSchemaCatalogRequest(Oid relationId, DataFileSchema * dataFileSchema); extern PGDLLEXPORT RestCatalogRequest * GetSetCurrentSchemaCatalogRequest(Oid relationId, int32_t schemaId); diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index fbdbe25d..88cdbf43 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -72,6 +72,13 @@ typedef struct RestCatalogTokenCacheEntry } RestCatalogTokenCacheEntry; static HTAB *RestCatalogTokenCache = NULL; +static MemoryContext RestTokenCacheCtx = NULL; + +/* + * Tracks 
which server's request is in flight so the retry callback can + * invalidate only the right token cache entry. + */ +static const char *CurrentRetryServerName = NULL; static char *GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshToken); static void FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken, int *expiresIn); @@ -341,11 +348,7 @@ GetRestCatalogConnectionFromServer(const char *serverName) ForeignServer *server = GetForeignServerByName(serverName, false); ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); - if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("server \"%s\" does not use the iceberg_catalog foreign data wrapper", - serverName))); + Assert(strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0); RestCatalogConnectionInfo *conn = palloc0(sizeof(RestCatalogConnectionInfo)); @@ -474,7 +477,8 @@ StartStageRestCatalogIcebergTableCreate(Oid relationId) headers = lappend(headers, vendedCreds); } - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body->data, headers); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body->data, + headers, conn->serverName); if (httpResult.status != 200) { @@ -608,7 +612,9 @@ RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *cat psprintf(REST_CATALOG_NAMESPACE_NAME, conn->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth(conn)); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, + GetHeadersWithAuth(conn), + conn->serverName); switch (httpResult.status) { @@ -698,8 +704,9 @@ ErrorIfRestNamespaceDoesNotExist(RestCatalogConnectionInfo * conn, const char *c psprintf(REST_CATALOG_NAMESPACE_NAME, conn->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult 
httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth(conn)); - + HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, + GetHeadersWithAuth(conn), + conn->serverName); /* namespace not found */ if (httpResult.status == 404) @@ -748,7 +755,8 @@ GetMetadataLocationFromRestCatalog(RestCatalogConnectionInfo * conn, const char conn->host, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); List *headers = GetHeadersWithAuth(conn); - HttpResult hr = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, headers); + HttpResult hr = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, headers, + conn->serverName); if (hr.status != 200) { @@ -796,7 +804,9 @@ CreateNamespaceOnRestCatalog(RestCatalogConnectionInfo * conn, const char *catal psprintf(REST_CATALOG_NAMESPACE, conn->host, URLEncodePath(catalogName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body.data, PostHeadersWithAuth(conn)); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body.data, + PostHeadersWithAuth(conn), + conn->serverName); if (httpResult.status != 200) { @@ -901,12 +911,16 @@ InitTokenCacheIfNeeded(void) if (RestCatalogTokenCache != NULL) return; + RestTokenCacheCtx = AllocSetContextCreate(TopMemoryContext, + "RestTokenCacheCtx", + ALLOCSET_DEFAULT_SIZES); + HASHCTL ctl; memset(&ctl, 0, sizeof(ctl)); ctl.keysize = TOKEN_CACHE_KEY_LEN; ctl.entrysize = sizeof(RestCatalogTokenCacheEntry); - ctl.hcxt = TopMemoryContext; + ctl.hcxt = RestTokenCacheCtx; RestCatalogTokenCache = hash_create("REST Catalog Token Cache", 8, &ctl, @@ -958,7 +972,7 @@ GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshTok FetchRestCatalogAccessToken(conn, &accessToken, &expiresIn); - entry->accessToken = MemoryContextStrdup(TopMemoryContext, accessToken); + entry->accessToken = MemoryContextStrdup(RestTokenCacheCtx, accessToken); entry->accessTokenExpiry = now + (int64_t) 
expiresIn * 1000000; /* expiresIn is in * seconds */ } @@ -976,9 +990,15 @@ static void FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken, int *expiresIn) { if (!conn->host || !*conn->host) - ereport(ERROR, (errmsg("REST catalog host is not configured"))); + ereport(ERROR, + (errmsg("REST catalog host is not configured"), + errhint("Set the \"rest_endpoint\" option on the server " + "or the pg_lake_iceberg.rest_catalog_host GUC."))); if (!conn->clientSecret || !*conn->clientSecret) - ereport(ERROR, (errmsg("REST catalog client_secret is not configured"))); + ereport(ERROR, + (errmsg("REST catalog client_secret is not configured"), + errhint("Set the \"client_secret\" option on the server " + "or the pg_lake_iceberg.rest_catalog_client_secret GUC."))); char *accessTokenUrl = conn->oauthHostPath; @@ -1006,7 +1026,10 @@ FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken else { if (!conn->clientId || !*conn->clientId) - ereport(ERROR, (errmsg("REST catalog client_id is not configured"))); + ereport(ERROR, + (errmsg("REST catalog client_id is not configured"), + errhint("Set the \"client_id\" option on the server " + "or the pg_lake_iceberg.rest_catalog_client_id GUC."))); /* Build Authorization: Basic */ char *encodedAuth = EncodeBasicAuth(conn->clientId, conn->clientSecret); @@ -1018,7 +1041,9 @@ FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken headers = lappend(headers, "Content-Type: application/x-www-form-urlencoded"); /* POST */ - HttpResult httpResponse = SendRequestToRestCatalog(HTTP_POST, accessTokenUrl, body.data, headers); + HttpResult httpResponse = SendRequestToRestCatalog(HTTP_POST, accessTokenUrl, + body.data, headers, + conn->serverName); if (httpResponse.status != 200) ereport(ERROR, @@ -1530,9 +1555,13 @@ ClassifyRestCatalogRequestRetry(long status, int maxRetry, int retryNo) * cancel backend). 
This function can be called at post-commit hook, * so normally we wouldn't want any errors to happen, but then * Postgres already prevents post-commit backends to receive signals. + * + * The serverName is used by the retry callback to invalidate only the + * matching token cache entry on a 419 (token expired) response. */ HttpResult -SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers) +SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, + List *headers, const char *serverName) { const int MAX_HTTP_RETRY_FOR_REST_CATALOG = 3; diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 87aee6dc..e9d612fc 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -286,7 +286,9 @@ PostAllRestCatalogRequests(void) { HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, requestPerTable->tableRestUrl, - createTableRequest->body, PostHeadersWithAuth(requestPerTable->conn)); + createTableRequest->body, + PostHeadersWithAuth(requestPerTable->conn), + requestPerTable->conn->serverName); if (httpResult.status != 200) { @@ -302,7 +304,9 @@ PostAllRestCatalogRequests(void) { HttpResult httpResult = SendRequestToRestCatalog(HTTP_DELETE, requestPerTable->tableRestUrl, - NULL, DeleteHeadersWithAuth(requestPerTable->conn)); + NULL, + DeleteHeadersWithAuth(requestPerTable->conn), + requestPerTable->conn->serverName); if (httpResult.status != 204) { @@ -368,9 +372,10 @@ PostAllRestCatalogRequests(void) } /* - * Group by server host and send one batch per server. For each table, - * find if we already started a batch for its server host, otherwise - * start a new one. + * Group by (host, catalogName) and send one batch per group. 
The + * transaction commit URL includes the catalog prefix, so tables under + * different catalog names need separate commits even when the host is + * the same. */ while (list_length(tablesWithModifications) > 0) { @@ -394,7 +399,8 @@ PostAllRestCatalogRequests(void) { requestPerTable = (RestCatalogRequestPerTable *) lfirst(lc); - if (strcmp(requestPerTable->conn->host, batchHost) != 0) + if (strcmp(requestPerTable->conn->host, batchHost) != 0 || + strcmp(requestPerTable->catalogName, catalogName) != 0) { remaining = lappend(remaining, requestPerTable); continue; @@ -439,7 +445,9 @@ PostAllRestCatalogRequests(void) appendStringInfoChar(batchRequestBody, '}'); char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, batchConn->host, catalogName); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, PostHeadersWithAuth(batchConn)); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, + PostHeadersWithAuth(batchConn), + batchConn->serverName); if (httpResult.status != 204) { diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 2d182ae8..2c7ec724 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -46,6 +46,17 @@ def test_precreated_object_store_server(pg_conn, extension): assert result[0]["srvtype"] == "object_store" +def test_precreated_rest_server(pg_conn, extension): + """A 'rest' server of TYPE 'rest' should be pre-created.""" + result = run_query( + "SELECT srvname, srvtype FROM pg_foreign_server WHERE srvname = 'rest'", + pg_conn, + ) + assert len(result) == 1 + assert result[0]["srvname"] == "rest" + assert result[0]["srvtype"] == "rest" + + # ── CREATE SERVER with valid options ─────────────────────────────────────── From 32d64c7a0f60007f0dadb1990fb8b10cb9837fb1 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Fri, 
13 Mar 2026 23:12:36 +0300 Subject: [PATCH 11/23] Block reserved catalog names and fix prefix-match comparisons Block CREATE SERVER with a name that case-insensitively matches the extension-owned catalog names (postgres, object_store, rest) on the iceberg_catalog FDW. Also block ALTER SERVER ... RENAME TO a reserved name. Fix a latent bug across multiple files where pg_strncasecmp was used with a partial length, causing prefix-only matching: e.g. "rest_1" would match "rest". Replaced all instances with pg_strcasecmp for exact case-insensitive comparison. Signed-off-by: sfc-gh-npuka --- pg_lake_engine/src/utils/catalog_type.c | 12 ++--- pg_lake_iceberg/src/init.c | 6 +-- .../src/rest_catalog/rest_catalog.c | 14 +++++ pg_lake_table/src/fdw/option.c | 4 +- .../pytests/test_iceberg_catalog_server.py | 51 +++++++++++++++++++ 5 files changed, 76 insertions(+), 11 deletions(-) diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index 930c8576..f8fb4213 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -90,7 +90,7 @@ HasObjectStoreCatalogTableOption(List *options) { char *catalog = GetStringOption(options, "catalog", false); - return catalog ? pg_strncasecmp(catalog, OBJECT_STORE_CATALOG_NAME, strlen(catalog)) == 0 : false; + return catalog ? 
pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0 : false; } @@ -114,9 +114,9 @@ HasReadOnlyOption(List *options) bool IsCatalogOwnedByExtension(const char *catalog) { - return pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(REST_CATALOG_NAME)) == 0 || - pg_strncasecmp(catalog, OBJECT_STORE_CATALOG_NAME, strlen(OBJECT_STORE_CATALOG_NAME)) == 0 || - pg_strncasecmp(catalog, POSTGRES_CATALOG_NAME, strlen(POSTGRES_CATALOG_NAME)) == 0; + return pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0 || + pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0 || + pg_strcasecmp(catalog, POSTGRES_CATALOG_NAME) == 0; } @@ -131,7 +131,7 @@ IsRestCatalog(const char *catalog) if (catalog == NULL) return false; - if (pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(REST_CATALOG_NAME)) == 0) + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) return true; /* Try to look up a server with this name */ @@ -148,7 +148,7 @@ IsRestCatalog(const char *catalog) /* Check server TYPE if set */ if (server->servertype != NULL && *server->servertype != '\0') - return pg_strncasecmp(server->servertype, "rest", strlen("rest")) == 0; + return pg_strcasecmp(server->servertype, "rest") == 0; /* No TYPE specified, assume rest */ return true; diff --git a/pg_lake_iceberg/src/init.c b/pg_lake_iceberg/src/init.c index 10f556d5..3c56f419 100644 --- a/pg_lake_iceberg/src/init.c +++ b/pg_lake_iceberg/src/init.c @@ -367,9 +367,9 @@ IcebergDefaultCatalogCheckHook(char **newvalue, void **extra, GucSource source) { char *newCatalog = *newvalue; - if (pg_strncasecmp(newCatalog, POSTGRES_CATALOG_NAME, strlen(newCatalog)) == 0 || - pg_strncasecmp(newCatalog, REST_CATALOG_NAME, strlen(newCatalog)) == 0 || - pg_strncasecmp(newCatalog, OBJECT_STORE_CATALOG_NAME, strlen(newCatalog)) == 0) + if (pg_strcasecmp(newCatalog, POSTGRES_CATALOG_NAME) == 0 || + pg_strcasecmp(newCatalog, REST_CATALOG_NAME) == 0 || + pg_strcasecmp(newCatalog, OBJECT_STORE_CATALOG_NAME) == 0) return true; 
GUC_check_errdetail("pg_lake_iceberg: allowed iceberg catalog options are '" POSTGRES_CATALOG_NAME "', " diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 88cdbf43..89c1ff7d 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -242,6 +242,13 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams *processUtilityParams, strcmp(stmt->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) return false; + if (IsCatalogOwnedByExtension(stmt->servername)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("server name \"%s\" is reserved for the extension-owned catalog", + stmt->servername), + errhint("Choose a different server name."))); + if (stmt->servertype != NULL && (pg_strcasecmp(stmt->servertype, POSTGRES_CATALOG_NAME) == 0 || pg_strcasecmp(stmt->servertype, OBJECT_STORE_CATALOG_NAME) == 0)) @@ -307,6 +314,13 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams *processUtilityParams, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot rename the extension-owned \"%s\" catalog server", serverName))); + + if (IsCatalogOwnedByExtension(stmt->newname)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("server name \"%s\" is reserved for the extension-owned catalog", + stmt->newname), + errhint("Choose a different server name."))); } else if (IsA(parsetree, AlterOwnerStmt)) { diff --git a/pg_lake_table/src/fdw/option.c b/pg_lake_table/src/fdw/option.c index 89264f57..7a5984a2 100644 --- a/pg_lake_table/src/fdw/option.c +++ b/pg_lake_table/src/fdw/option.c @@ -774,7 +774,7 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) */ icebergCatalogType = REST_CATALOG_READ_ONLY; } - else if (pg_strncasecmp(icebergCatalogName, OBJECT_STORE_CATALOG_NAME, strlen(icebergCatalogName)) == 0) + else if (pg_strcasecmp(icebergCatalogName, OBJECT_STORE_CATALOG_NAME) == 0) { /* * at this point, we cannot tell whether it's read only or @@ 
-783,7 +783,7 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) */ icebergCatalogType = OBJECT_STORE_READ_ONLY; } - else if (pg_strncasecmp(icebergCatalogName, POSTGRES_CATALOG_NAME, strlen(icebergCatalogName)) == 0) + else if (pg_strcasecmp(icebergCatalogName, POSTGRES_CATALOG_NAME) == 0) icebergCatalogType = POSTGRES_CATALOG; else ereport(ERROR, diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 2c7ec724..1a2afa19 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -518,6 +518,27 @@ def test_reject_create_server_type_object_store(superuser_conn, extension): superuser_conn.rollback() +def test_reject_create_server_reserved_name(superuser_conn, extension): + """CREATE SERVER with a reserved catalog name (case-insensitive) is blocked.""" + reserved_names = [ + "Postgres", + "OBJECT_STORE", + "ReSt", + ] + for name in reserved_names: + err = run_command( + f""" + CREATE SERVER "{name}" TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None, f"Expected error for reserved name '{name}'" + assert "reserved for the extension-owned catalog" in str(err) + superuser_conn.rollback() + + def test_reject_alter_postgres_server(superuser_conn, extension): """ALTER SERVER on the extension-owned 'postgres' server is blocked.""" err = run_command( @@ -627,6 +648,36 @@ def test_reject_rename_rest_server(superuser_conn, extension): superuser_conn.rollback() +def test_reject_rename_to_reserved_name(superuser_conn, extension): + """Renaming a user-created server TO a reserved name is blocked.""" + run_command( + """ + CREATE SERVER tmp_rename_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + for reserved in ["POSTGRES", "Object_Store", "REST"]: + err = 
run_command( + f'ALTER SERVER tmp_rename_srv RENAME TO "{reserved}"', + superuser_conn, + raise_error=False, + ) + assert err is not None, f"Expected error for renaming to '{reserved}'" + assert "reserved for the extension-owned catalog" in str(err) + superuser_conn.rollback() + run_command( + """ + CREATE SERVER tmp_rename_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.rollback() + + def test_reject_owner_change_postgres_server(superuser_conn, extension): """ALTER SERVER ... OWNER TO on the extension-owned 'postgres' server is blocked.""" err = run_command( From c2d7f1a7fe13c776e8ed2857ed3f2179a55fb65b Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Thu, 19 Mar 2026 18:05:46 +0300 Subject: [PATCH 12/23] Address Onder's review REST catalog options and retry mechanism: - Rename RestCatalogConnectionInfo to RestCatalogOptions throughout - Eliminate CurrentRetryServerName static; pass opts directly through HttpRetryFn callback (void *context + List *headers) so the 419 token-expired handler can force-refresh and patch the Authorization header in-place - Fix double-free in GetRestCatalogAccessToken: null out entry fields before calling FetchRestCatalogAccessToken Transaction-scoped state: - Reject transactions that touch tables from different REST catalog servers - Replace per-table rest catalog opts deep-copy with a single PgLakeXactRestCatalogOpts static, deep-copied into TopTransactionContext on first use - This avoids syscache lookups at XACT_EVENT_COMMIT time, which are forbidden (AssertCouldGetRelation fires during TRANS_COMMIT state) Server configuration enforcement: - Require TYPE 'rest' on CREATE SERVER ... 
FOREIGN DATA WRAPPER iceberg_catalog (reject NULL or non-rest types) - Make FDW option names and auth type values case-insensitive (pg_strcasecmp), while keeping server names case-sensitive - Make reserved catalog name checks (postgres, object_store, rest) case-insensitive via IsCatalogOwnedByExtension - Support location_prefix server option, overriding the GUC default - Accept user-created iceberg_catalog servers in default_catalog GUC Tests: - Add test_reject_modify_different_rest_catalogs_in_single_transaction - Add test_server_location_prefix_overrides_guc - Add tests for TYPE enforcement, case-sensitive server names, and default_catalog GUC with user-created servers - Remove obsolete no-TYPE-defaults-to-rest tests Signed-off-by: sfc-gh-npuka --- pg_lake_engine/src/utils/catalog_type.c | 18 +- .../include/pg_lake/http/http_client.h | 5 +- .../pg_lake/rest_catalog/rest_catalog.h | 32 ++- pg_lake_iceberg/src/http/http_client.c | 5 +- pg_lake_iceberg/src/init.c | 12 +- .../src/rest_catalog/rest_catalog.c | 260 ++++++++++-------- pg_lake_iceberg/src/test/rest_catalog.c | 4 +- pg_lake_iceberg/src/test/test_http_client.c | 8 +- pg_lake_table/src/ddl/create_table.c | 26 +- pg_lake_table/src/init.c | 1 + .../track_iceberg_metadata_changes.c | 212 +++++++------- .../pytests/test_iceberg_catalog_server.py | 178 ++++++++---- .../pytests/test_modify_iceberg_rest_table.py | 183 +++++++----- 13 files changed, 559 insertions(+), 385 deletions(-) diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index f8fb4213..7e7ea8cc 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -108,8 +108,10 @@ HasReadOnlyOption(List *options) /* - * IsCatalogOwnedByExtension returns true if the catalog name is one of the - * extension-owned literals: 'rest', 'object_store', or 'postgres'. 
+ * IsCatalogOwnedByExtension returns true if the catalog name matches one of + * the extension-owned names: 'rest', 'object_store', or 'postgres'. + * Comparison is case-insensitive so that "Postgres", "REST", etc. are + * also recognized as reserved. */ bool IsCatalogOwnedByExtension(const char *catalog) @@ -123,7 +125,7 @@ IsCatalogOwnedByExtension(const char *catalog) /* * IsRestCatalog returns true if the catalog name identifies a REST catalog. * This includes the extension-owned 'rest' literal and any user-created - * iceberg_catalog server whose TYPE is 'rest' (or omitted, defaulting to 'rest'). + * iceberg_catalog server whose TYPE is 'rest'. */ bool IsRestCatalog(const char *catalog) @@ -143,13 +145,9 @@ IsRestCatalog(const char *catalog) ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); - if (strcmp(fdw->fdwname, "iceberg_catalog") != 0) + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) return false; - /* Check server TYPE if set */ - if (server->servertype != NULL && *server->servertype != '\0') - return pg_strcasecmp(server->servertype, "rest") == 0; - - /* No TYPE specified, assume rest */ - return true; + Assert(server->servertype != NULL && *server->servertype != '\0'); + return pg_strcasecmp(server->servertype, REST_CATALOG_NAME) == 0; } diff --git a/pg_lake_iceberg/include/pg_lake/http/http_client.h b/pg_lake_iceberg/include/pg_lake/http/http_client.h index 8d9c3803..17a9b39f 100644 --- a/pg_lake_iceberg/include/pg_lake/http/http_client.h +++ b/pg_lake_iceberg/include/pg_lake/http/http_client.h @@ -50,7 +50,7 @@ extern bool HttpClientTraceTraffic; #define HTTP_STATUS_SERVICE_UNAVAILABLE 503 /* Callback function to determine if a request should be retried */ -typedef bool (*HttpRetryFn) (long status, int maxRetry, int retryNo); +typedef bool (*HttpRetryFn) (long status, int maxRetry, int retryNo, void *context, List *headers); /* plain C API (no PostgreSQL types) */ extern PGDLLEXPORT HttpResult HttpGet(const char *url, List 
*headers); @@ -60,5 +60,6 @@ extern PGDLLEXPORT HttpResult HttpDelete(const char *url, List *headers); extern PGDLLEXPORT HttpResult HttpPut(const char *url, const char *body, List *headers); extern PGDLLEXPORT HttpResult SendHttpRequest(HttpMethod method, const char *url, const char *body, List *headers); extern PGDLLEXPORT HttpResult SendHttpRequestWithRetry(HttpMethod method, const char *url, const char *body, - List *headers, HttpRetryFn retryFn, int maxRetry); + List *headers, HttpRetryFn retryFn, int maxRetry, + void *retryContext); extern PGDLLEXPORT int LinearBackoffSleepMs(int baseMs, int retryNo); diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index ea085ab8..d60dcb6f 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -36,11 +36,11 @@ extern int RestCatalogAuthType; extern bool RestCatalogEnableVendedCredentials; /* - * Holds per-server REST catalog connection settings. Populated from the - * server options of an iceberg_catalog ForeignServer, with GUC fallback - * for any option not explicitly set on the server. + * Holds per-server REST catalog options. Populated from the server options + * of an iceberg_catalog ForeignServer, with GUC fallback for any option + * not explicitly set on the server. 
*/ -typedef struct RestCatalogConnectionInfo +typedef struct RestCatalogOptions { char *serverName; /* server name, used for token cache keying */ char *host; @@ -48,9 +48,10 @@ typedef struct RestCatalogConnectionInfo char *clientId; char *clientSecret; char *scope; + char *locationPrefix; int authType; bool enableVendedCredentials; -} RestCatalogConnectionInfo; +} RestCatalogOptions; #define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens" @@ -96,24 +97,24 @@ typedef struct RestCatalogRequest #define GET_REST_CATALOG_METADATA_LOCATION "%s/api/catalog/v1/%s/namespaces/%s/tables/%s" /* Connection info resolution */ -extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionFromServer(const char *serverName); -extern PGDLLEXPORT RestCatalogConnectionInfo * GetRestCatalogConnectionForRelation(Oid relationId); +extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsFromServer(const char *serverName); +extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsForRelation(Oid relationId); -extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName); +extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT void StartStageRestCatalogIcebergTableCreate(Oid relationId); extern PGDLLEXPORT char *FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSchema * dataFileSchema, List *partitionSpecs); -extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName); +extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT char *GetRestCatalogName(Oid relationId); extern PGDLLEXPORT char *GetRestCatalogNamespace(Oid relationId); extern PGDLLEXPORT char 
*GetRestCatalogTableName(Oid relationId); extern PGDLLEXPORT bool IsReadOnlyRestCatalogIcebergTable(Oid relationId); -extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(RestCatalogConnectionInfo * conn, const char *restCatalogName, const char *namespaceName, +extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(RestCatalogOptions * opts, const char *restCatalogName, const char *namespaceName, const char *relationName); extern PGDLLEXPORT char *GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId); extern PGDLLEXPORT void ReportHTTPError(HttpResult httpResult, int level); -extern PGDLLEXPORT List *PostHeadersWithAuth(RestCatalogConnectionInfo * conn); -extern PGDLLEXPORT List *DeleteHeadersWithAuth(RestCatalogConnectionInfo * conn); -extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers, const char *serverName); +extern PGDLLEXPORT List *PostHeadersWithAuth(RestCatalogOptions * opts); +extern PGDLLEXPORT List *DeleteHeadersWithAuth(RestCatalogOptions * opts); +extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers, RestCatalogOptions * opts); extern PGDLLEXPORT RestCatalogRequest * GetAddSnapshotCatalogRequest(IcebergSnapshot * newSnapshot, Oid relationId); extern PGDLLEXPORT RestCatalogRequest * GetAddSchemaCatalogRequest(Oid relationId, DataFileSchema * dataFileSchema); extern PGDLLEXPORT RestCatalogRequest * GetSetCurrentSchemaCatalogRequest(Oid relationId, int32_t schemaId); @@ -121,5 +122,6 @@ extern PGDLLEXPORT RestCatalogRequest * GetAddPartitionCatalogRequest(Oid relati extern PGDLLEXPORT RestCatalogRequest * GetSetPartitionDefaultIdCatalogRequest(Oid relationId, int specId); extern PGDLLEXPORT RestCatalogRequest * GetRemoveSnapshotCatalogRequest(List *removedSnapshotIds, Oid relationId); -/* ProcessUtility handler: protects extension-owned catalog servers */ -extern PGDLLEXPORT bool 
BlockDDLOnExtensionCatalogs(ProcessUtilityParams *processUtilityParams, void *arg); +/* ProcessUtility handlers for iceberg_catalog servers */ +extern PGDLLEXPORT bool BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, void *arg); +extern PGDLLEXPORT bool RequireRestTypeForIcebergCatalogServer(ProcessUtilityParams * processUtilityParams, void *arg); diff --git a/pg_lake_iceberg/src/http/http_client.c b/pg_lake_iceberg/src/http/http_client.c index 64225a0f..3f696ddc 100644 --- a/pg_lake_iceberg/src/http/http_client.c +++ b/pg_lake_iceberg/src/http/http_client.c @@ -276,7 +276,8 @@ CurlReturnError(CURL *curl, struct curl_slist *headerList, */ HttpResult SendHttpRequestWithRetry(HttpMethod method, const char *url, const char *body, - List *headers, HttpRetryFn retryFn, int maxRetry) + List *headers, HttpRetryFn retryFn, int maxRetry, + void *retryContext) { Assert(maxRetry > 0); @@ -286,7 +287,7 @@ SendHttpRequestWithRetry(HttpMethod method, const char *url, const char *body, { result = SendHttpRequest(method, url, body, headers); - if (retryFn != NULL && retryFn(result.status, maxRetry, retryNo)) + if (retryFn != NULL && retryFn(result.status, maxRetry, retryNo, retryContext, headers)) continue; else break; diff --git a/pg_lake_iceberg/src/init.c b/pg_lake_iceberg/src/init.c index 3c56f419..011e6735 100644 --- a/pg_lake_iceberg/src/init.c +++ b/pg_lake_iceberg/src/init.c @@ -35,6 +35,8 @@ #include "pg_lake/iceberg/operations/vacuum.h" #include "pg_lake/object_store_catalog/object_store_catalog.h" #include "pg_lake/rest_catalog/rest_catalog.h" +#include "pg_lake/util/catalog_type.h" +#include "access/xact.h" #define GUC_STANDARD 0 @@ -372,8 +374,16 @@ IcebergDefaultCatalogCheckHook(char **newvalue, void **extra, GucSource source) pg_strcasecmp(newCatalog, OBJECT_STORE_CATALOG_NAME) == 0) return true; + /* + * When catalog access is available, also accept user-created + * iceberg_catalog foreign servers with TYPE 'rest'. 
+ */ + if (IsTransactionState() && IsRestCatalog(newCatalog)) + return true; + GUC_check_errdetail("pg_lake_iceberg: allowed iceberg catalog options are '" POSTGRES_CATALOG_NAME "', " - " '" REST_CATALOG_NAME "' and '" OBJECT_STORE_CATALOG_NAME "'"); + "'" REST_CATALOG_NAME "', '" OBJECT_STORE_CATALOG_NAME + "', or the name of a user-created iceberg_catalog server with TYPE 'rest'"); return false; } diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 89c1ff7d..18300c46 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -48,6 +48,7 @@ #include "pg_lake/util/catalog_type.h" #include "pg_lake/util/url_encode.h" #include "pg_lake/util/rel_utils.h" +#include "pg_lake/util/string_utils.h" /* determined by GUC */ @@ -74,18 +75,12 @@ typedef struct RestCatalogTokenCacheEntry static HTAB *RestCatalogTokenCache = NULL; static MemoryContext RestTokenCacheCtx = NULL; -/* - * Tracks which server's request is in flight so the retry callback can - * invalidate only the right token cache entry. 
- */ -static const char *CurrentRetryServerName = NULL; - -static char *GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshToken); -static void FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken, int *expiresIn); -static void CreateNamespaceOnRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName); +static char *GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken); +static void FetchRestCatalogAccessToken(RestCatalogOptions * opts, char **accessToken, int *expiresIn); +static void CreateNamespaceOnRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); static char *EncodeBasicAuth(const char *clientId, const char *clientSecret); static char *JsonbGetStringByPath(const char *jsonb_text, int nkeys,...); -static List *GetHeadersWithAuth(RestCatalogConnectionInfo * conn); +static List *GetHeadersWithAuth(RestCatalogOptions * opts); static char *AppendIcebergPartitionSpecForRestCatalog(List *partitionSpecs); static void UpdateAuthorizationHeader(List *headers, const char *token); @@ -124,7 +119,7 @@ is_valid_iceberg_catalog_option(const char *keyword) { for (int i = 0; iceberg_catalog_server_options[i] != NULL; i++) { - if (strcmp(keyword, iceberg_catalog_server_options[i]) == 0) + if (pg_strcasecmp(keyword, iceberg_catalog_server_options[i]) == 0) return true; } return false; @@ -172,19 +167,19 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) "location_prefix, catalog_name, client_id, client_secret."))); } - if (strcmp(def->defname, "rest_auth_type") == 0) + if (pg_strcasecmp(def->defname, "rest_auth_type") == 0) { char *authType = defGetString(def); - if (strcmp(authType, "oauth2") != 0 && - strcmp(authType, "default") != 0 && - strcmp(authType, "horizon") != 0) + if (pg_strcasecmp(authType, "oauth2") != 0 && + pg_strcasecmp(authType, "default") != 0 && + pg_strcasecmp(authType, "horizon") != 0) ereport(ERROR, 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid rest_auth_type option: \"%s\"", authType), errhint("Valid values are \"oauth2\" and \"horizon\"."))); } - else if (strcmp(def->defname, "enable_vended_credentials") == 0) + else if (pg_strcasecmp(def->defname, "enable_vended_credentials") == 0) { (void) defGetBoolean(def); } @@ -226,8 +221,8 @@ IsIcebergCatalogServer(const char *serverName) * - ALTER ... OWNER TO on 'postgres', 'object_store', or 'rest' is blocked. */ bool -BlockDDLOnExtensionCatalogs(ProcessUtilityParams *processUtilityParams, - void *arg) +BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, + void *arg) { Node *parsetree = processUtilityParams->plannedStmt->utilityStmt; @@ -346,14 +341,48 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams *processUtilityParams, /* - * GetRestCatalogConnectionFromServer returns a RestCatalogConnectionInfo + * RequireRestTypeForIcebergCatalogServer ensures that CREATE SERVER + * commands using the iceberg_catalog FDW specify TYPE 'rest'. + */ +bool +RequireRestTypeForIcebergCatalogServer(ProcessUtilityParams * processUtilityParams, + void *arg) +{ + Node *parsetree = processUtilityParams->plannedStmt->utilityStmt; + + if (creating_extension) + return false; + + if (!IsA(parsetree, CreateForeignServerStmt)) + return false; + + CreateForeignServerStmt *stmt = (CreateForeignServerStmt *) parsetree; + + if (stmt->fdwname == NULL || + strcmp(stmt->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) + return false; + + if (stmt->servertype == NULL || + pg_strcasecmp(stmt->servertype, REST_CATALOG_NAME) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("iceberg_catalog server requires TYPE 'rest'"), + errhint("Specify TYPE 'rest' when creating an " + "iceberg_catalog server."))); + + return false; +} + + +/* + * GetRestCatalogOptionsFromServer returns a RestCatalogOptions * populated from the options of the named ForeignServer. 
GUC values are * used as defaults; any option explicitly set on the server overrides the * corresponding GUC. This applies to both the extension-owned 'rest' * server and user-created iceberg_catalog servers. */ -RestCatalogConnectionInfo * -GetRestCatalogConnectionFromServer(const char *serverName) +RestCatalogOptions * +GetRestCatalogOptionsFromServer(const char *serverName) { /* Normalize case-insensitive match to the canonical pre-created name */ if (pg_strcasecmp(serverName, REST_CATALOG_NAME) == 0) @@ -364,18 +393,19 @@ GetRestCatalogConnectionFromServer(const char *serverName) Assert(strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0); - RestCatalogConnectionInfo *conn = palloc0(sizeof(RestCatalogConnectionInfo)); + RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); - conn->serverName = pstrdup(serverName); + opts->serverName = pstrdup(serverName); /* GUC values serve as defaults; server options override below */ - conn->host = RestCatalogHost; - conn->oauthHostPath = RestCatalogOauthHostPath; - conn->clientId = RestCatalogClientId; - conn->clientSecret = RestCatalogClientSecret; - conn->scope = RestCatalogScope; - conn->authType = RestCatalogAuthType; - conn->enableVendedCredentials = RestCatalogEnableVendedCredentials; + opts->host = RestCatalogHost; + opts->oauthHostPath = RestCatalogOauthHostPath; + opts->clientId = RestCatalogClientId; + opts->clientSecret = RestCatalogClientSecret; + opts->scope = RestCatalogScope; + opts->authType = RestCatalogAuthType; + opts->enableVendedCredentials = RestCatalogEnableVendedCredentials; + opts->locationPrefix = GetIcebergDefaultLocationPrefix(); ListCell *lc; @@ -383,46 +413,51 @@ GetRestCatalogConnectionFromServer(const char *serverName) { DefElem *def = (DefElem *) lfirst(lc); - if (strcmp(def->defname, "rest_endpoint") == 0) - conn->host = defGetString(def); - else if (strcmp(def->defname, "client_id") == 0) - conn->clientId = defGetString(def); - else if (strcmp(def->defname, "client_secret") == 0) 
- conn->clientSecret = defGetString(def); - else if (strcmp(def->defname, "scope") == 0) - conn->scope = defGetString(def); - else if (strcmp(def->defname, "rest_auth_type") == 0) + if (pg_strcasecmp(def->defname, "rest_endpoint") == 0) + opts->host = defGetString(def); + else if (pg_strcasecmp(def->defname, "client_id") == 0) + opts->clientId = defGetString(def); + else if (pg_strcasecmp(def->defname, "client_secret") == 0) + opts->clientSecret = defGetString(def); + else if (pg_strcasecmp(def->defname, "scope") == 0) + opts->scope = defGetString(def); + else if (pg_strcasecmp(def->defname, "rest_auth_type") == 0) { char *authType = defGetString(def); - conn->authType = (strcmp(authType, "horizon") == 0) + opts->authType = (pg_strcasecmp(authType, "horizon") == 0) ? REST_CATALOG_AUTH_TYPE_HORIZON : REST_CATALOG_AUTH_TYPE_OAUTH2; } - else if (strcmp(def->defname, "oauth_endpoint") == 0) - conn->oauthHostPath = defGetString(def); - else if (strcmp(def->defname, "enable_vended_credentials") == 0) - conn->enableVendedCredentials = defGetBoolean(def); + else if (pg_strcasecmp(def->defname, "oauth_endpoint") == 0) + opts->oauthHostPath = defGetString(def); + else if (pg_strcasecmp(def->defname, "enable_vended_credentials") == 0) + opts->enableVendedCredentials = defGetBoolean(def); + else if (pg_strcasecmp(def->defname, "location_prefix") == 0) + { + bool inPlace = false; + opts->locationPrefix = StripTrailingSlash(defGetString(def), inPlace); + } } - if (conn->host == NULL || conn->host[0] == '\0') + if (opts->host == NULL || opts->host[0] == '\0') ereport(ERROR, (errcode(ERRCODE_FDW_OPTION_NAME_NOT_FOUND), errmsg("\"rest_endpoint\" option is required for iceberg_catalog server \"%s\"", serverName))); - return conn; + return opts; } /* - * GetRestCatalogConnectionForRelation returns the REST catalog connection + * GetRestCatalogOptionsForRelation returns the REST catalog options * info for the given relation. The catalog option value is used as the * server name. 
For the extension-owned 'rest' server and user-created * servers alike, server options are read first with GUC fallback. */ -RestCatalogConnectionInfo * -GetRestCatalogConnectionForRelation(Oid relationId) +RestCatalogOptions * +GetRestCatalogOptionsForRelation(Oid relationId) { ForeignTable *foreignTable = GetForeignTable(relationId); char *catalog = GetStringOption(foreignTable->options, "catalog", false); @@ -432,7 +467,7 @@ GetRestCatalogConnectionForRelation(Oid relationId) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("catalog option is not set for relation %u", relationId))); - return GetRestCatalogConnectionFromServer(catalog); + return GetRestCatalogOptionsFromServer(catalog); } @@ -477,14 +512,14 @@ StartStageRestCatalogIcebergTableCreate(Oid relationId) const char *catalogName = GetRestCatalogName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); - RestCatalogConnectionInfo *conn = GetRestCatalogConnectionForRelation(relationId); + RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); char *postUrl = - psprintf(REST_CATALOG_TABLES, conn->host, + psprintf(REST_CATALOG_TABLES, opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - List *headers = PostHeadersWithAuth(conn); + List *headers = PostHeadersWithAuth(opts); - if (conn->enableVendedCredentials) + if (opts->enableVendedCredentials) { char *vendedCreds = pstrdup("X-Iceberg-Access-Delegation: vended-credentials"); @@ -492,7 +527,7 @@ StartStageRestCatalogIcebergTableCreate(Oid relationId) } HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body->data, - headers, conn->serverName); + headers, opts); if (httpResult.status != 200) { @@ -569,8 +604,9 @@ FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSche const char *catalogName = GetRestCatalogName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); const char *relationName = GetRestCatalogTableName(relationId); + 
RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); - appendStringInfo(location, "%s/%s/%s/%s/%d", IcebergDefaultLocationPrefix, catalogName, namespaceName, relationName, relationId); + appendStringInfo(location, "%s/%s/%s/%s/%d", opts->locationPrefix, catalogName, namespaceName, relationName, relationId); appendJsonString(body, "location", location->data); appendStringInfoChar(body, '}'); /* end set-location */ @@ -616,7 +652,7 @@ FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSche * allowed locations as part of the namespace. */ void -RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName) +RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName) { /* * First, we need to check if the namespace already exists in Rest Catalog @@ -624,11 +660,11 @@ RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *cat */ char *getUrl = psprintf(REST_CATALOG_NAMESPACE_NAME, - conn->host, URLEncodePath(catalogName), + opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, - GetHeadersWithAuth(conn), - conn->serverName); + GetHeadersWithAuth(opts), + opts); switch (httpResult.status) { @@ -643,7 +679,7 @@ RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *cat /* * Does not exists, we'll create it. 
*/ - CreateNamespaceOnRestCatalog(conn, catalogName, namespaceName); + CreateNamespaceOnRestCatalog(opts, catalogName, namespaceName); break; } @@ -662,7 +698,7 @@ RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *cat if (serverAllowedLocation) { const char *defaultAllowedLocation = - psprintf("%s/%s/%s", IcebergDefaultLocationPrefix, catalogName, namespaceName); + psprintf("%s/%s/%s", opts->locationPrefix, catalogName, namespaceName); /* @@ -708,7 +744,7 @@ RegisterNamespaceToRestCatalog(RestCatalogConnectionInfo * conn, const char *cat * namespace exists when creating a table in the given namespace. */ void -ErrorIfRestNamespaceDoesNotExist(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName) +ErrorIfRestNamespaceDoesNotExist(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName) { /* * First, we need to check if the namespace already exists in Rest Catalog @@ -716,11 +752,11 @@ ErrorIfRestNamespaceDoesNotExist(RestCatalogConnectionInfo * conn, const char *c */ char *getUrl = psprintf(REST_CATALOG_NAMESPACE_NAME, - conn->host, URLEncodePath(catalogName), + opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, - GetHeadersWithAuth(conn), - conn->serverName); + GetHeadersWithAuth(opts), + opts); /* namespace not found */ if (httpResult.status == 404) @@ -752,9 +788,9 @@ GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId) const char *relationName = GetRestCatalogTableName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); - RestCatalogConnectionInfo *conn = GetRestCatalogConnectionForRelation(relationId); + RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); - return GetMetadataLocationFromRestCatalog(conn, restCatalogName, namespaceName, relationName); + return GetMetadataLocationFromRestCatalog(opts, restCatalogName, namespaceName, 
relationName); } @@ -762,15 +798,15 @@ GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId) * Gets the metadata location for a relation from the external catalog. */ char * -GetMetadataLocationFromRestCatalog(RestCatalogConnectionInfo * conn, const char *restCatalogName, const char *namespaceName, const char *relationName) +GetMetadataLocationFromRestCatalog(RestCatalogOptions * opts, const char *restCatalogName, const char *namespaceName, const char *relationName) { char *getUrl = psprintf(REST_CATALOG_TABLE, - conn->host, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); + opts->host, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); - List *headers = GetHeadersWithAuth(conn); + List *headers = GetHeadersWithAuth(opts); HttpResult hr = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, headers, - conn->serverName); + opts); if (hr.status != 200) { @@ -791,7 +827,7 @@ GetMetadataLocationFromRestCatalog(RestCatalogConnectionInfo * conn, const char * an error is raised. 
*/ static void -CreateNamespaceOnRestCatalog(RestCatalogConnectionInfo * conn, const char *catalogName, const char *namespaceName) +CreateNamespaceOnRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName) { /* POST create */ StringInfoData body; @@ -815,12 +851,12 @@ CreateNamespaceOnRestCatalog(RestCatalogConnectionInfo * conn, const char *catal appendStringInfoChar(&body, '}'); /* close body */ char *postUrl = - psprintf(REST_CATALOG_NAMESPACE, conn->host, + psprintf(REST_CATALOG_NAMESPACE, opts->host, URLEncodePath(catalogName)); HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body.data, - PostHeadersWithAuth(conn), - conn->serverName); + PostHeadersWithAuth(opts), + opts); if (httpResult.status != 200) { @@ -832,11 +868,11 @@ CreateNamespaceOnRestCatalog(RestCatalogConnectionInfo * conn, const char *catal * Creates the headers for a POST request with authentication. */ List * -PostHeadersWithAuth(RestCatalogConnectionInfo * conn) +PostHeadersWithAuth(RestCatalogOptions * opts) { bool forceRefreshToken = false; - return list_make3(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(conn, forceRefreshToken)), + return list_make3(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(opts, forceRefreshToken)), pstrdup("Accept: application/json"), pstrdup("Content-Type: application/json")); } @@ -847,11 +883,11 @@ PostHeadersWithAuth(RestCatalogConnectionInfo * conn) * Creates the headers for a DELETE request with authentication. 
*/ List * -DeleteHeadersWithAuth(RestCatalogConnectionInfo * conn) +DeleteHeadersWithAuth(RestCatalogOptions * opts) { bool forceRefreshToken = false; - return list_make1(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(conn, forceRefreshToken))); + return list_make1(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(opts, forceRefreshToken))); } @@ -860,11 +896,11 @@ DeleteHeadersWithAuth(RestCatalogConnectionInfo * conn) * Creates the headers for a GET request with authentication. */ static List * -GetHeadersWithAuth(RestCatalogConnectionInfo * conn) +GetHeadersWithAuth(RestCatalogOptions * opts) { bool forceRefreshToken = false; - return list_make2(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(conn, forceRefreshToken)), + return list_make2(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(opts, forceRefreshToken)), pstrdup("Accept: application/json")); } @@ -909,10 +945,10 @@ ReportHTTPError(HttpResult httpResult, int level) * Build a cache key for the per-server token cache. */ static void -BuildTokenCacheKey(char *key, const RestCatalogConnectionInfo *conn) +BuildTokenCacheKey(char *key, const RestCatalogOptions * opts) { - Assert(conn->serverName != NULL); - strlcpy(key, conn->serverName, TOKEN_CACHE_KEY_LEN); + Assert(opts->serverName != NULL); + strlcpy(key, opts->serverName, TOKEN_CACHE_KEY_LEN); } @@ -943,17 +979,17 @@ InitTokenCacheIfNeeded(void) /* -* Gets an access token from rest catalog. Caches the token per server -* (keyed by host + clientId) until it is about to expire. -*/ + * Gets an access token from rest catalog. Caches the token per server + * (keyed by server name) until it is about to expire. 
+ */ static char * -GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshToken) +GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken) { InitTokenCacheIfNeeded(); char cacheKey[TOKEN_CACHE_KEY_LEN]; - BuildTokenCacheKey(cacheKey, conn); + BuildTokenCacheKey(cacheKey, opts); bool found = false; RestCatalogTokenCacheEntry *entry = @@ -979,15 +1015,16 @@ GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshTok { pfree(entry->accessToken); entry->accessToken = NULL; + entry->accessTokenExpiry = 0; } char *accessToken = NULL; int expiresIn = 0; - FetchRestCatalogAccessToken(conn, &accessToken, &expiresIn); + FetchRestCatalogAccessToken(opts, &accessToken, &expiresIn); entry->accessToken = MemoryContextStrdup(RestTokenCacheCtx, accessToken); - entry->accessTokenExpiry = now + (int64_t) expiresIn * 1000000; /* expiresIn is in + entry->accessTokenExpiry = now + (int64_t) expiresIn * 1000000; /* expiresIn is in * seconds */ } @@ -998,55 +1035,55 @@ GetRestCatalogAccessToken(RestCatalogConnectionInfo * conn, bool forceRefreshTok /* -* Fetches an access token from rest catalog using the given connection info. +* Fetches an access token from rest catalog using the given options. 
*/ static void -FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken, int *expiresIn) +FetchRestCatalogAccessToken(RestCatalogOptions * opts, char **accessToken, int *expiresIn) { - if (!conn->host || !*conn->host) + if (!opts->host || !*opts->host) ereport(ERROR, (errmsg("REST catalog host is not configured"), errhint("Set the \"rest_endpoint\" option on the server " "or the pg_lake_iceberg.rest_catalog_host GUC."))); - if (!conn->clientSecret || !*conn->clientSecret) + if (!opts->clientSecret || !*opts->clientSecret) ereport(ERROR, (errmsg("REST catalog client_secret is not configured"), errhint("Set the \"client_secret\" option on the server " "or the pg_lake_iceberg.rest_catalog_client_secret GUC."))); - char *accessTokenUrl = conn->oauthHostPath; + char *accessTokenUrl = opts->oauthHostPath; /* * if oauthHostPath is not set, use Polaris' default oauth token endpoint */ if (!accessTokenUrl || *accessTokenUrl == '\0') - accessTokenUrl = psprintf(REST_CATALOG_AUTH_TOKEN_PATH, conn->host); + accessTokenUrl = psprintf(REST_CATALOG_AUTH_TOKEN_PATH, opts->host); /* Form-encoded body */ StringInfoData body; initStringInfo(&body); appendStringInfo(&body, "grant_type=client_credentials&scope=%s", - URLEncodePath(conn->scope)); + URLEncodePath(opts->scope)); /* Headers */ List *headers = NIL; - if (conn->authType == REST_CATALOG_AUTH_TYPE_HORIZON) + if (opts->authType == REST_CATALOG_AUTH_TYPE_HORIZON) { /* Put secret in body (ignore client ID) */ - appendStringInfo(&body, "&client_secret=%s", URLEncodePath(conn->clientSecret)); + appendStringInfo(&body, "&client_secret=%s", URLEncodePath(opts->clientSecret)); } else { - if (!conn->clientId || !*conn->clientId) + if (!opts->clientId || !*opts->clientId) ereport(ERROR, (errmsg("REST catalog client_id is not configured"), errhint("Set the \"client_id\" option on the server " "or the pg_lake_iceberg.rest_catalog_client_id GUC."))); /* Build Authorization: Basic */ - char *encodedAuth = 
EncodeBasicAuth(conn->clientId, conn->clientSecret); + char *encodedAuth = EncodeBasicAuth(opts->clientId, opts->clientSecret); char *authHeader = psprintf("Authorization: Basic %s", encodedAuth); headers = lappend(headers, authHeader); @@ -1054,10 +1091,10 @@ FetchRestCatalogAccessToken(RestCatalogConnectionInfo * conn, char **accessToken headers = lappend(headers, "Content-Type: application/x-www-form-urlencoded"); - /* POST */ + /* POST — pass NULL opts to skip 419 token refresh (avoids recursion) */ HttpResult httpResponse = SendRequestToRestCatalog(HTTP_POST, accessTokenUrl, - body.data, headers, - conn->serverName); + body.data, headers, + NULL); if (httpResponse.status != 200) ereport(ERROR, @@ -1570,12 +1607,13 @@ ClassifyRestCatalogRequestRetry(long status, int maxRetry, int retryNo) * so normally we wouldn't want any errors to happen, but then * Postgres already prevents post-commit backends to receive signals. * - * The serverName is used by the retry callback to invalidate only the - * matching token cache entry on a 419 (token expired) response. + * When opts is non-NULL the retry callback can force-refresh the + * access token and patch the Authorization header on a 419 response. + * Pass opts = NULL for the token-fetch request itself to avoid recursion. */ HttpResult -SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, - List *headers, const char *serverName) +SendRequestToRestCatalog(RestCatalogOptions * opts, HttpMethod method, const char *url, + const char *body, List *headers) { const int MAX_HTTP_RETRY_FOR_REST_CATALOG = 3; @@ -1603,7 +1641,7 @@ SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, * new token. 
*/ bool forceRefreshToken = true; - char *freshToken = GetRestCatalogAccessToken(forceRefreshToken); + char *freshToken = GetRestCatalogAccessToken(opts, forceRefreshToken); UpdateAuthorizationHeader(headers, freshToken); continue; diff --git a/pg_lake_iceberg/src/test/rest_catalog.c b/pg_lake_iceberg/src/test/rest_catalog.c index 939d93de..e5422515 100644 --- a/pg_lake_iceberg/src/test/rest_catalog.c +++ b/pg_lake_iceberg/src/test/rest_catalog.c @@ -37,8 +37,8 @@ register_namespace_to_rest_catalog(PG_FUNCTION_ARGS) char *catalogName = text_to_cstring(PG_GETARG_TEXT_P(0)); char *namespaceName = text_to_cstring(PG_GETARG_TEXT_P(1)); - RestCatalogConnectionInfo *conn = GetRestCatalogConnectionFromServer(REST_CATALOG_NAME); + RestCatalogOptions *opts = GetRestCatalogOptionsFromServer(REST_CATALOG_NAME); - RegisterNamespaceToRestCatalog(conn, catalogName, namespaceName); + RegisterNamespaceToRestCatalog(opts, catalogName, namespaceName); PG_RETURN_VOID(); } diff --git a/pg_lake_iceberg/src/test/test_http_client.c b/pg_lake_iceberg/src/test/test_http_client.c index 185a77ac..bf55dc61 100644 --- a/pg_lake_iceberg/src/test/test_http_client.c +++ b/pg_lake_iceberg/src/test/test_http_client.c @@ -34,7 +34,7 @@ PG_FUNCTION_INFO_V1(test_http_with_retry); static Datum build_http_result(FunctionCallInfo fcinfo, const HttpResult * r); static List *extract_headers(FunctionCallInfo fcinfo, int argno); -static bool TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo); +static bool TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo, void *context, List *headers); Datum @@ -132,7 +132,8 @@ test_http_with_retry(PG_FUNCTION_ARGS) HttpResult r = SendHttpRequestWithRetry(method, url, body, headers, TestShouldRetryRequestToRestCatalog, - MAX_HTTP_RETRY_FOR_REST_CATALOG); + MAX_HTTP_RETRY_FOR_REST_CATALOG, + NULL); PG_RETURN_DATUM(build_http_result(fcinfo, &r)); } @@ -192,7 +193,8 @@ build_http_result(FunctionCallInfo fcinfo, const HttpResult 
* r) * retries until maxRetry is reached. */ static bool -TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo) +TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo, + void *context, List *headers) { if (retryNo > maxRetry) return false; diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index a8c443fa..82a2c637 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -766,13 +766,13 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (hasRestCatalogOption && hasExternalCatalogReadOnlyOption) { char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); - RestCatalogConnectionInfo *conn = - GetRestCatalogConnectionFromServer(catalogOptionValue); + RestCatalogOptions *opts = + GetRestCatalogOptionsFromServer(catalogOptionValue); - ErrorIfRestNamespaceDoesNotExist(conn, catalogName, catalogNamespace); + ErrorIfRestNamespaceDoesNotExist(opts, catalogName, catalogNamespace); metadataLocation = - GetMetadataLocationFromRestCatalog(conn, catalogName, catalogNamespace, catalogTableName); + GetMetadataLocationFromRestCatalog(opts, catalogName, catalogNamespace, catalogTableName); } else if (hasObjectStoreCatalogOption && hasExternalCatalogReadOnlyOption) { @@ -884,6 +884,18 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) "tables"); } + if (hasRestCatalogOption && locationOption == NULL && + !HasReadOnlyOption(createStmt->options)) + { + char *catalogOptionValue = + GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + GetRestCatalogOptionsFromServer(catalogOptionValue); + + if (opts->locationPrefix != NULL) + defaultLocationPrefix = opts->locationPrefix; + } + /* * We will set the location by using the default location prefix when user * does not specify the location but already set default locatipn prefix. 
@@ -978,10 +990,10 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) * etc., but here we need to do it early before the table is created. */ char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); - RestCatalogConnectionInfo *conn = - GetRestCatalogConnectionFromServer(catalogOptionValue); + RestCatalogOptions *opts = + GetRestCatalogOptionsFromServer(catalogOptionValue); - RegisterNamespaceToRestCatalog(conn, get_database_name(MyDatabaseId), + RegisterNamespaceToRestCatalog(opts, get_database_name(MyDatabaseId), get_namespace_name(namespaceId)); } diff --git a/pg_lake_table/src/init.c b/pg_lake_table/src/init.c index f7d9199b..e71d3acf 100644 --- a/pg_lake_table/src/init.c +++ b/pg_lake_table/src/init.c @@ -383,6 +383,7 @@ _PG_init(void) MarkGUCPrefixReserved(PG_LAKE_TABLE); + RegisterUtilityStatementHandler(RequireRestTypeForIcebergCatalogServer, NULL); RegisterUtilityStatementHandler(BlockDDLOnExtensionCatalogs, NULL); RegisterUtilityStatementHandler(ProcessVacuumPgLakeTable, NULL); RegisterUtilityStatementHandler(ProcessCreatePgLakeTable, NULL); diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index e9d612fc..7d233024 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -76,9 +76,6 @@ typedef struct RestCatalogRequestPerTable char *tableRestUrl; char *tableIdentifier; - /* Per-table REST catalog connection info for multi-server support */ - RestCatalogConnectionInfo *conn; - RestCatalogRequest *createTableRequest; RestCatalogRequest *dropTableRequest; @@ -124,10 +121,19 @@ static HTAB *TrackedIcebergMetadataOperationsHash = NULL; */ static HTAB *RestCatalogRequestsHash = NULL; - /* some pre-allocated memory so we don't palloc() ever in XACT_COMMIT */ static MemoryContext PgLakeXactCommitContext = NULL; +/* + * Resolved 
REST catalog options for the current transaction, deep-copied into + * TopTransactionContext in RecordRestCatalogRequestInTx (when syscache is still + * accessible) because PostAllRestCatalogRequests runs at XACT_EVENT_COMMIT, + * where syscache lookups are forbidden. Only one REST catalog server is allowed + * per transaction. + */ +static RestCatalogOptions *PgLakeXactRestCatalogOpts = NULL; + + /* * TrackIcebergMetadataChangesInTx tracks metadata changes for a given relation * within a transaction. It acquires the necessary locks before applying the changes @@ -219,6 +225,7 @@ ResetRestCatalogRequests(void) { RestCatalogRequestsHash = NULL; PgLakeXactCommitContext = NULL; + PgLakeXactRestCatalogOpts = NULL; } @@ -241,6 +248,8 @@ PostAllRestCatalogRequests(void) */ MemoryContext oldContext = MemoryContextSwitchTo(PgLakeXactCommitContext); + Assert(PgLakeXactRestCatalogOpts != NULL); + /* * We need to iterate over the RestCatalogRequestsHash twice: 1. First, we * need to post the create table requests to create the iceberg tables in @@ -287,8 +296,8 @@ PostAllRestCatalogRequests(void) HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, requestPerTable->tableRestUrl, createTableRequest->body, - PostHeadersWithAuth(requestPerTable->conn), - requestPerTable->conn->serverName); + PostHeadersWithAuth(PgLakeXactRestCatalogOpts), + PgLakeXactRestCatalogOpts); if (httpResult.status != 200) { @@ -305,8 +314,8 @@ PostAllRestCatalogRequests(void) HttpResult httpResult = SendRequestToRestCatalog(HTTP_DELETE, requestPerTable->tableRestUrl, NULL, - DeleteHeadersWithAuth(requestPerTable->conn), - requestPerTable->conn->serverName); + DeleteHeadersWithAuth(PgLakeXactRestCatalogOpts), + PgLakeXactRestCatalogOpts); if (httpResult.status != 204) { @@ -327,14 +336,16 @@ PostAllRestCatalogRequests(void) /* * Now that all create table requests have been posted, we can post all - * the other modifications. 
We group modifications by REST catalog server - * (identified by host) so that each server gets its own transaction - * commit request for atomicity. - * - * We do two passes: first collect tables that have modifications, then - * group by server host and send one batch per server. + * the other modifications. All table modifications are sent in a single + * HTTP request to ensure atomicity. */ - List *tablesWithModifications = NIL; + char *catalogName = NULL; + bool hasRestCatalogChanges = false; + StringInfo batchRequestBody = makeStringInfo(); + + appendStringInfo(batchRequestBody, "{"); /* start msg body */ + appendJsonKey(batchRequestBody, "table-changes"); + appendStringInfo(batchRequestBody, "["); /* start array of changes */ hash_seq_init(&status, RestCatalogRequestsHash); @@ -351,6 +362,9 @@ PostAllRestCatalogRequests(void) continue; } + /* TODO: can we ever have multiple catalogs? */ + catalogName = requestPerTable->catalogName; + if (requestPerTable->createTableRequest != NULL && requestPerTable->dropTableRequest != NULL) { @@ -368,94 +382,67 @@ PostAllRestCatalogRequests(void) continue; } - tablesWithModifications = lappend(tablesWithModifications, requestPerTable); - } + if (hasRestCatalogChanges) + { + appendStringInfoChar(batchRequestBody, ','); /* separate previous + * table change */ + } - /* - * Group by (host, catalogName) and send one batch per group. The - * transaction commit URL includes the catalog prefix, so tables under - * different catalog names need separate commits even when the host is - * the same. 
- */ - while (list_length(tablesWithModifications) > 0) - { - RestCatalogRequestPerTable *firstTable = - (RestCatalogRequestPerTable *) linitial(tablesWithModifications); + appendStringInfoChar(batchRequestBody, '{'); /* start per-table json + * object */ + appendJsonKey(batchRequestBody, "identifier"); + appendStringInfo(batchRequestBody, "%s", requestPerTable->tableIdentifier); + appendStringInfoChar(batchRequestBody, ','); + appendStringInfoString(batchRequestBody, "\"requirements\":[],"); + appendStringInfoString(batchRequestBody, " \"updates\":["); - char *batchHost = firstTable->conn->host; - char *catalogName = firstTable->catalogName; - RestCatalogConnectionInfo *batchConn = firstTable->conn; - bool hasChanges = false; - StringInfo batchRequestBody = makeStringInfo(); + ListCell *requestCell = NULL; - appendStringInfoChar(batchRequestBody, '{'); - appendJsonKey(batchRequestBody, "table-changes"); - appendStringInfoChar(batchRequestBody, '['); + foreach(requestCell, requestPerTable->tableModifyRequests) + { + RestCatalogRequest *request = (RestCatalogRequest *) lfirst(requestCell); - List *remaining = NIL; - ListCell *lc; + appendStringInfoString(batchRequestBody, request->body); - foreach(lc, tablesWithModifications) - { - requestPerTable = (RestCatalogRequestPerTable *) lfirst(lc); + bool lastRequest = (requestCell == list_tail(requestPerTable->tableModifyRequests)); - if (strcmp(requestPerTable->conn->host, batchHost) != 0 || - strcmp(requestPerTable->catalogName, catalogName) != 0) + if (!lastRequest) { - remaining = lappend(remaining, requestPerTable); - continue; - } - - if (hasChanges) appendStringInfoChar(batchRequestBody, ','); + } - appendStringInfoChar(batchRequestBody, '{'); - appendJsonKey(batchRequestBody, "identifier"); - appendStringInfo(batchRequestBody, "%s", requestPerTable->tableIdentifier); - appendStringInfoChar(batchRequestBody, ','); - appendStringInfoString(batchRequestBody, "\"requirements\":[],"); - 
appendStringInfoString(batchRequestBody, " \"updates\":["); - - ListCell *requestCell = NULL; - - foreach(requestCell, requestPerTable->tableModifyRequests) + if (message_level_is_interesting(DEBUG2)) { - RestCatalogRequest *request = (RestCatalogRequest *) lfirst(requestCell); + elog(DEBUG2, "REST Catalog Request Body size reached: %d bytes", + batchRequestBody->len); + } + } - appendStringInfoString(batchRequestBody, request->body); + appendStringInfoChar(batchRequestBody, ']'); /* close updates array */ + appendStringInfoChar(batchRequestBody, '}'); /* close per-table json + * object */ - if (requestCell != list_tail(requestPerTable->tableModifyRequests)) - appendStringInfoChar(batchRequestBody, ','); + /* + * We have at least one change to send for this table + */ + hasRestCatalogChanges = true; + } - if (message_level_is_interesting(DEBUG2)) - { - elog(DEBUG2, "REST Catalog Request Body size reached: %d bytes", - batchRequestBody->len); - } - } + if (hasRestCatalogChanges) + { + appendStringInfoChar(batchRequestBody, ']'); /* close table-changes */ + appendStringInfoChar(batchRequestBody, '}'); /* close json body */ - appendStringInfoChar(batchRequestBody, ']'); - appendStringInfoChar(batchRequestBody, '}'); - hasChanges = true; - } + char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, + PgLakeXactRestCatalogOpts->host, catalogName); + HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, + PostHeadersWithAuth(PgLakeXactRestCatalogOpts), + PgLakeXactRestCatalogOpts); - if (hasChanges) + if (httpResult.status != 204) { - appendStringInfoChar(batchRequestBody, ']'); - appendStringInfoChar(batchRequestBody, '}'); - - char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, batchConn->host, catalogName); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, - PostHeadersWithAuth(batchConn), - batchConn->serverName); - - if (httpResult.status != 204) - { - ReportHTTPError(httpResult, WARNING); - 
} + ReportHTTPError(httpResult, WARNING); } - - tablesWithModifications = remaining; } /* @@ -644,24 +631,39 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT memset(requestPerTable, 0, sizeof(RestCatalogRequestPerTable)); requestPerTable->relationId = relationId; - /* Resolve per-table REST catalog connection info */ - RestCatalogConnectionInfo *conn = GetRestCatalogConnectionForRelation(relationId); - RestCatalogConnectionInfo *persistConn = - MemoryContextAlloc(TopTransactionContext, sizeof(RestCatalogConnectionInfo)); - - memcpy(persistConn, conn, sizeof(RestCatalogConnectionInfo)); - if (conn->serverName) - persistConn->serverName = MemoryContextStrdup(TopTransactionContext, conn->serverName); - persistConn->host = MemoryContextStrdup(TopTransactionContext, conn->host); - if (conn->oauthHostPath) - persistConn->oauthHostPath = MemoryContextStrdup(TopTransactionContext, conn->oauthHostPath); - if (conn->clientId) - persistConn->clientId = MemoryContextStrdup(TopTransactionContext, conn->clientId); - if (conn->clientSecret) - persistConn->clientSecret = MemoryContextStrdup(TopTransactionContext, conn->clientSecret); - if (conn->scope) - persistConn->scope = MemoryContextStrdup(TopTransactionContext, conn->scope); - requestPerTable->conn = persistConn; + /* Resolve the options for this relation's REST catalog */ + RestCatalogOptions *resolvedOpts = GetRestCatalogOptionsForRelation(relationId); + + if (PgLakeXactRestCatalogOpts == NULL) + { + /* + * Deep-copy opts into TopTransactionContext so the struct and its + * string fields survive until XACT_EVENT_COMMIT. + */ + MemoryContext oldctx = MemoryContextSwitchTo(TopTransactionContext); + + PgLakeXactRestCatalogOpts = palloc0(sizeof(RestCatalogOptions)); + PgLakeXactRestCatalogOpts->serverName = pstrdup(resolvedOpts->serverName); + PgLakeXactRestCatalogOpts->host = pstrdup(resolvedOpts->host); + PgLakeXactRestCatalogOpts->oauthHostPath = resolvedOpts->oauthHostPath ? 
pstrdup(resolvedOpts->oauthHostPath) : NULL; + PgLakeXactRestCatalogOpts->clientId = resolvedOpts->clientId ? pstrdup(resolvedOpts->clientId) : NULL; + PgLakeXactRestCatalogOpts->clientSecret = resolvedOpts->clientSecret ? pstrdup(resolvedOpts->clientSecret) : NULL; + PgLakeXactRestCatalogOpts->scope = resolvedOpts->scope ? pstrdup(resolvedOpts->scope) : NULL; + PgLakeXactRestCatalogOpts->locationPrefix = resolvedOpts->locationPrefix ? pstrdup(resolvedOpts->locationPrefix) : NULL; + PgLakeXactRestCatalogOpts->authType = resolvedOpts->authType; + PgLakeXactRestCatalogOpts->enableVendedCredentials = resolvedOpts->enableVendedCredentials; + + MemoryContextSwitchTo(oldctx); + } + else if (strcmp(PgLakeXactRestCatalogOpts->serverName, resolvedOpts->serverName) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot modify tables from different REST catalogs " + "in the same transaction"), + errdetail("This transaction already targets catalog server " + "\"%s\", but table %u belongs to \"%s\".", + PgLakeXactRestCatalogOpts->serverName, relationId, + resolvedOpts->serverName))); requestPerTable->catalogName = MemoryContextStrdup(TopTransactionContext, GetRestCatalogName(relationId)); @@ -679,7 +681,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT requestPerTable->tableRestUrl = MemoryContextStrdup(TopTransactionContext, psprintf(REST_CATALOG_TABLE, - persistConn->host, + resolvedOpts->host, requestPerTable->urlEncodedCatalogName, requestPerTable->urlEncodedCatalogNamespace, requestPerTable->urlEncodedCatalogTableName)); diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 1a2afa19..66c2f2b5 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -61,21 +61,23 @@ def test_precreated_rest_server(pg_conn, extension): def 
test_create_rest_server_with_all_options(superuser_conn, extension): - """All documented options should be accepted for a REST-type server.""" + """All documented options should be accepted for a REST-type server. + Uses a mix of quoted upper-case and plain lower-case option names to + verify case-insensitive matching.""" run_command( """ CREATE SERVER test_rest_all_opts TYPE 'rest' FOREIGN DATA WRAPPER iceberg_catalog OPTIONS ( - rest_endpoint 'http://localhost:8181', - rest_auth_type 'oauth2', - oauth_endpoint 'http://localhost:8181/oauth/tokens', + "Rest_Endpoint" 'http://localhost:8181', + "REST_AUTH_TYPE" 'OAuth2', + "OAuth_Endpoint" 'http://localhost:8181/oauth/tokens', scope 'PRINCIPAL_ROLE:ALL', enable_vended_credentials 'true', - location_prefix 's3://bucket/prefix', + "Location_Prefix" 's3://bucket/prefix', catalog_name 'my_catalog', - client_id 'test-id', - client_secret 'test-secret' + "Client_Id" 'test-id', + "Client_Secret" 'test-secret' ) """, superuser_conn, @@ -96,19 +98,6 @@ def test_create_rest_server_minimal(superuser_conn, extension): superuser_conn.rollback() -def test_create_server_without_type(superuser_conn, extension): - """A server without TYPE should be accepted (defaults to rest).""" - run_command( - """ - CREATE SERVER test_no_type - FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS (rest_endpoint 'http://localhost:8181') - """, - superuser_conn, - ) - superuser_conn.rollback() - - def test_create_server_horizon_auth(superuser_conn, extension): """Horizon auth type should be accepted.""" run_command( @@ -390,42 +379,6 @@ def test_non_iceberg_catalog_server_rejected( pg_conn.rollback() -def test_server_without_type_treated_as_rest( - pg_conn, superuser_conn, s3, extension, with_default_location -): - """A server without explicit TYPE should default to rest catalog behavior.""" - run_command( - """ - CREATE SERVER test_no_type_srv - FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS ( - rest_endpoint 'http://localhost:8181', - client_id 'id', - 
client_secret 'secret' - ) - """, - superuser_conn, - ) - superuser_conn.commit() - - err = run_command( - """ - CREATE TABLE test_no_type_tbl () - USING iceberg - WITH (catalog = 'test_no_type_srv', read_only = 'true', - catalog_namespace = 'ns', catalog_table_name = 'tbl') - """, - pg_conn, - raise_error=False, - ) - # Should be treated as REST, not rejected as invalid catalog - assert "invalid catalog option" not in str(err) - pg_conn.rollback() - - run_command("DROP SERVER test_no_type_srv CASCADE", superuser_conn) - superuser_conn.commit() - - # ── Backward compatibility ───────────────────────────────────────────────── @@ -518,6 +471,36 @@ def test_reject_create_server_type_object_store(superuser_conn, extension): superuser_conn.rollback() +def test_reject_create_server_non_rest_type(superuser_conn, extension): + """Any TYPE other than 'rest' is rejected for user-created iceberg_catalog servers.""" + err = run_command( + """ + CREATE SERVER my_server TYPE 'something_else' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "iceberg_catalog server requires TYPE 'rest'" in str(err) + superuser_conn.rollback() + + +def test_reject_create_server_without_type(superuser_conn, extension): + """CREATE SERVER without TYPE is rejected for iceberg_catalog servers.""" + err = run_command( + """ + CREATE SERVER my_server + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "iceberg_catalog server requires TYPE 'rest'" in str(err) + superuser_conn.rollback() + + def test_reject_create_server_reserved_name(superuser_conn, extension): """CREATE SERVER with a reserved catalog name (case-insensitive) is blocked.""" reserved_names = [ @@ -767,3 +750,86 @@ def test_allow_owner_change_user_created_server(superuser_conn, extension): superuser_conn, ) superuser_conn.rollback() + + +# ── default_catalog GUC with user-created servers 
────────────────────────── + + +def test_set_default_catalog_to_user_created_rest_server(superuser_conn, extension): + """SET pg_lake_iceberg.default_catalog should accept a user-created + iceberg_catalog server with TYPE 'rest'.""" + run_command( + """ + CREATE SERVER my_rest_catalog TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command( + "SET pg_lake_iceberg.default_catalog = 'my_rest_catalog'", + superuser_conn, + ) + + result = run_query( + "SHOW pg_lake_iceberg.default_catalog", + superuser_conn, + ) + assert result[0]["pg_lake_iceberg.default_catalog"] == "my_rest_catalog" + superuser_conn.rollback() + + +def test_set_default_catalog_rejects_nonexistent_server(pg_conn, extension): + """SET pg_lake_iceberg.default_catalog should reject a name that is + neither a built-in literal nor an existing server.""" + err = run_command( + "SET pg_lake_iceberg.default_catalog = 'no_such_server'", + pg_conn, + raise_error=False, + ) + assert err is not None + assert "user-created iceberg_catalog server" in str(err) + pg_conn.rollback() + + +# ── Case-sensitive server names ──────────────────────────────────────────── + + +def test_case_sensitive_server_names(superuser_conn, extension): + """Server names are case-sensitive: "test_cs" and "TEST_CS" are distinct + servers that can coexist with different options.""" + run_command( + """ + CREATE SERVER test_cs TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://host-lower:8181') + """, + superuser_conn, + ) + run_command( + """ + CREATE SERVER "TEST_CS" TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://host-upper:8181') + """, + superuser_conn, + ) + + lower_opts = run_query( + "SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'test_cs'", + superuser_conn, + ) + upper_opts = run_query( + "SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'TEST_CS'", + 
superuser_conn, + ) + + assert len(lower_opts) == 1 + assert len(upper_opts) == 1 + assert "host-lower" in str(lower_opts[0]["srvoptions"]) + assert "host-upper" in str(upper_opts[0]["srvoptions"]) + + run_command("DROP SERVER test_cs", superuser_conn) + run_command('DROP SERVER "TEST_CS"', superuser_conn) + superuser_conn.rollback() diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index 62ad6ee6..627b2ffe 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -567,130 +567,171 @@ def get_rest_table_metadata_location(encoded_namespace, encoded_table_name, pg_c return metadata["metadata"]["location"] -def test_multi_table_different_rest_catalog_hosts_in_single_transaction( +def test_server_location_prefix_overrides_guc( installcheck, superuser_conn, pg_conn, s3, extension, - with_default_location, polaris_session, create_http_helper_functions, ): """ - Tables from two REST catalog servers with different hosts are modified - in the same transaction. PostAllRestCatalogRequests groups modifications - by conn->host, so using 'localhost' vs '127.0.0.1' (same Polaris, different - host strings) produces two separate batch commit requests. + When a REST catalog server has a location_prefix option, tables must use + that prefix for their storage location. We verify this by setting the + GUC to a broken S3 bucket. 
""" if installcheck: return - server_a = "multi_host_catalog_a" - server_b = "multi_host_catalog_b" - table_a = "multi_host_tx_a" - table_b = "multi_host_tx_b" - ns = TABLE_NAMESPACE + "_multi_host" - - _create_polaris_catalog_server(superuser_conn, server_a, "localhost") - _create_polaris_catalog_server(superuser_conn, server_b, "127.0.0.1") - superuser_conn.commit() + BROKEN_PREFIX = "s3://nonexistent-broken-bucket-xyz" + VALID_PREFIX = f"s3://{TEST_BUCKET}/" + SERVER_NAME = "rest_catalog_loc_prefix" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "loc_prefix_test" - run_command(f"CREATE SCHEMA IF NOT EXISTS {ns}", pg_conn) - pg_conn.commit() + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" run_command( - f"CREATE TABLE {ns}.{table_a} (id bigint, value text) USING iceberg WITH (catalog='{server_a}')", + f"SET pg_lake_iceberg.default_location_prefix TO '{BROKEN_PREFIX}'", pg_conn, ) pg_conn.commit() run_command( - f"CREATE TABLE {ns}.{table_b} (id bigint, value text) USING iceberg WITH (catalog='{server_b}')", - pg_conn, + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix '{VALID_PREFIX}') + """, + superuser_conn, ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) pg_conn.commit() - # Insert into both tables (different hosts) within a single transaction - run_command( - f"INSERT INTO {ns}.{table_a} SELECT i, 'a' FROM generate_series(1, 50) i", - pg_conn, - ) run_command( - f"INSERT INTO {ns}.{table_b} SELECT i, 'b' FROM generate_series(1, 30) i", + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", 
pg_conn, ) pg_conn.commit() - results_a = run_query(f"SELECT count(*) FROM {ns}.{table_a}", pg_conn) - assert results_a[0][0] == 50 - - results_b = run_query(f"SELECT count(*) FROM {ns}.{table_b}", pg_conn) - assert results_b[0][0] == 30 - - # Mixed DML across different hosts in a single transaction - run_command( - f"INSERT INTO {ns}.{table_a} SELECT i, 'a2' FROM generate_series(51, 70) i", - pg_conn, - ) run_command( - f"DELETE FROM {ns}.{table_b} WHERE id <= 10", + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} " + f"SELECT i, i::text FROM generate_series(1, 10) i", pg_conn, ) pg_conn.commit() - results_a = run_query(f"SELECT count(*) FROM {ns}.{table_a}", pg_conn) - assert results_a[0][0] == 70 + results = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", pg_conn) + assert results[0][0] == 10 + + table_location = get_rest_table_metadata_location(SCHEMA_NAME, TABLE_NAME, pg_conn) + stripped_prefix = VALID_PREFIX.rstrip("/") + assert table_location.startswith(stripped_prefix), ( + f"Expected location to start with server prefix '{stripped_prefix}', " + f"got '{table_location}'" + ) + assert BROKEN_PREFIX not in table_location + assert ( + "//" not in table_location.split("://", 1)[1] + ), f"Double slash found in location path: '{table_location}'" - results_b = run_query(f"SELECT count(*) FROM {ns}.{table_b}", pg_conn) - assert results_b[0][0] == 20 + run_command_outside_tx([f"VACUUM {SCHEMA_NAME}.{TABLE_NAME}"]) - # UPDATE on both hosts in a single transaction run_command( - f"UPDATE {ns}.{table_a} SET value = 'updated_a' WHERE id <= 5", + f"ALTER TABLE {SCHEMA_NAME}.{TABLE_NAME} ADD COLUMN extra int", pg_conn, ) + pg_conn.commit() + run_command( - f"UPDATE {ns}.{table_b} SET value = 'updated_b' WHERE id > 20", + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} " + f"SELECT i, i::text, i FROM generate_series(11, 20) i", pg_conn, ) pg_conn.commit() - results_a = run_query( - f"SELECT count(*) FROM {ns}.{table_a} WHERE value = 'updated_a'", pg_conn - ) - assert 
results_a[0][0] == 5 - - results_b = run_query( - f"SELECT count(*) FROM {ns}.{table_b} WHERE value = 'updated_b'", pg_conn - ) - assert results_b[0][0] == 10 + results = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", pg_conn) + assert results[0][0] == 20 - # Cleanup pg_conn.rollback() - run_command(f"DROP SCHEMA {ns} CASCADE", pg_conn) + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) pg_conn.commit() - run_command(f"DROP SERVER {server_a}", superuser_conn) - run_command(f"DROP SERVER {server_b}", superuser_conn) + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) superuser_conn.commit() + run_command("RESET pg_lake_iceberg.default_location_prefix", pg_conn) + pg_conn.commit() + + +def test_reject_modify_different_rest_catalogs_in_single_transaction( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Modifying tables from two different REST catalog servers in the same + transaction must be rejected. 
+ """ + if installcheck: + return -def _create_polaris_catalog_server(conn, server_name, hostname): - """Create an iceberg_catalog server pointing to the Polaris instance via the given hostname.""" creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) client_id = creds["credentials"]["clientId"] client_secret = creds["credentials"]["clientSecret"] - endpoint = f"http://{hostname}:{server_params.POLARIS_PORT}" + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + for name in ["rest_catalog_a", "rest_catalog_b"]: + run_command( + f""" + CREATE SERVER {name} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {TABLE_NAMESPACE}", pg_conn) + pg_conn.commit() + + for name, catalog in [("table_a", "rest_catalog_a"), ("table_b", "rest_catalog_b")]: + run_command( + f"CREATE TABLE {TABLE_NAMESPACE}.{name} (id bigint) USING iceberg WITH (catalog='{catalog}')", + pg_conn, + ) + pg_conn.commit() run_command( - f""" - CREATE SERVER {server_name} TYPE 'rest' - FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS ( - rest_endpoint '{endpoint}', - client_id '{client_id}', - client_secret '{client_secret}' - ) - """, - conn, + f"INSERT INTO {TABLE_NAMESPACE}.table_a SELECT i FROM generate_series(1, 10) i", + pg_conn, + ) + run_command( + f"INSERT INTO {TABLE_NAMESPACE}.table_b SELECT i FROM generate_series(1, 10) i", + pg_conn, ) + + with pytest.raises( + psycopg2.errors.FeatureNotSupported, match="different REST catalogs" + ): + pg_conn.commit() + + pg_conn.rollback() From 6edcfe06e4157a0a2b051ee29e32230a091f953c Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Tue, 24 Mar 2026 15:03:11 +0300 Subject: [PATCH 13/23] Treat postgres, object_store, and rest as built-in names, not servers Don't create foreign servers for the reserved catalog names in the 
extension install script. These names ('postgres', 'object_store', 'rest') could already be in use in the target database (especially 'postgres'), causing the extension script to fail. Instead, treat them as special built-in identifiers whose configuration comes entirely from GUC settings. Only user-created catalogs (via CREATE SERVER ... FOREIGN DATA WRAPPER iceberg_catalog) result in actual foreign server objects. Key changes: - Remove CREATE SERVER postgres/object_store/rest from the upgrade SQL. - GetRestCatalogOptionsFromCatalog now returns GUC-only opts for the built-in 'rest' name without looking up a foreign server. - Simplify BlockDDLOnExtensionCatalogs: only guard CREATE SERVER (reserved names, TYPE postgres/object_store) and RENAME TO reserved names. ALTER/DROP/OWNER on built-in names fail naturally since no server object exists. - Remove IsIcebergCatalogServer (no longer needed). - Rename RestCatalogOptions.serverName to .catalog and GetRestCatalogOptionsFromServer to GetRestCatalogOptionsFromCatalog to reflect the new semantics. - Update tests Signed-off-by: sfc-gh-npuka --- pg_lake_engine/src/utils/catalog_type.c | 10 +- .../pg_lake/rest_catalog/rest_catalog.h | 16 +- pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql | 13 - .../src/rest_catalog/rest_catalog.c | 237 ++++++------------ pg_lake_iceberg/src/test/rest_catalog.c | 2 +- pg_lake_table/src/ddl/create_table.c | 6 +- .../track_iceberg_metadata_changes.c | 9 +- .../pytests/test_iceberg_catalog_server.py | 204 +-------------- 8 files changed, 118 insertions(+), 379 deletions(-) diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index 7e7ea8cc..75bce280 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -108,8 +108,8 @@ HasReadOnlyOption(List *options) /* - * IsCatalogOwnedByExtension returns true if the catalog name matches one of - * the extension-owned names: 'rest', 'object_store', or 'postgres'. 
+ * IsCatalogOwnedByExtension returns true if the catalog name is one of + * the reserved built-in names: 'rest', 'object_store', or 'postgres'. * Comparison is case-insensitive so that "Postgres", "REST", etc. are * also recognized as reserved. */ @@ -124,7 +124,7 @@ IsCatalogOwnedByExtension(const char *catalog) /* * IsRestCatalog returns true if the catalog name identifies a REST catalog. - * This includes the extension-owned 'rest' literal and any user-created + * This includes the built-in 'rest' literal and any user-created * iceberg_catalog server whose TYPE is 'rest'. */ bool @@ -148,6 +148,6 @@ IsRestCatalog(const char *catalog) if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) return false; - Assert(server->servertype != NULL && *server->servertype != '\0'); - return pg_strcasecmp(server->servertype, REST_CATALOG_NAME) == 0; + Assert(pg_strcasecmp(server->servertype, REST_CATALOG_NAME) == 0); + return true; } diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index d60dcb6f..3e21bdeb 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -36,13 +36,17 @@ extern int RestCatalogAuthType; extern bool RestCatalogEnableVendedCredentials; /* - * Holds per-server REST catalog options. Populated from the server options - * of an iceberg_catalog ForeignServer, with GUC fallback for any option - * not explicitly set on the server. + * Resolved REST catalog connection options. For the built-in 'rest' + * catalog the fields come entirely from GUC settings. For user-created + * catalogs (CREATE SERVER ... FOREIGN DATA WRAPPER iceberg_catalog) the + * server options override the GUC defaults. 
*/ typedef struct RestCatalogOptions { - char *serverName; /* server name, used for token cache keying */ + char *catalog; /* catalog name, used for token cache keying; + * can be 'rest' or a user-created server name + * of TYPE 'rest' + */ char *host; char *oauthHostPath; char *clientId; @@ -96,8 +100,8 @@ typedef struct RestCatalogRequest #define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens" #define GET_REST_CATALOG_METADATA_LOCATION "%s/api/catalog/v1/%s/namespaces/%s/tables/%s" -/* Connection info resolution */ -extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsFromServer(const char *serverName); +/* Catalog options resolution */ +extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsFromCatalog(const char *catalog); extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsForRelation(Oid relationId); extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql index 2e229943..70829978 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql @@ -38,16 +38,3 @@ LANGUAGE C STRICT; CREATE FOREIGN DATA WRAPPER iceberg_catalog NO HANDLER VALIDATOR lake_iceberg.iceberg_catalog_validator; - -/* Pre-created catalog servers for backward compatibility */ -CREATE SERVER postgres - TYPE 'postgres' - FOREIGN DATA WRAPPER iceberg_catalog; - -CREATE SERVER object_store - TYPE 'object_store' - FOREIGN DATA WRAPPER iceberg_catalog; - -CREATE SERVER rest - TYPE 'rest' - FOREIGN DATA WRAPPER iceberg_catalog; diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 18300c46..8f979dea 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -61,7 +61,7 @@ int RestCatalogAuthType = 
REST_CATALOG_AUTH_TYPE_OAUTH2; bool RestCatalogEnableVendedCredentials = true; /* - * Per-server token cache. Keyed by server name. + * Per-catalog token cache. Keyed by catalog. */ #define TOKEN_CACHE_KEY_LEN NAMEDATALEN @@ -190,35 +190,16 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) /* - * IsIcebergCatalogServer returns true if the named server exists and - * uses the iceberg_catalog FDW. - */ -static bool -IsIcebergCatalogServer(const char *serverName) -{ - ForeignServer *server = GetForeignServerByName(serverName, true); - - if (server == NULL) - return false; - - ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); - - return strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0; -} - - -/* - * BlockDDLOnExtensionCatalogs guards the extension-owned - * iceberg_catalog servers (postgres, object_store, rest) against - * unauthorized DDL. + * BlockDDLOnExtensionCatalogs prevents misuse of the reserved catalog + * names 'postgres', 'object_store', and 'rest'. These are built-in + * identifiers (not actual foreign servers), so we block: + * + * - CREATE SERVER with a reserved name. + * - CREATE SERVER with TYPE 'postgres' or 'object_store'. + * - RENAME TO a reserved name. * - * Rules (outside of CREATE/ALTER EXTENSION): - * - CREATE SERVER with TYPE 'postgres' or 'object_store' is blocked. - * - ALTER SERVER on 'postgres' or 'object_store' is blocked. - * - ALTER SERVER on 'rest' is allowed (users may set options). - * - DROP SERVER on 'postgres', 'object_store', or 'rest' is blocked. - * - ALTER ... RENAME on 'postgres', 'object_store', or 'rest' is blocked. - * - ALTER ... OWNER TO on 'postgres', 'object_store', or 'rest' is blocked. + * ALTER/DROP/OWNER on these names will fail naturally because no + * server object exists. 
*/ bool BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, @@ -251,47 +232,10 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot create iceberg_catalog server with TYPE '%s'", stmt->servertype), - errhint("Use the pre-created \"%s\" or \"%s\" server, " + errhint("Use the built-in \"%s\" or \"%s\" catalogs, " "or create a server of type 'rest'.", POSTGRES_CATALOG_NAME, OBJECT_STORE_CATALOG_NAME))); } - else if (IsA(parsetree, AlterForeignServerStmt)) - { - AlterForeignServerStmt *stmt = (AlterForeignServerStmt *) parsetree; - - if (!IsIcebergCatalogServer(stmt->servername)) - return false; - - if (pg_strcasecmp(stmt->servername, POSTGRES_CATALOG_NAME) == 0 || - pg_strcasecmp(stmt->servername, OBJECT_STORE_CATALOG_NAME) == 0) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot alter the extension-owned \"%s\" catalog server", - stmt->servername))); - } - else if (IsA(parsetree, DropStmt)) - { - DropStmt *stmt = (DropStmt *) parsetree; - - if (stmt->removeType != OBJECT_FOREIGN_SERVER) - return false; - - ListCell *lc; - - foreach(lc, stmt->objects) - { - char *serverName = strVal(lfirst(lc)); - - if (!IsIcebergCatalogServer(serverName)) - continue; - - if (IsCatalogOwnedByExtension(serverName)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot drop the extension-owned \"%s\" catalog server", - serverName))); - } - } else if (IsA(parsetree, RenameStmt)) { RenameStmt *stmt = (RenameStmt *) parsetree; @@ -299,17 +243,6 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, if (stmt->renameType != OBJECT_FOREIGN_SERVER) return false; - char *serverName = strVal(stmt->object); - - if (!IsIcebergCatalogServer(serverName)) - return false; - - if (IsCatalogOwnedByExtension(serverName)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot rename the extension-owned \"%s\" catalog 
server", - serverName))); - if (IsCatalogOwnedByExtension(stmt->newname)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -317,24 +250,6 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, stmt->newname), errhint("Choose a different server name."))); } - else if (IsA(parsetree, AlterOwnerStmt)) - { - AlterOwnerStmt *stmt = (AlterOwnerStmt *) parsetree; - - if (stmt->objectType != OBJECT_FOREIGN_SERVER) - return false; - - char *serverName = strVal(stmt->object); - - if (!IsIcebergCatalogServer(serverName)) - return false; - - if (IsCatalogOwnedByExtension(serverName)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot change owner of the extension-owned \"%s\" catalog server", - serverName))); - } return false; } @@ -366,38 +281,34 @@ RequireRestTypeForIcebergCatalogServer(ProcessUtilityParams * processUtilityPara pg_strcasecmp(stmt->servertype, REST_CATALOG_NAME) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("iceberg_catalog server requires TYPE 'rest'"), - errhint("Specify TYPE 'rest' when creating an " - "iceberg_catalog server."))); + errmsg("iceberg_catalog server requires TYPE 'rest'"))); return false; } /* - * GetRestCatalogOptionsFromServer returns a RestCatalogOptions - * populated from the options of the named ForeignServer. GUC values are - * used as defaults; any option explicitly set on the server overrides the - * corresponding GUC. This applies to both the extension-owned 'rest' - * server and user-created iceberg_catalog servers. + * GetRestCatalogOptionsFromCatalog returns a RestCatalogOptions struct. + * For the built-in 'rest' catalog name the GUCs are used directly. + * For user-created servers, the GUCs serve as defaults, + * overridden by any option set on the server. 
*/ RestCatalogOptions * -GetRestCatalogOptionsFromServer(const char *serverName) +GetRestCatalogOptionsFromCatalog(const char *catalog) { - /* Normalize case-insensitive match to the canonical pre-created name */ - if (pg_strcasecmp(serverName, REST_CATALOG_NAME) == 0) - serverName = REST_CATALOG_NAME; - - ForeignServer *server = GetForeignServerByName(serverName, false); - ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); - - Assert(strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0); - RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); - opts->serverName = pstrdup(serverName); + /* + * Normalize built-in catalog name to the canonical constant so that + * case variations (e.g. 'REST', 'rEst') compare equal with strcmp. + * User-created server names are case-sensitive and stored as-is. + */ + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) + opts->catalog = pstrdup(REST_CATALOG_NAME); + else + opts->catalog = pstrdup(catalog); - /* GUC values serve as defaults; server options override below */ + /* GUC values serve as defaults */ opts->host = RestCatalogHost; opts->oauthHostPath = RestCatalogOauthHostPath; opts->clientId = RestCatalogClientId; @@ -407,54 +318,68 @@ GetRestCatalogOptionsFromServer(const char *serverName) opts->enableVendedCredentials = RestCatalogEnableVendedCredentials; opts->locationPrefix = GetIcebergDefaultLocationPrefix(); - ListCell *lc; - - foreach(lc, server->options) + /* + * The built-in 'rest' name uses GUCs exclusively. + * For user-created servers, look up server options and + * override the GUC defaults. 
+ */ + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) != 0) { - DefElem *def = (DefElem *) lfirst(lc); - - if (pg_strcasecmp(def->defname, "rest_endpoint") == 0) - opts->host = defGetString(def); - else if (pg_strcasecmp(def->defname, "client_id") == 0) - opts->clientId = defGetString(def); - else if (pg_strcasecmp(def->defname, "client_secret") == 0) - opts->clientSecret = defGetString(def); - else if (pg_strcasecmp(def->defname, "scope") == 0) - opts->scope = defGetString(def); - else if (pg_strcasecmp(def->defname, "rest_auth_type") == 0) - { - char *authType = defGetString(def); + ForeignServer *server = GetForeignServerByName(catalog, false); + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); - opts->authType = (pg_strcasecmp(authType, "horizon") == 0) - ? REST_CATALOG_AUTH_TYPE_HORIZON - : REST_CATALOG_AUTH_TYPE_OAUTH2; - } - else if (pg_strcasecmp(def->defname, "oauth_endpoint") == 0) - opts->oauthHostPath = defGetString(def); - else if (pg_strcasecmp(def->defname, "enable_vended_credentials") == 0) - opts->enableVendedCredentials = defGetBoolean(def); - else if (pg_strcasecmp(def->defname, "location_prefix") == 0) + Assert(strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0); + + ListCell *lc; + + foreach(lc, server->options) { - bool inPlace = false; - opts->locationPrefix = StripTrailingSlash(defGetString(def), inPlace); + DefElem *def = (DefElem *) lfirst(lc); + + if (pg_strcasecmp(def->defname, "rest_endpoint") == 0) + opts->host = defGetString(def); + else if (pg_strcasecmp(def->defname, "client_id") == 0) + opts->clientId = defGetString(def); + else if (pg_strcasecmp(def->defname, "client_secret") == 0) + opts->clientSecret = defGetString(def); + else if (pg_strcasecmp(def->defname, "scope") == 0) + opts->scope = defGetString(def); + else if (pg_strcasecmp(def->defname, "rest_auth_type") == 0) + { + char *authType = defGetString(def); + + opts->authType = (pg_strcasecmp(authType, "horizon") == 0) + ? 
REST_CATALOG_AUTH_TYPE_HORIZON + : REST_CATALOG_AUTH_TYPE_OAUTH2; + } + else if (pg_strcasecmp(def->defname, "oauth_endpoint") == 0) + opts->oauthHostPath = defGetString(def); + else if (pg_strcasecmp(def->defname, "enable_vended_credentials") == 0) + opts->enableVendedCredentials = defGetBoolean(def); + else if (pg_strcasecmp(def->defname, "location_prefix") == 0) + { + bool inPlace = false; + opts->locationPrefix = StripTrailingSlash(defGetString(def), inPlace); + } } } if (opts->host == NULL || opts->host[0] == '\0') ereport(ERROR, (errcode(ERRCODE_FDW_OPTION_NAME_NOT_FOUND), - errmsg("\"rest_endpoint\" option is required for iceberg_catalog server \"%s\"", - serverName))); + errmsg("\"rest_endpoint\" is not configured for REST catalog \"%s\"", + catalog), + errhint("Set the pg_lake_iceberg.rest_catalog_host GUC or " + "the \"rest_endpoint\" option on the server."))); return opts; } /* - * GetRestCatalogOptionsForRelation returns the REST catalog options - * info for the given relation. The catalog option value is used as the - * server name. For the extension-owned 'rest' server and user-created - * servers alike, server options are read first with GUC fallback. + * GetRestCatalogOptionsForRelation returns the REST catalog options for + * the given relation. The catalog option value is used as the server + * name (or built-in 'rest' literal). */ RestCatalogOptions * GetRestCatalogOptionsForRelation(Oid relationId) @@ -467,7 +392,7 @@ GetRestCatalogOptionsForRelation(Oid relationId) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("catalog option is not set for relation %u", relationId))); - return GetRestCatalogOptionsFromServer(catalog); + return GetRestCatalogOptionsFromCatalog(catalog); } @@ -942,18 +867,18 @@ ReportHTTPError(HttpResult httpResult, int level) /* - * Build a cache key for the per-server token cache. + * Build a cache key for the per-catalog token cache. 
*/ static void BuildTokenCacheKey(char *key, const RestCatalogOptions * opts) { - Assert(opts->serverName != NULL); - strlcpy(key, opts->serverName, TOKEN_CACHE_KEY_LEN); + Assert(opts->catalog != NULL); + strlcpy(key, opts->catalog, TOKEN_CACHE_KEY_LEN); } /* - * Initialize the per-server token cache hash table if needed. + * Initialize the per-catalog token cache hash table if needed. */ static void InitTokenCacheIfNeeded(void) @@ -979,8 +904,8 @@ InitTokenCacheIfNeeded(void) /* - * Gets an access token from rest catalog. Caches the token per server - * (keyed by server name) until it is about to expire. + * Gets an access token from rest catalog. Caches the token per catalog + * (keyed by catalog) until it is about to expire. */ static char * GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken) diff --git a/pg_lake_iceberg/src/test/rest_catalog.c b/pg_lake_iceberg/src/test/rest_catalog.c index e5422515..069213a2 100644 --- a/pg_lake_iceberg/src/test/rest_catalog.c +++ b/pg_lake_iceberg/src/test/rest_catalog.c @@ -37,7 +37,7 @@ register_namespace_to_rest_catalog(PG_FUNCTION_ARGS) char *catalogName = text_to_cstring(PG_GETARG_TEXT_P(0)); char *namespaceName = text_to_cstring(PG_GETARG_TEXT_P(1)); - RestCatalogOptions *opts = GetRestCatalogOptionsFromServer(REST_CATALOG_NAME); + RestCatalogOptions *opts = GetRestCatalogOptionsFromCatalog(REST_CATALOG_NAME); RegisterNamespaceToRestCatalog(opts, catalogName, namespaceName); PG_RETURN_VOID(); diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 82a2c637..8dbbad2b 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -767,7 +767,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) { char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogOptions *opts = - GetRestCatalogOptionsFromServer(catalogOptionValue); + 
GetRestCatalogOptionsFromCatalog(catalogOptionValue); ErrorIfRestNamespaceDoesNotExist(opts, catalogName, catalogNamespace); @@ -890,7 +890,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogOptions *opts = - GetRestCatalogOptionsFromServer(catalogOptionValue); + GetRestCatalogOptionsFromCatalog(catalogOptionValue); if (opts->locationPrefix != NULL) defaultLocationPrefix = opts->locationPrefix; @@ -991,7 +991,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) */ char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogOptions *opts = - GetRestCatalogOptionsFromServer(catalogOptionValue); + GetRestCatalogOptionsFromCatalog(catalogOptionValue); RegisterNamespaceToRestCatalog(opts, get_database_name(MyDatabaseId), get_namespace_name(namespaceId)); diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 7d233024..4c43a431 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -121,6 +121,7 @@ static HTAB *TrackedIcebergMetadataOperationsHash = NULL; */ static HTAB *RestCatalogRequestsHash = NULL; + /* some pre-allocated memory so we don't palloc() ever in XACT_COMMIT */ static MemoryContext PgLakeXactCommitContext = NULL; @@ -643,7 +644,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT MemoryContext oldctx = MemoryContextSwitchTo(TopTransactionContext); PgLakeXactRestCatalogOpts = palloc0(sizeof(RestCatalogOptions)); - PgLakeXactRestCatalogOpts->serverName = pstrdup(resolvedOpts->serverName); + PgLakeXactRestCatalogOpts->catalog = pstrdup(resolvedOpts->catalog); PgLakeXactRestCatalogOpts->host = pstrdup(resolvedOpts->host); PgLakeXactRestCatalogOpts->oauthHostPath 
= resolvedOpts->oauthHostPath ? pstrdup(resolvedOpts->oauthHostPath) : NULL; PgLakeXactRestCatalogOpts->clientId = resolvedOpts->clientId ? pstrdup(resolvedOpts->clientId) : NULL; @@ -655,15 +656,15 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT MemoryContextSwitchTo(oldctx); } - else if (strcmp(PgLakeXactRestCatalogOpts->serverName, resolvedOpts->serverName) != 0) + else if (strcmp(PgLakeXactRestCatalogOpts->catalog, resolvedOpts->catalog) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot modify tables from different REST catalogs " "in the same transaction"), errdetail("This transaction already targets catalog server " "\"%s\", but table %u belongs to \"%s\".", - PgLakeXactRestCatalogOpts->serverName, relationId, - resolvedOpts->serverName))); + PgLakeXactRestCatalogOpts->catalog, relationId, + resolvedOpts->catalog))); requestPerTable->catalogName = MemoryContextStrdup(TopTransactionContext, GetRestCatalogName(relationId)); diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 66c2f2b5..7fcc086d 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -2,7 +2,7 @@ from utils_pytest import * -# ── FDW and pre-created servers ──────────────────────────────────────────── +# ── FDW --------------------------─────────────────────────────────────────- def test_iceberg_catalog_fdw_exists(pg_conn, extension): @@ -24,39 +24,6 @@ def test_iceberg_catalog_fdw_has_no_handler(pg_conn, extension): assert result[0]["fdwhandler"] == 0 -def test_precreated_postgres_server(pg_conn, extension): - """A 'postgres' server of TYPE 'postgres' should be pre-created.""" - result = run_query( - "SELECT srvname, srvtype FROM pg_foreign_server WHERE srvname = 'postgres'", - pg_conn, - ) - assert len(result) == 1 - assert result[0]["srvname"] == "postgres" - 
assert result[0]["srvtype"] == "postgres" - - -def test_precreated_object_store_server(pg_conn, extension): - """An 'object_store' server of TYPE 'object_store' should be pre-created.""" - result = run_query( - "SELECT srvname, srvtype FROM pg_foreign_server WHERE srvname = 'object_store'", - pg_conn, - ) - assert len(result) == 1 - assert result[0]["srvname"] == "object_store" - assert result[0]["srvtype"] == "object_store" - - -def test_precreated_rest_server(pg_conn, extension): - """A 'rest' server of TYPE 'rest' should be pre-created.""" - result = run_query( - "SELECT srvname, srvtype FROM pg_foreign_server WHERE srvname = 'rest'", - pg_conn, - ) - assert len(result) == 1 - assert result[0]["srvname"] == "rest" - assert result[0]["srvtype"] == "rest" - - # ── CREATE SERVER with valid options ─────────────────────────────────────── @@ -184,10 +151,18 @@ def test_reject_create_foreign_table_on_iceberg_catalog_server( superuser_conn, extension ): """CREATE FOREIGN TABLE on an iceberg_catalog server is blocked.""" + run_command( + """ + CREATE SERVER test_ft_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) err = run_command( """ - CREATE FOREIGN TABLE test_ft_pg (id int) - SERVER postgres + CREATE FOREIGN TABLE test_ft_tbl (id int) + SERVER test_ft_srv """, superuser_conn, raise_error=False, @@ -438,7 +413,7 @@ def test_catalog_object_store_literal_still_works( pg_conn.rollback() -# ── Protection of extension-owned catalog servers ───────────────────────── +# ── Protection of reserved catalog names ─────────────────────────────────── def test_reject_create_server_type_postgres(superuser_conn, extension): @@ -518,119 +493,10 @@ def test_reject_create_server_reserved_name(superuser_conn, extension): raise_error=False, ) assert err is not None, f"Expected error for reserved name '{name}'" - assert "reserved for the extension-owned catalog" in str(err) + assert "is reserved" in 
str(err) superuser_conn.rollback() -def test_reject_alter_postgres_server(superuser_conn, extension): - """ALTER SERVER on the extension-owned 'postgres' server is blocked.""" - err = run_command( - "ALTER SERVER postgres OPTIONS (ADD location_prefix 's3://bucket')", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot alter the extension-owned "postgres" catalog server' in str(err) - superuser_conn.rollback() - - -def test_reject_alter_object_store_server(superuser_conn, extension): - """ALTER SERVER on the extension-owned 'object_store' server is blocked.""" - err = run_command( - "ALTER SERVER object_store OPTIONS (ADD location_prefix 's3://bucket')", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot alter the extension-owned "object_store" catalog server' in str(err) - superuser_conn.rollback() - - -def test_allow_alter_rest_server(superuser_conn, extension): - """ALTER SERVER on the extension-owned 'rest' server is allowed.""" - run_command( - "ALTER SERVER rest OPTIONS (ADD rest_endpoint 'http://localhost:8181')", - superuser_conn, - ) - run_command( - "ALTER SERVER rest OPTIONS (DROP rest_endpoint)", - superuser_conn, - ) - superuser_conn.rollback() - - -def test_reject_drop_postgres_server(superuser_conn, extension): - """DROP SERVER on the extension-owned 'postgres' server is blocked.""" - err = run_command( - "DROP SERVER postgres", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot drop the extension-owned "postgres" catalog server' in str(err) - superuser_conn.rollback() - - -def test_reject_drop_object_store_server(superuser_conn, extension): - """DROP SERVER on the extension-owned 'object_store' server is blocked.""" - err = run_command( - "DROP SERVER object_store", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot drop the extension-owned "object_store" catalog server' in str(err) - superuser_conn.rollback() - - 
-def test_reject_drop_rest_server(superuser_conn, extension): - """DROP SERVER on the extension-owned 'rest' server is blocked.""" - err = run_command( - "DROP SERVER rest", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot drop the extension-owned "rest" catalog server' in str(err) - superuser_conn.rollback() - - -def test_reject_rename_postgres_server(superuser_conn, extension): - """RENAME on the extension-owned 'postgres' server is blocked.""" - err = run_command( - "ALTER SERVER postgres RENAME TO my_postgres", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot rename the extension-owned "postgres" catalog server' in str(err) - superuser_conn.rollback() - - -def test_reject_rename_object_store_server(superuser_conn, extension): - """RENAME on the extension-owned 'object_store' server is blocked.""" - err = run_command( - "ALTER SERVER object_store RENAME TO my_obj_store", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot rename the extension-owned "object_store" catalog server' in str(err) - superuser_conn.rollback() - - -def test_reject_rename_rest_server(superuser_conn, extension): - """RENAME on the extension-owned 'rest' server is blocked.""" - err = run_command( - "ALTER SERVER rest RENAME TO my_rest", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot rename the extension-owned "rest" catalog server' in str(err) - superuser_conn.rollback() - - def test_reject_rename_to_reserved_name(superuser_conn, extension): """Renaming a user-created server TO a reserved name is blocked.""" run_command( @@ -661,50 +527,6 @@ def test_reject_rename_to_reserved_name(superuser_conn, extension): superuser_conn.rollback() -def test_reject_owner_change_postgres_server(superuser_conn, extension): - """ALTER SERVER ... 
OWNER TO on the extension-owned 'postgres' server is blocked.""" - err = run_command( - "ALTER SERVER postgres OWNER TO CURRENT_USER", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert ( - 'cannot change owner of the extension-owned "postgres" catalog server' - in str(err) - ) - superuser_conn.rollback() - - -def test_reject_owner_change_object_store_server(superuser_conn, extension): - """ALTER SERVER ... OWNER TO on the extension-owned 'object_store' server is blocked.""" - err = run_command( - "ALTER SERVER object_store OWNER TO CURRENT_USER", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert ( - 'cannot change owner of the extension-owned "object_store" catalog server' - in str(err) - ) - superuser_conn.rollback() - - -def test_reject_owner_change_rest_server(superuser_conn, extension): - """ALTER SERVER ... OWNER TO on the extension-owned 'rest' server is blocked.""" - err = run_command( - "ALTER SERVER rest OWNER TO CURRENT_USER", - superuser_conn, - raise_error=False, - ) - assert err is not None - assert 'cannot change owner of the extension-owned "rest" catalog server' in str( - err - ) - superuser_conn.rollback() - - def test_allow_drop_user_created_server(superuser_conn, extension): """DROP SERVER on a user-created server should work fine.""" run_command( From 911135b2208192cf920ca8382430c7adf88eceb2 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Tue, 24 Mar 2026 15:41:25 +0300 Subject: [PATCH 14/23] Use catalog_name server option for REST catalog tables The catalog_name option was accepted on iceberg_catalog servers but never consumed. Now GetRestCatalogOptionsFromCatalog reads it and stores it in RestCatalogOptions.catalogName. GetRestCatalogName uses a three-level fallback for the REST API catalog prefix: table option > server option > database name. This lets a server define a default catalog_name for all its tables while still allowing per-table overrides. 
- Add catalogName field to RestCatalogOptions. - Populate it from the server catalog_name option in GetRestCatalogOptionsFromCatalog. - Rewrite GetRestCatalogName to check table option first, then opts->catalogName. - Writable REST catalog tables cannot use a server with catalog_name set, since for now they inherit from the database name, schema name, and table name - Add tests Signed-off-by: sfc-gh-npuka --- .../pg_lake/rest_catalog/rest_catalog.h | 1 + .../src/rest_catalog/rest_catalog.c | 40 ++-- pg_lake_table/src/ddl/create_table.c | 24 ++- .../track_iceberg_metadata_changes.c | 1 + .../pytests/test_modify_iceberg_rest_table.py | 201 ++++++++++++++++++ 5 files changed, 242 insertions(+), 25 deletions(-) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 3e21bdeb..4766a0d4 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -53,6 +53,7 @@ typedef struct RestCatalogOptions char *clientSecret; char *scope; char *locationPrefix; + char *catalogName; /* REST API catalog prefix; defaults to dbname */ int authType; bool enableVendedCredentials; } RestCatalogOptions; diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 8f979dea..107bf4c7 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -356,6 +356,8 @@ GetRestCatalogOptionsFromCatalog(const char *catalog) opts->oauthHostPath = defGetString(def); else if (pg_strcasecmp(def->defname, "enable_vended_credentials") == 0) opts->enableVendedCredentials = defGetBoolean(def); + else if (pg_strcasecmp(def->defname, "catalog_name") == 0) + opts->catalogName = defGetString(def); else if (pg_strcasecmp(def->defname, "location_prefix") == 0) { bool inPlace = false; @@ -1227,10 +1229,13 @@ GetRestCatalogNamespace(Oid 
relationId) /* -* Readable rest catalog tables always use the catalog_name option -* as the catalog name in the external catalog. Writable rest catalog tables -* use the current database name as the catalog name. -*/ + * Returns the catalog name to use for REST API calls. + * + * Precedence: table option catalog_name > server option catalog_name + * > current database name. + * + * Read-only tables must have catalog_name set (on the table or server). + */ char * GetRestCatalogName(Oid relationId) { @@ -1239,25 +1244,22 @@ GetRestCatalogName(Oid relationId) Assert(catalogType == REST_CATALOG_READ_ONLY || catalogType == REST_CATALOG_READ_WRITE); - if (catalogType == REST_CATALOG_READ_ONLY) - { - - Assert(GetIcebergCatalogType(relationId) == REST_CATALOG_READ_ONLY || - GetIcebergCatalogType(relationId) == REST_CATALOG_READ_WRITE); + ForeignTable *foreignTable = GetForeignTable(relationId); + char *catalogName = GetStringOption(foreignTable->options, "catalog_name", false); - ForeignTable *foreignTable = GetForeignTable(relationId); - List *options = foreignTable->options; + if (catalogName != NULL) + return catalogName; - char *catalogName = GetStringOption(options, "catalog_name", false); + RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); - /* user provided the custom catalog name */ - if (!catalogName) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("catalog_name option is required for rest catalog iceberg tables"))); + if (opts->catalogName != NULL) + return opts->catalogName; - return catalogName; - } + if (catalogType == REST_CATALOG_READ_ONLY) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("catalog_name is required for read-only REST catalog tables"), + errhint("Set catalog_name on the table or the server."))); return get_database_name(MyDatabaseId); } diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 8dbbad2b..0f449635 100644 --- 
a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -787,12 +787,11 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (!hasExternalCatalogReadOnlyOption) { /* - * For writable object store catalog tables, we need to continue - * with the regular iceberg table creation process. We only fill - * in the catalog options here. Other than that, we simply check - * if user provided any catalog options. That's not allowed, - * writable tables only inherit from the database name, schema - * name, and table name. + * Writable tables always derive catalog_name, catalog_namespace, + * and catalog_table_name from the database name, schema name, and + * table name. Explicit catalog options on the table are rejected, + * and the server must not have catalog_name set either, since that + * would conflict with the derived values. */ if (catalogNamespaceProvided != NULL || catalogTableNameProvided != NULL || @@ -802,6 +801,19 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) errmsg("writable %s catalog iceberg tables do not " "allow explicit catalog options", hasObjectStoreCatalogOption ? 
OBJECT_STORE_CATALOG_NAME : REST_CATALOG_NAME))); } + + if (hasRestCatalogOption) + { + char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + GetRestCatalogOptionsFromCatalog(catalogOptionValue); + + if (opts->catalogName != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("writable REST catalog tables cannot use a server " + "with catalog_name set"))); + } } else if (createStmt->base.tableElts == NIL && hasExternalCatalogReadOnlyOption) { diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 4c43a431..a4440a53 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -651,6 +651,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT PgLakeXactRestCatalogOpts->clientSecret = resolvedOpts->clientSecret ? pstrdup(resolvedOpts->clientSecret) : NULL; PgLakeXactRestCatalogOpts->scope = resolvedOpts->scope ? pstrdup(resolvedOpts->scope) : NULL; PgLakeXactRestCatalogOpts->locationPrefix = resolvedOpts->locationPrefix ? pstrdup(resolvedOpts->locationPrefix) : NULL; + PgLakeXactRestCatalogOpts->catalogName = resolvedOpts->catalogName ? 
pstrdup(resolvedOpts->catalogName) : NULL; PgLakeXactRestCatalogOpts->authType = resolvedOpts->authType; PgLakeXactRestCatalogOpts->enableVendedCredentials = resolvedOpts->enableVendedCredentials; diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index 627b2ffe..c22c8bf1 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -735,3 +735,204 @@ def test_reject_modify_different_rest_catalogs_in_single_transaction( pg_conn.commit() pg_conn.rollback() + + +def test_reject_writable_table_on_server_with_catalog_name( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Creating a writable table on a server that has catalog_name set must + be rejected, because writable tables always derive the catalog name + from the database name. 
+ """ + if installcheck: + return + + SERVER_NAME = "rest_catalog_has_catname" + SCHEMA_NAME = TABLE_NAMESPACE + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + catalog_name '{server_params.PG_DATABASE}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + err = run_command( + f"CREATE TABLE {SCHEMA_NAME}.reject_catname (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + raise_error=False, + ) + assert err is not None + assert ( + "writable REST catalog tables cannot use a server with catalog_name set" + in str(err) + ) + pg_conn.rollback() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_server_catalog_name_overrides_default( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + polaris_session, + create_http_helper_functions, +): + """ + The server's catalog_name must override the default (database name). + We prove this by creating a server with a wrong catalog_name and + creating a read-only table that does not set catalog_name itself. + The REST metadata lookup should fail because it uses the server's + value, not the default database name. 
+ """ + if installcheck: + return + + SERVER_NAME = "rest_catalog_wrong_name" + SCHEMA_NAME = TABLE_NAMESPACE + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + catalog_name 'nonexistent_catalog') + """, + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + f"CREATE TABLE {SCHEMA_NAME}.srv_catname_fail () " + f"USING iceberg WITH (catalog='{SERVER_NAME}', read_only='true')", + pg_conn, + raise_error=False, + ) + assert err is not None, ( + "Expected failure because server's catalog_name 'nonexistent_catalog' " + "should be used instead of the default database name" + ) + pg_conn.rollback() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_table_catalog_name_overrides_server( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + set_polaris_gucs, + create_http_helper_functions, +): + """ + The table-level catalog_name takes precedence over the server's. + We create a writable table via the built-in 'rest' catalog, then + create a read-only table via a server whose catalog_name is wrong, + overriding it on the table with the correct one. If the table + option did not take precedence, the metadata lookup would fail. 
+ """ + if installcheck: + return + + SERVER_NAME = "rest_catname_bad" + SCHEMA_NAME = "test_catname_override" + SRC_TABLE = "catname_src" + RO_TABLE = "catname_ro" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{SRC_TABLE} (id bigint) " + f"USING iceberg WITH (catalog='rest')", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{SRC_TABLE} " + f"SELECT i FROM generate_series(1, 5) i", + pg_conn, + ) + pg_conn.commit() + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + catalog_name 'nonexistent_catalog') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{RO_TABLE} () " + f"USING iceberg WITH (catalog='{SERVER_NAME}', read_only='true', " + f"catalog_name='{server_params.PG_DATABASE}', " + f"catalog_namespace='{SCHEMA_NAME}', " + f"catalog_table_name='{SRC_TABLE}')", + pg_conn, + ) + pg_conn.commit() + + results = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.{RO_TABLE}", pg_conn) + assert results[0][0] == 5 + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() From e83fd0b8e6cde1b1009d7dc1907f02175b8d1f3b Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Thu, 16 Apr 2026 14:05:33 +0200 Subject: [PATCH 15/23] Cleanup from rebase and fix style Signed-off-by: sfc-gh-npuka --- pg_lake_iceberg/include/pg_lake/http/http_client.h | 5 ++--- 
.../include/pg_lake/rest_catalog/rest_catalog.h | 5 ++--- pg_lake_iceberg/src/http/http_client.c | 5 ++--- pg_lake_iceberg/src/rest_catalog/rest_catalog.c | 14 +++++++------- pg_lake_iceberg/src/test/test_http_client.c | 8 +++----- pg_lake_table/src/ddl/create_table.c | 6 +++--- .../transaction/track_iceberg_metadata_changes.c | 2 +- 7 files changed, 20 insertions(+), 25 deletions(-) diff --git a/pg_lake_iceberg/include/pg_lake/http/http_client.h b/pg_lake_iceberg/include/pg_lake/http/http_client.h index 17a9b39f..8d9c3803 100644 --- a/pg_lake_iceberg/include/pg_lake/http/http_client.h +++ b/pg_lake_iceberg/include/pg_lake/http/http_client.h @@ -50,7 +50,7 @@ extern bool HttpClientTraceTraffic; #define HTTP_STATUS_SERVICE_UNAVAILABLE 503 /* Callback function to determine if a request should be retried */ -typedef bool (*HttpRetryFn) (long status, int maxRetry, int retryNo, void *context, List *headers); +typedef bool (*HttpRetryFn) (long status, int maxRetry, int retryNo); /* plain C API (no PostgreSQL types) */ extern PGDLLEXPORT HttpResult HttpGet(const char *url, List *headers); @@ -60,6 +60,5 @@ extern PGDLLEXPORT HttpResult HttpDelete(const char *url, List *headers); extern PGDLLEXPORT HttpResult HttpPut(const char *url, const char *body, List *headers); extern PGDLLEXPORT HttpResult SendHttpRequest(HttpMethod method, const char *url, const char *body, List *headers); extern PGDLLEXPORT HttpResult SendHttpRequestWithRetry(HttpMethod method, const char *url, const char *body, - List *headers, HttpRetryFn retryFn, int maxRetry, - void *retryContext); + List *headers, HttpRetryFn retryFn, int maxRetry); extern PGDLLEXPORT int LinearBackoffSleepMs(int baseMs, int retryNo); diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 4766a0d4..0eb4ff00 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ 
b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -44,9 +44,8 @@ extern bool RestCatalogEnableVendedCredentials; typedef struct RestCatalogOptions { char *catalog; /* catalog name, used for token cache keying; - * can be 'rest' or a user-created server name - * of TYPE 'rest' - */ + * can be 'rest' or a user-created server name + * of TYPE 'rest' */ char *host; char *oauthHostPath; char *clientId; diff --git a/pg_lake_iceberg/src/http/http_client.c b/pg_lake_iceberg/src/http/http_client.c index 3f696ddc..64225a0f 100644 --- a/pg_lake_iceberg/src/http/http_client.c +++ b/pg_lake_iceberg/src/http/http_client.c @@ -276,8 +276,7 @@ CurlReturnError(CURL *curl, struct curl_slist *headerList, */ HttpResult SendHttpRequestWithRetry(HttpMethod method, const char *url, const char *body, - List *headers, HttpRetryFn retryFn, int maxRetry, - void *retryContext) + List *headers, HttpRetryFn retryFn, int maxRetry) { Assert(maxRetry > 0); @@ -287,7 +286,7 @@ SendHttpRequestWithRetry(HttpMethod method, const char *url, const char *body, { result = SendHttpRequest(method, url, body, headers); - if (retryFn != NULL && retryFn(result.status, maxRetry, retryNo, retryContext, headers)) + if (retryFn != NULL && retryFn(result.status, maxRetry, retryNo)) continue; else break; diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 107bf4c7..665ede7d 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -299,8 +299,8 @@ GetRestCatalogOptionsFromCatalog(const char *catalog) RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); /* - * Normalize built-in catalog name to the canonical constant so that - * case variations (e.g. 'REST', 'rEst') compare equal with strcmp. + * Normalize built-in catalog name to the canonical constant so that case + * variations (e.g. 'REST', 'rEst') compare equal with strcmp. 
* User-created server names are case-sensitive and stored as-is. */ if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) @@ -319,9 +319,8 @@ GetRestCatalogOptionsFromCatalog(const char *catalog) opts->locationPrefix = GetIcebergDefaultLocationPrefix(); /* - * The built-in 'rest' name uses GUCs exclusively. - * For user-created servers, look up server options and - * override the GUC defaults. + * The built-in 'rest' name uses GUCs exclusively. For user-created + * servers, look up server options and override the GUC defaults. */ if (pg_strcasecmp(catalog, REST_CATALOG_NAME) != 0) { @@ -360,8 +359,9 @@ GetRestCatalogOptionsFromCatalog(const char *catalog) opts->catalogName = defGetString(def); else if (pg_strcasecmp(def->defname, "location_prefix") == 0) { - bool inPlace = false; - opts->locationPrefix = StripTrailingSlash(defGetString(def), inPlace); + bool inPlace = false; + + opts->locationPrefix = StripTrailingSlash(defGetString(def), inPlace); } } } diff --git a/pg_lake_iceberg/src/test/test_http_client.c b/pg_lake_iceberg/src/test/test_http_client.c index bf55dc61..185a77ac 100644 --- a/pg_lake_iceberg/src/test/test_http_client.c +++ b/pg_lake_iceberg/src/test/test_http_client.c @@ -34,7 +34,7 @@ PG_FUNCTION_INFO_V1(test_http_with_retry); static Datum build_http_result(FunctionCallInfo fcinfo, const HttpResult * r); static List *extract_headers(FunctionCallInfo fcinfo, int argno); -static bool TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo, void *context, List *headers); +static bool TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo); Datum @@ -132,8 +132,7 @@ test_http_with_retry(PG_FUNCTION_ARGS) HttpResult r = SendHttpRequestWithRetry(method, url, body, headers, TestShouldRetryRequestToRestCatalog, - MAX_HTTP_RETRY_FOR_REST_CATALOG, - NULL); + MAX_HTTP_RETRY_FOR_REST_CATALOG); PG_RETURN_DATUM(build_http_result(fcinfo, &r)); } @@ -193,8 +192,7 @@ build_http_result(FunctionCallInfo fcinfo, const HttpResult * 
r) * retries until maxRetry is reached. */ static bool -TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo, - void *context, List *headers) +TestShouldRetryRequestToRestCatalog(long status, int maxRetry, int retryNo) { if (retryNo > maxRetry) return false; diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 0f449635..ce34e017 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -789,9 +789,9 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) /* * Writable tables always derive catalog_name, catalog_namespace, * and catalog_table_name from the database name, schema name, and - * table name. Explicit catalog options on the table are rejected, - * and the server must not have catalog_name set either, since that - * would conflict with the derived values. + * table name. Explicit catalog options on the table are + * rejected, and the server must not have catalog_name set either, + * since that would conflict with the derived values. */ if (catalogNamespaceProvided != NULL || catalogTableNameProvided != NULL || diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index a4440a53..050b7b3a 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -132,7 +132,7 @@ static MemoryContext PgLakeXactCommitContext = NULL; * where syscache lookups are forbidden. Only one REST catalog server is allowed * per transaction. 
*/ -static RestCatalogOptions *PgLakeXactRestCatalogOpts = NULL; +static RestCatalogOptions * PgLakeXactRestCatalogOpts = NULL; /* From a75268d88e58a6ff1694d171a11117bd639ee524 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Thu, 16 Apr 2026 16:38:50 +0200 Subject: [PATCH 16/23] Address Aykut's latest review Consolidate the three per-transaction REST catalog statics (RestCatalogRequestsHash, PgLakeXactCommitContext, PgLakeXactRestCatalogOpts) into a single RestCatalogXactState struct to reduce global state and make lifetime management clearer. Merge BlockDDLOnExtensionCatalogs and RequireRestTypeForIcebergCatalogServer into a single ValidateIcebergCatalogServerDDL hook, eliminating duplicate hook guard logic for CREATE SERVER validation. Grant USAGE on iceberg_catalog FDW to lake_write so non-superusers with write permissions can create catalog servers. Additional cleanups: - Convert unreachable host check in FetchRestCatalogAccessToken to Assert - Rename FDW validator parameter from 'catalog' to 'catalogRelId' - Replace two separate override tests with a single parameterized test_server_option_overrides_guc covering rest_endpoint, client_id, client_secret, location_prefix, and catalog_name - Add tests for no-option server creation and lake_write permissions Signed-off-by: sfc-gh-npuka --- .../pg_lake/rest_catalog/rest_catalog.h | 5 +- pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql | 2 + .../src/rest_catalog/rest_catalog.c | 65 ++--- pg_lake_table/src/init.c | 3 +- .../track_iceberg_metadata_changes.c | 181 +++++++------ .../pytests/test_iceberg_catalog_server.py | 25 ++ .../pytests/test_modify_iceberg_rest_table.py | 245 +++++++++--------- 7 files changed, 255 insertions(+), 271 deletions(-) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 0eb4ff00..f0df17b2 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ 
b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -126,6 +126,5 @@ extern PGDLLEXPORT RestCatalogRequest * GetAddPartitionCatalogRequest(Oid relati extern PGDLLEXPORT RestCatalogRequest * GetSetPartitionDefaultIdCatalogRequest(Oid relationId, int specId); extern PGDLLEXPORT RestCatalogRequest * GetRemoveSnapshotCatalogRequest(List *removedSnapshotIds, Oid relationId); -/* ProcessUtility handlers for iceberg_catalog servers */ -extern PGDLLEXPORT bool BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, void *arg); -extern PGDLLEXPORT bool RequireRestTypeForIcebergCatalogServer(ProcessUtilityParams * processUtilityParams, void *arg); +/* ProcessUtility handler for iceberg_catalog server DDL validation */ +extern PGDLLEXPORT bool ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, void *arg); diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql index 70829978..9807cea6 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql @@ -38,3 +38,5 @@ LANGUAGE C STRICT; CREATE FOREIGN DATA WRAPPER iceberg_catalog NO HANDLER VALIDATOR lake_iceberg.iceberg_catalog_validator; + +GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO lake_write; diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 665ede7d..36f9aa62 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -134,7 +134,7 @@ Datum iceberg_catalog_validator(PG_FUNCTION_ARGS) { List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); - Oid catalog = PG_GETARG_OID(1); + Oid catalogRelId = PG_GETARG_OID(1); ListCell *cell; /* @@ -144,7 +144,7 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) * succeeds, but still reject if someone passes options where they don't * belong. 
*/ - if (catalog != ForeignServerRelationId) + if (catalogRelId != ForeignServerRelationId) { if (list_length(options_list) > 0) ereport(ERROR, @@ -190,20 +190,19 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) /* - * BlockDDLOnExtensionCatalogs prevents misuse of the reserved catalog - * names 'postgres', 'object_store', and 'rest'. These are built-in - * identifiers (not actual foreign servers), so we block: + * ValidateIcebergCatalogServerDDL validates DDL on iceberg_catalog servers: * - * - CREATE SERVER with a reserved name. - * - CREATE SERVER with TYPE 'postgres' or 'object_store'. - * - RENAME TO a reserved name. + * - CREATE SERVER: rejects reserved names ('postgres', 'object_store', + * 'rest'), rejects TYPE 'postgres'/'object_store', and requires + * TYPE 'rest'. + * - ALTER SERVER RENAME TO: rejects renaming to a reserved name. * - * ALTER/DROP/OWNER on these names will fail naturally because no + * ALTER/DROP/OWNER on reserved names will fail naturally because no * server object exists. */ bool -BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, - void *arg) +ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, + void *arg) { Node *parsetree = processUtilityParams->plannedStmt->utilityStmt; @@ -235,6 +234,12 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, errhint("Use the built-in \"%s\" or \"%s\" catalogs, " "or create a server of type 'rest'.", POSTGRES_CATALOG_NAME, OBJECT_STORE_CATALOG_NAME))); + + if (stmt->servertype == NULL || + pg_strcasecmp(stmt->servertype, REST_CATALOG_NAME) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("iceberg_catalog server requires TYPE 'rest'"))); } else if (IsA(parsetree, RenameStmt)) { @@ -255,38 +260,6 @@ BlockDDLOnExtensionCatalogs(ProcessUtilityParams * processUtilityParams, } -/* - * RequireRestTypeForIcebergCatalogServer ensures that CREATE SERVER - * commands using the iceberg_catalog FDW specify TYPE 'rest'. 
- */ -bool -RequireRestTypeForIcebergCatalogServer(ProcessUtilityParams * processUtilityParams, - void *arg) -{ - Node *parsetree = processUtilityParams->plannedStmt->utilityStmt; - - if (creating_extension) - return false; - - if (!IsA(parsetree, CreateForeignServerStmt)) - return false; - - CreateForeignServerStmt *stmt = (CreateForeignServerStmt *) parsetree; - - if (stmt->fdwname == NULL || - strcmp(stmt->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) - return false; - - if (stmt->servertype == NULL || - pg_strcasecmp(stmt->servertype, REST_CATALOG_NAME) != 0) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("iceberg_catalog server requires TYPE 'rest'"))); - - return false; -} - - /* * GetRestCatalogOptionsFromCatalog returns a RestCatalogOptions struct. * For the built-in 'rest' catalog name the GUCs are used directly. @@ -967,11 +940,7 @@ GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken) static void FetchRestCatalogAccessToken(RestCatalogOptions * opts, char **accessToken, int *expiresIn) { - if (!opts->host || !*opts->host) - ereport(ERROR, - (errmsg("REST catalog host is not configured"), - errhint("Set the \"rest_endpoint\" option on the server " - "or the pg_lake_iceberg.rest_catalog_host GUC."))); + Assert(opts->host != NULL && opts->host[0] != '\0'); if (!opts->clientSecret || !*opts->clientSecret) ereport(ERROR, (errmsg("REST catalog client_secret is not configured"), diff --git a/pg_lake_table/src/init.c b/pg_lake_table/src/init.c index e71d3acf..afea03f5 100644 --- a/pg_lake_table/src/init.c +++ b/pg_lake_table/src/init.c @@ -383,8 +383,7 @@ _PG_init(void) MarkGUCPrefixReserved(PG_LAKE_TABLE); - RegisterUtilityStatementHandler(RequireRestTypeForIcebergCatalogServer, NULL); - RegisterUtilityStatementHandler(BlockDDLOnExtensionCatalogs, NULL); + RegisterUtilityStatementHandler(ValidateIcebergCatalogServerDDL, NULL); RegisterUtilityStatementHandler(ProcessVacuumPgLakeTable, NULL); 
RegisterUtilityStatementHandler(ProcessCreatePgLakeTable, NULL); RegisterUtilityStatementHandler(ProcessCreateAsSelectPgLakeTable, NULL); diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 050b7b3a..8ee6ac0a 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -117,22 +117,21 @@ static int GetEffectiveMaxSnapshotAgeInSecs(Oid relationId); static HTAB *TrackedIcebergMetadataOperationsHash = NULL; /* -* Hash table to track rest catalog requests per relation within a transaction. -*/ -static HTAB *RestCatalogRequestsHash = NULL; - - -/* some pre-allocated memory so we don't palloc() ever in XACT_COMMIT */ -static MemoryContext PgLakeXactCommitContext = NULL; - -/* - * Resolved REST catalog options for the current transaction, deep-copied into - * TopTransactionContext in RecordRestCatalogRequestInTx (when syscache is still - * accessible) because PostAllRestCatalogRequests runs at XACT_EVENT_COMMIT, - * where syscache lookups are forbidden. Only one REST catalog server is allowed - * per transaction. + * Per-transaction context for REST catalog requests. Groups the request + * hash, pre-resolved catalog options, and the pre-allocated memory context + * used at XACT_EVENT_COMMIT time (where syscache lookups and large pallocs + * are forbidden). Allocated in TopTransactionContext and automatically + * freed at transaction end. Only one REST catalog server is allowed per + * transaction. 
*/ -static RestCatalogOptions * PgLakeXactRestCatalogOpts = NULL; +typedef struct PgLakeXactRestCatalogContext +{ + HTAB *requestsHash; + MemoryContext commitContext; + RestCatalogOptions *catalogOpts; +} PgLakeXactRestCatalogContext; + +static PgLakeXactRestCatalogContext * PgLakeXactRestCatalog = NULL; /* @@ -224,9 +223,7 @@ ResetTrackedIcebergMetadataOperation(void) void ResetRestCatalogRequests(void) { - RestCatalogRequestsHash = NULL; - PgLakeXactCommitContext = NULL; - PgLakeXactRestCatalogOpts = NULL; + PgLakeXactRestCatalog = NULL; } @@ -238,24 +235,24 @@ ResetRestCatalogRequests(void) void PostAllRestCatalogRequests(void) { - if (RestCatalogRequestsHash == NULL) + if (PgLakeXactRestCatalog == NULL) { return; } /* - * Switch to PgLakeXactCommitContext to avoid palloc() in XACT_COMMIT, as - * PgLakeXactCommitContext is pre-allocated before. + * Switch to commitContext to avoid palloc() in XACT_COMMIT, as + * commitContext is pre-allocated before. */ - MemoryContext oldContext = MemoryContextSwitchTo(PgLakeXactCommitContext); + MemoryContext oldContext = MemoryContextSwitchTo(PgLakeXactRestCatalog->commitContext); - Assert(PgLakeXactRestCatalogOpts != NULL); + Assert(PgLakeXactRestCatalog->catalogOpts != NULL); /* - * We need to iterate over the RestCatalogRequestsHash twice: 1. First, we - * need to post the create table requests to create the iceberg tables in - * the rest catalog. 2. Then, we need to post all the other modifications - * (like adding snapshots, partition specs, etc.) + * We need to iterate over the requests hash twice: 1. First, we need to + * post the create table requests to create the iceberg tables in the rest + * catalog. 2. Then, we need to post all the other modifications (like + * adding snapshots, partition specs, etc.) * * This is because the create table requests need to be completed before * we can add snapshots to the tables. 
And, REST API does not support @@ -263,7 +260,7 @@ PostAllRestCatalogRequests(void) */ HASH_SEQ_STATUS status; - hash_seq_init(&status, RestCatalogRequestsHash); + hash_seq_init(&status, PgLakeXactRestCatalog->requestsHash); RestCatalogRequestPerTable *requestPerTable = NULL; while ((requestPerTable = hash_seq_search(&status)) != NULL) @@ -297,8 +294,8 @@ PostAllRestCatalogRequests(void) HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, requestPerTable->tableRestUrl, createTableRequest->body, - PostHeadersWithAuth(PgLakeXactRestCatalogOpts), - PgLakeXactRestCatalogOpts); + PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts), + PgLakeXactRestCatalog->catalogOpts); if (httpResult.status != 200) { @@ -315,8 +312,8 @@ PostAllRestCatalogRequests(void) HttpResult httpResult = SendRequestToRestCatalog(HTTP_DELETE, requestPerTable->tableRestUrl, NULL, - DeleteHeadersWithAuth(PgLakeXactRestCatalogOpts), - PgLakeXactRestCatalogOpts); + DeleteHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts), + PgLakeXactRestCatalog->catalogOpts); if (httpResult.status != 204) { @@ -348,7 +345,7 @@ PostAllRestCatalogRequests(void) appendJsonKey(batchRequestBody, "table-changes"); appendStringInfo(batchRequestBody, "["); /* start array of changes */ - hash_seq_init(&status, RestCatalogRequestsHash); + hash_seq_init(&status, PgLakeXactRestCatalog->requestsHash); while ((requestPerTable = hash_seq_search(&status)) != NULL) { @@ -435,10 +432,10 @@ PostAllRestCatalogRequests(void) appendStringInfoChar(batchRequestBody, '}'); /* close json body */ char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, - PgLakeXactRestCatalogOpts->host, catalogName); + PgLakeXactRestCatalog->catalogOpts->host, catalogName); HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, - PostHeadersWithAuth(PgLakeXactRestCatalogOpts), - PgLakeXactRestCatalogOpts); + PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts), + PgLakeXactRestCatalog->catalogOpts); if 
(httpResult.status != 204) { @@ -447,7 +444,7 @@ PostAllRestCatalogRequests(void) } /* - * Switch back to old context from PgLakeXactCommitContext. + * Switch back to old context from commitContext. */ MemoryContextSwitchTo(oldContext); } @@ -561,55 +558,55 @@ InitTableMetadataTrackerHashIfNeeded(void) } /* - * InitTableMetadataTrackerHashIfNeeded is a helper function to manage the initialization - * of the hash. We allocate the hash and entries in TopTransactionContext. + * InitRestCatalogRequestsHashIfNeeded allocates the per-transaction + * PgLakeXactRestCatalog context on first use. Everything is placed in + * TopTransactionContext so it survives until XACT_EVENT_COMMIT and is + * cleaned up automatically at transaction end. */ static void InitRestCatalogRequestsHashIfNeeded(void) { - if (RestCatalogRequestsHash == NULL) - { - /* - * They always updated together. - */ - Assert(PgLakeXactCommitContext == NULL); + if (PgLakeXactRestCatalog != NULL) + return; - /* - * First allocate 1MB memory context to avoid palloc() in XACT_COMMIT - * as much as possible. Only with very large REST catalog requests we - * might need to palloc() in XACT_COMMIT, which is still better than - * always palloc()ing in XACT_COMMIT, reducing the risk of OOM - * significantly. These very large requests might happen when there - * are many tables modified in a single transaction, likely > 100 - * tables. We allocate in TopTransactionContext to preserve the - * context until the end of the transaction, and let it be cleaned up - * automatically at transaction end. 
- */ - PgLakeXactCommitContext = - AllocSetContextCreateInternal(TopTransactionContext, - "PgLakeXactCommitContext", - ONE_MB, ONE_MB, ONE_MB); - Assert(MemoryContextMemAllocated(PgLakeXactCommitContext, true) == ONE_MB); + MemoryContext oldctx = MemoryContextSwitchTo(TopTransactionContext); - HASHCTL ctl; + PgLakeXactRestCatalog = palloc0(sizeof(PgLakeXactRestCatalogContext)); - MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(RestCatalogRequestPerTable); - ctl.hash = oid_hash; + /* + * Pre-allocate 1MB memory context to avoid palloc() in XACT_COMMIT as + * much as possible. Only with very large REST catalog requests we might + * need to palloc() in XACT_COMMIT, which is still better than always + * palloc()ing in XACT_COMMIT, reducing the risk of OOM significantly. + * These very large requests might happen when there are many tables + * modified in a single transaction, likely > 100 tables. + */ + PgLakeXactRestCatalog->commitContext = + AllocSetContextCreateInternal(TopTransactionContext, + "PgLakeXactCommitContext", + ONE_MB, ONE_MB, ONE_MB); + Assert(MemoryContextMemAllocated(PgLakeXactRestCatalog->commitContext, true) == ONE_MB); - /* - * We prefer to allocate everything in TopTransactionContext, not in - * PgLakeXactCommitContext, because we preserve - * PgLakeXactCommitContext mostly for REST API request bodies to avoid - * palloc() in XACT_COMMIT. - */ - ctl.hcxt = TopTransactionContext; + HASHCTL ctl; - RestCatalogRequestsHash = hash_create("Rest Catalog Requests", - 32, &ctl, - HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); - } + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RestCatalogRequestPerTable); + ctl.hash = oid_hash; + + /* + * We prefer to allocate the hash in TopTransactionContext, not in + * commitContext, because we reserve commitContext mostly for REST API + * request bodies to avoid palloc() in XACT_COMMIT. 
+ */ + ctl.hcxt = TopTransactionContext; + + PgLakeXactRestCatalog->requestsHash = + hash_create("Rest Catalog Requests", + 32, &ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + + MemoryContextSwitchTo(oldctx); } @@ -624,7 +621,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT bool isFound = false; RestCatalogRequestPerTable *requestPerTable = - hash_search(RestCatalogRequestsHash, + hash_search(PgLakeXactRestCatalog->requestsHash, &relationId, HASH_ENTER, &isFound); if (!isFound || !requestPerTable->isValid) @@ -635,7 +632,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT /* Resolve the options for this relation's REST catalog */ RestCatalogOptions *resolvedOpts = GetRestCatalogOptionsForRelation(relationId); - if (PgLakeXactRestCatalogOpts == NULL) + if (PgLakeXactRestCatalog->catalogOpts == NULL) { /* * Deep-copy opts into TopTransactionContext so the struct and its @@ -643,28 +640,28 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT */ MemoryContext oldctx = MemoryContextSwitchTo(TopTransactionContext); - PgLakeXactRestCatalogOpts = palloc0(sizeof(RestCatalogOptions)); - PgLakeXactRestCatalogOpts->catalog = pstrdup(resolvedOpts->catalog); - PgLakeXactRestCatalogOpts->host = pstrdup(resolvedOpts->host); - PgLakeXactRestCatalogOpts->oauthHostPath = resolvedOpts->oauthHostPath ? pstrdup(resolvedOpts->oauthHostPath) : NULL; - PgLakeXactRestCatalogOpts->clientId = resolvedOpts->clientId ? pstrdup(resolvedOpts->clientId) : NULL; - PgLakeXactRestCatalogOpts->clientSecret = resolvedOpts->clientSecret ? pstrdup(resolvedOpts->clientSecret) : NULL; - PgLakeXactRestCatalogOpts->scope = resolvedOpts->scope ? pstrdup(resolvedOpts->scope) : NULL; - PgLakeXactRestCatalogOpts->locationPrefix = resolvedOpts->locationPrefix ? pstrdup(resolvedOpts->locationPrefix) : NULL; - PgLakeXactRestCatalogOpts->catalogName = resolvedOpts->catalogName ? 
pstrdup(resolvedOpts->catalogName) : NULL; - PgLakeXactRestCatalogOpts->authType = resolvedOpts->authType; - PgLakeXactRestCatalogOpts->enableVendedCredentials = resolvedOpts->enableVendedCredentials; + PgLakeXactRestCatalog->catalogOpts = palloc0(sizeof(RestCatalogOptions)); + PgLakeXactRestCatalog->catalogOpts->catalog = pstrdup(resolvedOpts->catalog); + PgLakeXactRestCatalog->catalogOpts->host = pstrdup(resolvedOpts->host); + PgLakeXactRestCatalog->catalogOpts->oauthHostPath = resolvedOpts->oauthHostPath ? pstrdup(resolvedOpts->oauthHostPath) : NULL; + PgLakeXactRestCatalog->catalogOpts->clientId = resolvedOpts->clientId ? pstrdup(resolvedOpts->clientId) : NULL; + PgLakeXactRestCatalog->catalogOpts->clientSecret = resolvedOpts->clientSecret ? pstrdup(resolvedOpts->clientSecret) : NULL; + PgLakeXactRestCatalog->catalogOpts->scope = resolvedOpts->scope ? pstrdup(resolvedOpts->scope) : NULL; + PgLakeXactRestCatalog->catalogOpts->locationPrefix = resolvedOpts->locationPrefix ? pstrdup(resolvedOpts->locationPrefix) : NULL; + PgLakeXactRestCatalog->catalogOpts->catalogName = resolvedOpts->catalogName ? 
pstrdup(resolvedOpts->catalogName) : NULL; + PgLakeXactRestCatalog->catalogOpts->authType = resolvedOpts->authType; + PgLakeXactRestCatalog->catalogOpts->enableVendedCredentials = resolvedOpts->enableVendedCredentials; MemoryContextSwitchTo(oldctx); } - else if (strcmp(PgLakeXactRestCatalogOpts->catalog, resolvedOpts->catalog) != 0) + else if (strcmp(PgLakeXactRestCatalog->catalogOpts->catalog, resolvedOpts->catalog) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot modify tables from different REST catalogs " "in the same transaction"), errdetail("This transaction already targets catalog server " "\"%s\", but table %u belongs to \"%s\".", - PgLakeXactRestCatalogOpts->catalog, relationId, + PgLakeXactRestCatalog->catalogOpts->catalog, relationId, resolvedOpts->catalog))); requestPerTable->catalogName = diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 7fcc086d..6ce4688e 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -52,6 +52,31 @@ def test_create_rest_server_with_all_options(superuser_conn, extension): superuser_conn.rollback() +def test_create_rest_server_no_options(superuser_conn, extension): + """A server with no options is valid; all settings fall back to GUCs.""" + run_command( + """ + CREATE SERVER test_rest_no_opts TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_lake_write_user_can_create_server(pg_conn, extension): + """A non-superuser with lake_write should be able to create a server.""" + run_command( + """ + CREATE SERVER test_lake_write_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + pg_conn, + ) + pg_conn.rollback() + + def test_create_rest_server_minimal(superuser_conn, extension): """A server with just 
rest_endpoint should be accepted.""" run_command( diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index c22c8bf1..5f5fa4bd 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -567,7 +567,44 @@ def get_rest_table_metadata_location(encoded_namespace, encoded_table_name, pg_c return metadata["metadata"]["location"] -def test_server_location_prefix_overrides_guc( +server_option_override_params = [ + pytest.param( + "rest_endpoint", + "pg_lake_iceberg.rest_catalog_host", + "http://localhost:1", + id="rest_endpoint", + ), + pytest.param( + "client_id", + "pg_lake_iceberg.rest_catalog_client_id", + "wrong_id", + id="client_id", + ), + pytest.param( + "client_secret", + "pg_lake_iceberg.rest_catalog_client_secret", + "wrong_secret", + id="client_secret", + ), + pytest.param( + "location_prefix", + "pg_lake_iceberg.default_location_prefix", + "s3://nonexistent-broken-bucket-xyz", + id="location_prefix", + ), + pytest.param( + "catalog_name", + None, + None, + id="catalog_name", + ), +] + + +@pytest.mark.parametrize( + "option_name, guc_name, broken_guc_value", server_option_override_params +) +def test_server_option_overrides_guc( installcheck, superuser_conn, pg_conn, @@ -575,40 +612,54 @@ def test_server_location_prefix_overrides_guc( extension, polaris_session, create_http_helper_functions, + option_name, + guc_name, + broken_guc_value, ): """ - When a REST catalog server has a location_prefix option, tables must use - that prefix for their storage location. We verify this by setting the - GUC to a broken S3 bucket. + Verify that each overridable server option takes precedence over + its corresponding GUC. For most options the GUC is set to a broken + value while the server option is set to the correct value, then we + prove the table works. 
For catalog_name the server option is set to + a wrong value and we prove it is used (instead of the default). """ if installcheck: return - BROKEN_PREFIX = "s3://nonexistent-broken-bucket-xyz" - VALID_PREFIX = f"s3://{TEST_BUCKET}/" - SERVER_NAME = "rest_catalog_loc_prefix" - SCHEMA_NAME = TABLE_NAMESPACE - TABLE_NAME = "loc_prefix_test" - creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) client_id = creds["credentials"]["clientId"] client_secret = creds["credentials"]["clientSecret"] endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + VALID_PREFIX = f"s3://{TEST_BUCKET}/" - run_command( - f"SET pg_lake_iceberg.default_location_prefix TO '{BROKEN_PREFIX}'", - pg_conn, - ) - pg_conn.commit() + SERVER_NAME = f"rest_opt_override_{option_name}" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = f"opt_override_{option_name}" + + server_options = { + "rest_endpoint": endpoint, + "client_id": client_id, + "client_secret": client_secret, + "location_prefix": VALID_PREFIX, + } + + if option_name == "catalog_name": + server_options["catalog_name"] = "nonexistent_catalog" + + options_sql = ", ".join(f"{k} '{v}'" for k, v in server_options.items()) + + if guc_name is not None: + run_command( + f"SET {guc_name} TO '{broken_guc_value}'", + superuser_conn, + ) + superuser_conn.commit() run_command( f""" CREATE SERVER {SERVER_NAME} TYPE 'rest' FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS (rest_endpoint '{endpoint}', - client_id '{client_id}', - client_secret '{client_secret}', - location_prefix '{VALID_PREFIX}') + OPTIONS ({options_sql}) """, superuser_conn, ) @@ -617,62 +668,61 @@ def test_server_location_prefix_overrides_guc( run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) pg_conn.commit() - run_command( - f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " - f"USING iceberg WITH (catalog='{SERVER_NAME}')", - pg_conn, - ) - pg_conn.commit() - - run_command( - f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} " - 
f"SELECT i, i::text FROM generate_series(1, 10) i", - pg_conn, - ) - pg_conn.commit() - - results = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", pg_conn) - assert results[0][0] == 10 - - table_location = get_rest_table_metadata_location(SCHEMA_NAME, TABLE_NAME, pg_conn) - stripped_prefix = VALID_PREFIX.rstrip("/") - assert table_location.startswith(stripped_prefix), ( - f"Expected location to start with server prefix '{stripped_prefix}', " - f"got '{table_location}'" - ) - assert BROKEN_PREFIX not in table_location - assert ( - "//" not in table_location.split("://", 1)[1] - ), f"Double slash found in location path: '{table_location}'" - - run_command_outside_tx([f"VACUUM {SCHEMA_NAME}.{TABLE_NAME}"]) - - run_command( - f"ALTER TABLE {SCHEMA_NAME}.{TABLE_NAME} ADD COLUMN extra int", - pg_conn, - ) - pg_conn.commit() - - run_command( - f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} " - f"SELECT i, i::text, i FROM generate_series(11, 20) i", - pg_conn, - ) - pg_conn.commit() + if option_name == "catalog_name": + err = run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} () " + f"USING iceberg WITH (catalog='{SERVER_NAME}', read_only='true')", + pg_conn, + raise_error=False, + ) + assert err is not None, ( + "Expected failure because server's catalog_name 'nonexistent_catalog' " + "should be used instead of the default database name" + ) + pg_conn.rollback() + else: + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() - results = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", pg_conn) - assert results[0][0] == 20 + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} " + f"SELECT i, i::text FROM generate_series(1, 10) i", + pg_conn, + ) + pg_conn.commit() - pg_conn.rollback() - run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) - pg_conn.commit() + results = run_query(f"SELECT count(*) FROM 
{SCHEMA_NAME}.{TABLE_NAME}", pg_conn) + assert results[0][0] == 10 + + if option_name == "location_prefix": + table_location = get_rest_table_metadata_location( + SCHEMA_NAME, TABLE_NAME, pg_conn + ) + stripped_prefix = VALID_PREFIX.rstrip("/") + assert table_location.startswith(stripped_prefix), ( + f"Expected location to start with server prefix " + f"'{stripped_prefix}', got '{table_location}'" + ) + assert broken_guc_value not in table_location + assert ( + "//" not in table_location.split("://", 1)[1] + ), f"Double slash found in location path: '{table_location}'" + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() superuser_conn.rollback() - run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + run_command(f"DROP SERVER {SERVER_NAME} CASCADE", superuser_conn) superuser_conn.commit() - run_command("RESET pg_lake_iceberg.default_location_prefix", pg_conn) - pg_conn.commit() + if guc_name is not None: + run_command(f"RESET {guc_name}", superuser_conn) + superuser_conn.commit() def test_reject_modify_different_rest_catalogs_in_single_transaction( @@ -798,63 +848,6 @@ def test_reject_writable_table_on_server_with_catalog_name( superuser_conn.commit() -def test_server_catalog_name_overrides_default( - installcheck, - superuser_conn, - pg_conn, - s3, - extension, - polaris_session, - create_http_helper_functions, -): - """ - The server's catalog_name must override the default (database name). - We prove this by creating a server with a wrong catalog_name and - creating a read-only table that does not set catalog_name itself. - The REST metadata lookup should fail because it uses the server's - value, not the default database name. 
- """ - if installcheck: - return - - SERVER_NAME = "rest_catalog_wrong_name" - SCHEMA_NAME = TABLE_NAMESPACE - - creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) - client_id = creds["credentials"]["clientId"] - client_secret = creds["credentials"]["clientSecret"] - endpoint = f"http://localhost:{server_params.POLARIS_PORT}" - - run_command( - f""" - CREATE SERVER {SERVER_NAME} TYPE 'rest' - FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS (rest_endpoint '{endpoint}', - client_id '{client_id}', - client_secret '{client_secret}', - catalog_name 'nonexistent_catalog') - """, - superuser_conn, - ) - superuser_conn.commit() - - err = run_command( - f"CREATE TABLE {SCHEMA_NAME}.srv_catname_fail () " - f"USING iceberg WITH (catalog='{SERVER_NAME}', read_only='true')", - pg_conn, - raise_error=False, - ) - assert err is not None, ( - "Expected failure because server's catalog_name 'nonexistent_catalog' " - "should be used instead of the default database name" - ) - pg_conn.rollback() - - superuser_conn.rollback() - run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) - superuser_conn.commit() - - def test_table_catalog_name_overrides_server( installcheck, superuser_conn, From a8236b37990e1d839aff17db4af7166e9b20b454 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 20 Apr 2026 11:55:20 +0200 Subject: [PATCH 17/23] Move changes to pg_lake_iceberg--3.3--3.4.sql Signed-off-by: sfc-gh-npuka --- pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql | 26 ------------------- pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql | 26 +++++++++++++++++++ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql index 9807cea6..286cf23a 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql @@ -14,29 +14,3 @@ CREATE OR REPLACE VIEW pg_catalog.iceberg_tables AS SELECT catalog_name, table_namespace, 
table_name, metadata_location, previous_metadata_location FROM lake_iceberg.tables WHERE metadata_location IS NOT NULL; - -/* - * iceberg_catalog foreign data wrapper: allows defining named catalog - * configurations via CREATE SERVER so that users are not limited to a - * single global REST catalog configured through GUC settings. - * - * Example: - * CREATE SERVER my_polaris TYPE 'rest' - * FOREIGN DATA WRAPPER iceberg_catalog - * OPTIONS (rest_endpoint 'http://polaris:8181', - * rest_auth_type 'default', - * client_id '...', - * client_secret '...'); - * - * CREATE TABLE t (a int) USING iceberg WITH (catalog = 'my_polaris'); - */ -CREATE FUNCTION lake_iceberg.iceberg_catalog_validator(text[], oid) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; - -CREATE FOREIGN DATA WRAPPER iceberg_catalog - NO HANDLER - VALIDATOR lake_iceberg.iceberg_catalog_validator; - -GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO lake_write; diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql b/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql index 27fca375..8e563ce0 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql @@ -1 +1,27 @@ -- Upgrade script for pg_lake_iceberg from 3.3 to 3.4 + +/* + * iceberg_catalog foreign data wrapper: allows defining named catalog + * configurations via CREATE SERVER so that users are not limited to a + * single global REST catalog configured through GUC settings. 
+ * + * Example: + * CREATE SERVER my_polaris TYPE 'rest' + * FOREIGN DATA WRAPPER iceberg_catalog + * OPTIONS (rest_endpoint 'http://polaris:8181', + * rest_auth_type 'default', + * client_id '...', + * client_secret '...'); + * + * CREATE TABLE t (a int) USING iceberg WITH (catalog = 'my_polaris'); + */ +CREATE FUNCTION lake_iceberg.iceberg_catalog_validator(text[], oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER iceberg_catalog + NO HANDLER + VALIDATOR lake_iceberg.iceberg_catalog_validator; + +GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO lake_write; From ad06a854ad0397727240edde16ed8abfffebdda4 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Tue, 21 Apr 2026 17:58:26 +0200 Subject: [PATCH 18/23] Address Onder's amazing comprehensive review Harden catalog server security, configuration, and cross-catalog safety Refactor GetRestCatalogOptionsFromCatalog into a clean dispatcher (ResolveRestCatalogOptions) with two explicit builders: BuildRestCatalogOptionsFromGUCs for the built-in 'rest' path and BuildRestCatalogOptionsFromServer for user-created servers, plus shared helpers ApplyGUCDefaults, ApplyServerOptionOverrides, and ValidateRestCatalogOptions. Add CopyRestCatalogOptions for safe deep-copy into a target memory context. Unify the three separate server option lists (whitelist array, errhint string, applier chain) into a single IcebergCatalogOptionDesc descriptor table with type, offsetof, and validation flags. Adding or removing an option is now a one-line change. Add DDL-time value validation: reject empty strings for client_id, client_secret, scope, catalog_name; require a URI scheme for rest_endpoint, oauth_endpoint, location_prefix. Require ACL_USAGE on user-created iceberg_catalog servers at CREATE TABLE time, matching core's CREATE FOREIGN TABLE semantics. Record a DEPENDENCY_NORMAL from iceberg tables to their catalog server in pg_depend so DROP SERVER is blocked (and CASCADE drops them). 
Block ALTER SERVER RENAME for iceberg_catalog servers since dependent tables store the server name as a string in ftoptions. Block ALTER SERVER SET rest_endpoint when dependent writable tables exist to prevent silently redirecting them to a different REST catalog. Make GetRestCatalogName always return get_database_name(MyDatabaseId) for writable tables so ALTER SERVER ADD catalog_name cannot re-route an existing table to a different namespace. Fix token cache hash key regression: zero the key buffer with MemSet before strlcpy in BuildTokenCacheKey. Add syscache invalidation callback on FOREIGNSERVEROID to reset the token cache on ALTER/DROP SERVER, using CacheMemoryContext as parent. Add NULL guard on opts in GetRestCatalogAccessToken. Fix default_catalog GUC check hook to accept values outside a transaction (ALTER SYSTEM + pg_reload_conf path), mirroring how PostgreSQL handles check_default_tablespace. Introduce ValidateXactRestCatalog as a fail-fast guard that checks cross-catalog DML at statement time rather than at XACT_EVENT_PRE_COMMIT. Planted in postgresBeginForeignModify and AddQueryResultToTable. The existing pre-commit check is retained as a belt-and-suspenders fallback. Parametrize test_writable_rest_iceberg_table over built-in 'rest' and user-created server paths. Add tests for USAGE enforcement, dependency tracking, server rename blocking, rest_endpoint blocking, catalog_name re-routing, token cache invalidation, ALTER SYSTEM deferred validation, option value validation, multi-table same-server transactions, and cross-catalog rejection cleanup. 
Signed-off-by: sfc-gh-npuka --- .../pg_lake/rest_catalog/rest_catalog.h | 3 +- pg_lake_iceberg/src/init.c | 12 +- .../src/rest_catalog/rest_catalog.c | 573 ++++++++++++++---- pg_lake_iceberg/src/test/rest_catalog.c | 2 +- .../track_iceberg_metadata_changes.h | 1 + pg_lake_table/src/ddl/create_table.c | 78 ++- pg_lake_table/src/fdw/pg_lake_table.c | 3 + pg_lake_table/src/fdw/writable_table.c | 2 + .../track_iceberg_metadata_changes.c | 72 ++- .../pytests/test_iceberg_catalog_server.py | 149 ++++- .../pytests/test_modify_iceberg_rest_table.py | 573 +++++++++++++++++- .../pytests/test_writable_iceberg_common.py | 88 +++ 12 files changed, 1398 insertions(+), 158 deletions(-) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index f0df17b2..4e254789 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -101,8 +101,9 @@ typedef struct RestCatalogRequest #define GET_REST_CATALOG_METADATA_LOCATION "%s/api/catalog/v1/%s/namespaces/%s/tables/%s" /* Catalog options resolution */ -extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsFromCatalog(const char *catalog); +extern PGDLLEXPORT RestCatalogOptions * ResolveRestCatalogOptions(const char *catalog); extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsForRelation(Oid relationId); +extern PGDLLEXPORT RestCatalogOptions * CopyRestCatalogOptions(MemoryContext dst, const RestCatalogOptions * src); extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT void StartStageRestCatalogIcebergTableCreate(Oid relationId); diff --git a/pg_lake_iceberg/src/init.c b/pg_lake_iceberg/src/init.c index 011e6735..2d9f9644 100644 --- a/pg_lake_iceberg/src/init.c +++ b/pg_lake_iceberg/src/init.c @@ -375,10 +375,16 @@ IcebergDefaultCatalogCheckHook(char **newvalue, 
void **extra, GucSource source) return true; /* - * When catalog access is available, also accept user-created - * iceberg_catalog foreign servers with TYPE 'rest'. + * Outside a transaction we cannot do catalog lookups to verify that the + * name refers to a valid iceberg_catalog server. Accept the value on + * faith; an invalid name will error at first use. This mirrors how + * PostgreSQL handles check_default_tablespace (see + * src/backend/commands/tablespace.c). */ - if (IsTransactionState() && IsRestCatalog(newCatalog)) + if (!IsTransactionState()) + return true; + + if (IsRestCatalog(newCatalog)) return true; GUC_check_errdetail("pg_lake_iceberg: allowed iceberg catalog options are '" POSTGRES_CATALOG_NAME "', " diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 36f9aa62..c2f740ac 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -20,7 +20,11 @@ #include "postgres.h" #include "miscadmin.h" +#include "access/genam.h" #include "access/reloptions.h" +#include "access/table.h" +#include "catalog/pg_class.h" +#include "catalog/pg_depend.h" #include "catalog/pg_foreign_server.h" #include "common/base64.h" #include "commands/dbcommands.h" @@ -30,9 +34,12 @@ #include "fmgr.h" #include "lib/stringinfo.h" #include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" #include "utils/jsonb.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/syscache.h" #include "utils/timestamp.h" #include "pg_extension_base/base_workers.h" @@ -97,32 +104,183 @@ typedef enum RestCatalogRequestRetryAction PG_FUNCTION_INFO_V1(iceberg_catalog_validator); + /* - * Valid options for iceberg_catalog servers. + * Descriptor for a single iceberg_catalog server option. This is the + * single source of truth: validation, the user-facing hint, and the + * option-to-struct applier all derive from this table. 
*/ -static const char *iceberg_catalog_server_options[] = { - "rest_endpoint", - "scope", - "rest_auth_type", - "oauth_endpoint", - "enable_vended_credentials", - "location_prefix", - "catalog_name", - "client_id", - "client_secret", - NULL +typedef enum IcebergCatalogOptionType +{ + CATALOG_OPT_STRING, + CATALOG_OPT_BOOL, + CATALOG_OPT_AUTH_TYPE, + CATALOG_OPT_LOCATION_PREFIX +} IcebergCatalogOptionType; + +/* Validation flags checked at CREATE/ALTER SERVER time. */ +#define CATALOG_OPT_NONEMPTY 0x01 /* reject empty string */ +#define CATALOG_OPT_HAS_SCHEME 0x02 /* must contain "://" */ + +typedef struct IcebergCatalogOptionDesc +{ + const char *name; + IcebergCatalogOptionType type; + size_t offset; /* offsetof into RestCatalogOptions */ + int flags; /* CATALOG_OPT_NONEMPTY | + * CATALOG_OPT_HAS_SCHEME */ +} IcebergCatalogOptionDesc; + +static const IcebergCatalogOptionDesc iceberg_catalog_option_descs[] = { + {"rest_endpoint", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, host), + CATALOG_OPT_NONEMPTY | CATALOG_OPT_HAS_SCHEME}, + {"rest_auth_type", CATALOG_OPT_AUTH_TYPE, offsetof(RestCatalogOptions, authType), 0}, + {"oauth_endpoint", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, oauthHostPath), + CATALOG_OPT_NONEMPTY | CATALOG_OPT_HAS_SCHEME}, + {"scope", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, scope), + CATALOG_OPT_NONEMPTY}, + {"enable_vended_credentials", CATALOG_OPT_BOOL, offsetof(RestCatalogOptions, enableVendedCredentials), 0}, + {"location_prefix", CATALOG_OPT_LOCATION_PREFIX, offsetof(RestCatalogOptions, locationPrefix), + CATALOG_OPT_NONEMPTY | CATALOG_OPT_HAS_SCHEME}, + {"catalog_name", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, catalogName), + CATALOG_OPT_NONEMPTY}, + {"client_id", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, clientId), + CATALOG_OPT_NONEMPTY}, + {"client_secret", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, clientSecret), + CATALOG_OPT_NONEMPTY}, }; +#define NUM_CATALOG_OPTIONS 
lengthof(iceberg_catalog_option_descs) -static bool -is_valid_iceberg_catalog_option(const char *keyword) + +/* + * Look up a descriptor by option name, or return NULL if not found. + */ +static const IcebergCatalogOptionDesc * +FindCatalogOptionDesc(const char *name) { - for (int i = 0; iceberg_catalog_server_options[i] != NULL; i++) + for (int i = 0; i < NUM_CATALOG_OPTIONS; i++) { - if (pg_strcasecmp(keyword, iceberg_catalog_server_options[i]) == 0) - return true; + if (pg_strcasecmp(name, iceberg_catalog_option_descs[i].name) == 0) + return &iceberg_catalog_option_descs[i]; + } + return NULL; +} + + +/* + * Build the "Valid options are: ..." hint string. Cached after first call. + */ +static const char * +GetValidCatalogOptionsHint(void) +{ + static char *hint = NULL; + + if (hint == NULL) + { + StringInfoData buf; + + initStringInfo(&buf); + appendStringInfoString(&buf, "Valid options are: "); + for (int i = 0; i < NUM_CATALOG_OPTIONS; i++) + { + if (i > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, iceberg_catalog_option_descs[i].name); + } + appendStringInfoChar(&buf, '.'); + hint = buf.data; + } + + return hint; +} + + +/* + * Validate a single option value. Called from iceberg_catalog_validator + * after the name has already been accepted. Type-specific checks run + * first, then flag-based checks (non-empty, scheme present). 
+ */ +static void +ValidateCatalogOptionValue(const IcebergCatalogOptionDesc * desc, DefElem *def) +{ + switch (desc->type) + { + case CATALOG_OPT_AUTH_TYPE: + { + char *authType = defGetString(def); + + if (pg_strcasecmp(authType, "oauth2") != 0 && + pg_strcasecmp(authType, "default") != 0 && + pg_strcasecmp(authType, "horizon") != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid rest_auth_type option: \"%s\"", authType), + errhint("Valid values are \"oauth2\" and \"horizon\"."))); + return; + } + case CATALOG_OPT_BOOL: + (void) defGetBoolean(def); + return; + default: + break; + } + + if (desc->flags == 0) + return; + + char *value = defGetString(def); + + if ((desc->flags & CATALOG_OPT_NONEMPTY) && value[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for \"%s\": must not be empty", + desc->name))); + + if ((desc->flags & CATALOG_OPT_HAS_SCHEME) && strstr(value, "://") == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for \"%s\": \"%s\"", + desc->name, value), + errhint("Include a URI scheme (e.g. \"https://...\")."))); +} + + +/* + * Apply a single server option onto the RestCatalogOptions struct. + * Called from ApplyServerOptionOverrides for each DefElem on the server. + */ +static void +ApplyCatalogOptionValue(RestCatalogOptions * opts, + const IcebergCatalogOptionDesc * desc, DefElem *def) +{ + switch (desc->type) + { + case CATALOG_OPT_STRING: + *(char **) ((char *) opts + desc->offset) = pstrdup(defGetString(def)); + break; + case CATALOG_OPT_BOOL: + *(bool *) ((char *) opts + desc->offset) = defGetBoolean(def); + break; + case CATALOG_OPT_AUTH_TYPE: + { + char *authType = defGetString(def); + + *(int *) ((char *) opts + desc->offset) = + (pg_strcasecmp(authType, "horizon") == 0) + ? 
REST_CATALOG_AUTH_TYPE_HORIZON + : REST_CATALOG_AUTH_TYPE_OAUTH2; + break; + } + case CATALOG_OPT_LOCATION_PREFIX: + { + bool inPlace = false; + + *(char **) ((char *) opts + desc->offset) = + pstrdup(StripTrailingSlash(defGetString(def), inPlace)); + break; + } } - return false; } @@ -156,36 +314,69 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) foreach(cell, options_list) { DefElem *def = (DefElem *) lfirst(cell); + const IcebergCatalogOptionDesc *desc = FindCatalogOptionDesc(def->defname); - if (!is_valid_iceberg_catalog_option(def->defname)) - { + if (desc == NULL) ereport(ERROR, (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), - errmsg("invalid option \"%s\" for iceberg_catalog server", def->defname), - errhint("Valid options are: rest_endpoint, rest_auth_type, " - "oauth_endpoint, scope, enable_vended_credentials, " - "location_prefix, catalog_name, client_id, client_secret."))); - } + errmsg("invalid option \"%s\" for iceberg_catalog server", + def->defname), + errhint("%s", GetValidCatalogOptionsHint()))); - if (pg_strcasecmp(def->defname, "rest_auth_type") == 0) - { - char *authType = defGetString(def); + ValidateCatalogOptionValue(desc, def); + } - if (pg_strcasecmp(authType, "oauth2") != 0 && - pg_strcasecmp(authType, "default") != 0 && - pg_strcasecmp(authType, "horizon") != 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid rest_auth_type option: \"%s\"", authType), - errhint("Valid values are \"oauth2\" and \"horizon\"."))); - } - else if (pg_strcasecmp(def->defname, "enable_vended_credentials") == 0) + PG_RETURN_VOID(); +} + + +/* + * ServerHasDependentWritableTable returns true if the given server + * has at least one dependent writable iceberg table recorded in + * pg_depend. Used to block ALTER SERVER changes that would silently + * break existing tables. 
+ */ +static bool +ServerHasDependentWritableTable(Oid serverOid) +{ + Relation depRel; + ScanKeyData key[2]; + SysScanDesc scan; + HeapTuple tup; + bool found = false; + + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ForeignServerRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(serverOid)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 2, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup); + + if (depForm->classid != RelationRelationId) + continue; + + if (GetIcebergCatalogType(depForm->objid) == REST_CATALOG_READ_WRITE) { - (void) defGetBoolean(def); + found = true; + break; } } - PG_RETURN_VOID(); + systable_endscan(scan); + table_close(depRel, AccessShareLock); + + return found; } @@ -196,6 +387,9 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) * 'rest'), rejects TYPE 'postgres'/'object_store', and requires * TYPE 'rest'. * - ALTER SERVER RENAME TO: rejects renaming to a reserved name. + * - ALTER SERVER OPTIONS: blocks SET/ADD rest_endpoint when dependent + * writable tables exist (the table was registered at the original + * endpoint and moving it would break the metadata chain). * * ALTER/DROP/OWNER on reserved names will fail naturally because no * server object exists. @@ -254,91 +448,117 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, errmsg("server name \"%s\" is reserved for the extension-owned catalog", stmt->newname), errhint("Choose a different server name."))); - } - return false; -} + /* + * Renaming an iceberg_catalog server is blocked because dependent + * iceberg tables store the server name as a string option + * (catalog='') in pg_foreign_table.ftoptions. A rename would + * silently break those references. 
+ */ + ForeignServer *server = GetForeignServerByName(strVal(stmt->object), + true); + if (server != NULL) + { + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); -/* - * GetRestCatalogOptionsFromCatalog returns a RestCatalogOptions struct. - * For the built-in 'rest' catalog name the GUCs are used directly. - * For user-created servers, the GUCs serve as defaults, - * overridden by any option set on the server. - */ -RestCatalogOptions * -GetRestCatalogOptionsFromCatalog(const char *catalog) -{ - RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rename iceberg_catalog server \"%s\"", + strVal(stmt->object)), + errhint("Drop and recreate the server with the new name."))); + } + } + else if (IsA(parsetree, AlterForeignServerStmt)) + { + AlterForeignServerStmt *stmt = (AlterForeignServerStmt *) parsetree; - /* - * Normalize built-in catalog name to the canonical constant so that case - * variations (e.g. 'REST', 'rEst') compare equal with strcmp. - * User-created server names are case-sensitive and stored as-is. - */ - if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) - opts->catalog = pstrdup(REST_CATALOG_NAME); - else - opts->catalog = pstrdup(catalog); - - /* GUC values serve as defaults */ - opts->host = RestCatalogHost; - opts->oauthHostPath = RestCatalogOauthHostPath; - opts->clientId = RestCatalogClientId; - opts->clientSecret = RestCatalogClientSecret; - opts->scope = RestCatalogScope; - opts->authType = RestCatalogAuthType; - opts->enableVendedCredentials = RestCatalogEnableVendedCredentials; - opts->locationPrefix = GetIcebergDefaultLocationPrefix(); + ForeignServer *server = GetForeignServerByName(stmt->servername, true); + + if (server == NULL) + return false; - /* - * The built-in 'rest' name uses GUCs exclusively. 
For user-created - * servers, look up server options and override the GUC defaults. - */ - if (pg_strcasecmp(catalog, REST_CATALOG_NAME) != 0) - { - ForeignServer *server = GetForeignServerByName(catalog, false); ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); - Assert(strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0); + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) + return false; + /* + * Changing rest_endpoint on a server with dependent writable tables + * would silently point them at a different REST catalog, breaking the + * metadata chain. + */ ListCell *lc; - foreach(lc, server->options) + foreach(lc, stmt->options) { DefElem *def = (DefElem *) lfirst(lc); - if (pg_strcasecmp(def->defname, "rest_endpoint") == 0) - opts->host = defGetString(def); - else if (pg_strcasecmp(def->defname, "client_id") == 0) - opts->clientId = defGetString(def); - else if (pg_strcasecmp(def->defname, "client_secret") == 0) - opts->clientSecret = defGetString(def); - else if (pg_strcasecmp(def->defname, "scope") == 0) - opts->scope = defGetString(def); - else if (pg_strcasecmp(def->defname, "rest_auth_type") == 0) - { - char *authType = defGetString(def); - - opts->authType = (pg_strcasecmp(authType, "horizon") == 0) - ? 
REST_CATALOG_AUTH_TYPE_HORIZON - : REST_CATALOG_AUTH_TYPE_OAUTH2; - } - else if (pg_strcasecmp(def->defname, "oauth_endpoint") == 0) - opts->oauthHostPath = defGetString(def); - else if (pg_strcasecmp(def->defname, "enable_vended_credentials") == 0) - opts->enableVendedCredentials = defGetBoolean(def); - else if (pg_strcasecmp(def->defname, "catalog_name") == 0) - opts->catalogName = defGetString(def); - else if (pg_strcasecmp(def->defname, "location_prefix") == 0) + if (pg_strcasecmp(def->defname, "rest_endpoint") == 0 && + ServerHasDependentWritableTable(server->serverid)) { - bool inPlace = false; - - opts->locationPrefix = StripTrailingSlash(defGetString(def), inPlace); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change \"rest_endpoint\" on server \"%s\" " + "because it has dependent writable iceberg tables", + stmt->servername), + errhint("Drop the dependent tables first, or create a " + "new server with the desired endpoint."))); } } } + return false; +} + + + +/* + * ApplyGUCDefaults populates opts with the current GUC values. + * All string fields are pstrdup'd so the struct is self-contained. + */ +static void +ApplyGUCDefaults(RestCatalogOptions * opts) +{ + opts->host = RestCatalogHost ? pstrdup(RestCatalogHost) : NULL; + opts->oauthHostPath = RestCatalogOauthHostPath ? pstrdup(RestCatalogOauthHostPath) : NULL; + opts->clientId = RestCatalogClientId ? pstrdup(RestCatalogClientId) : NULL; + opts->clientSecret = RestCatalogClientSecret ? pstrdup(RestCatalogClientSecret) : NULL; + opts->scope = RestCatalogScope ? pstrdup(RestCatalogScope) : NULL; + opts->authType = RestCatalogAuthType; + opts->enableVendedCredentials = RestCatalogEnableVendedCredentials; + opts->locationPrefix = GetIcebergDefaultLocationPrefix(); +} + + +/* + * ApplyServerOptionOverrides overrides the GUC-derived defaults in opts + * with any options explicitly set on the foreign server. 
+ */ +static void +ApplyServerOptionOverrides(RestCatalogOptions * opts, ForeignServer *server) +{ + ListCell *lc; + + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + const IcebergCatalogOptionDesc *desc = FindCatalogOptionDesc(def->defname); + + if (desc != NULL) + ApplyCatalogOptionValue(opts, desc, def); + } +} + + +/* + * ValidateRestCatalogOptions checks that the resolved options have + * the minimum required fields (e.g. rest_endpoint). + */ +static void +ValidateRestCatalogOptions(const RestCatalogOptions * opts, const char *catalog) +{ if (opts->host == NULL || opts->host[0] == '\0') ereport(ERROR, (errcode(ERRCODE_FDW_OPTION_NAME_NOT_FOUND), @@ -346,11 +566,61 @@ GetRestCatalogOptionsFromCatalog(const char *catalog) catalog), errhint("Set the pg_lake_iceberg.rest_catalog_host GUC or " "the \"rest_endpoint\" option on the server."))); +} + + +/* + * Built-in 'rest' catalog: GUCs only, no server lookup. + */ +static RestCatalogOptions * +BuildRestCatalogOptionsFromGUCs(void) +{ + RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); + opts->catalog = pstrdup(REST_CATALOG_NAME); + ApplyGUCDefaults(opts); + ValidateRestCatalogOptions(opts, REST_CATALOG_NAME); return opts; } +/* + * User-created iceberg_catalog server: GUC defaults + server option + * overrides. 
+ */ +static RestCatalogOptions * +BuildRestCatalogOptionsFromServer(const char *serverName) +{ + ForeignServer *server = GetForeignServerByName(serverName, false); + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + Assert(strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0); + + RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); + + opts->catalog = pstrdup(serverName); + ApplyGUCDefaults(opts); + ApplyServerOptionOverrides(opts, server); + ValidateRestCatalogOptions(opts, serverName); + return opts; +} + + +/* + * ResolveRestCatalogOptions picks the right source based on the catalog + * identifier: GUCs for the built-in 'rest' name, server object for + * user-created iceberg_catalog servers. + */ +RestCatalogOptions * +ResolveRestCatalogOptions(const char *catalog) +{ + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) + return BuildRestCatalogOptionsFromGUCs(); + + return BuildRestCatalogOptionsFromServer(catalog); +} + + /* * GetRestCatalogOptionsForRelation returns the REST catalog options for * the given relation. The catalog option value is used as the server @@ -367,7 +637,34 @@ GetRestCatalogOptionsForRelation(Oid relationId) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("catalog option is not set for relation %u", relationId))); - return GetRestCatalogOptionsFromCatalog(catalog); + return ResolveRestCatalogOptions(catalog); +} + + +/* + * CopyRestCatalogOptions deep-copies a RestCatalogOptions into the given + * memory context. All string fields are duplicated so the result is + * self-contained and independent of the source's lifetime. + */ +RestCatalogOptions * +CopyRestCatalogOptions(MemoryContext dst, const RestCatalogOptions * src) +{ + MemoryContext oldctx = MemoryContextSwitchTo(dst); + RestCatalogOptions *copy = palloc0(sizeof(RestCatalogOptions)); + + copy->catalog = pstrdup(src->catalog); + copy->host = pstrdup(src->host); + copy->oauthHostPath = src->oauthHostPath ? 
pstrdup(src->oauthHostPath) : NULL; + copy->clientId = src->clientId ? pstrdup(src->clientId) : NULL; + copy->clientSecret = src->clientSecret ? pstrdup(src->clientSecret) : NULL; + copy->scope = src->scope ? pstrdup(src->scope) : NULL; + copy->locationPrefix = src->locationPrefix ? pstrdup(src->locationPrefix) : NULL; + copy->catalogName = src->catalogName ? pstrdup(src->catalogName) : NULL; + copy->authType = src->authType; + copy->enableVendedCredentials = src->enableVendedCredentials; + + MemoryContextSwitchTo(oldctx); + return copy; } @@ -848,22 +1145,51 @@ static void BuildTokenCacheKey(char *key, const RestCatalogOptions * opts) { Assert(opts->catalog != NULL); + MemSet(key, 0, TOKEN_CACHE_KEY_LEN); strlcpy(key, opts->catalog, TOKEN_CACHE_KEY_LEN); } +/* + * Syscache invalidation callback for pg_foreign_server changes. + * Any ALTER/DROP SERVER blows away the entire token cache so stale + * credentials are never reused. The cache is rebuilt lazily on the + * next token lookup. + */ +static void +InvalidateRestTokenCache(Datum arg, int cacheid, uint32 hashvalue) +{ + if (RestCatalogTokenCache == NULL) + return; + + MemoryContextReset(RestTokenCacheCtx); + RestCatalogTokenCache = NULL; +} + + /* * Initialize the per-catalog token cache hash table if needed. 
*/ +static bool TokenCacheCallbackRegistered = false; + static void InitTokenCacheIfNeeded(void) { + if (!TokenCacheCallbackRegistered) + { + CacheRegisterSyscacheCallback(FOREIGNSERVEROID, + InvalidateRestTokenCache, + (Datum) 0); + TokenCacheCallbackRegistered = true; + } + if (RestCatalogTokenCache != NULL) return; - RestTokenCacheCtx = AllocSetContextCreate(TopMemoryContext, - "RestTokenCacheCtx", - ALLOCSET_DEFAULT_SIZES); + if (RestTokenCacheCtx == NULL) + RestTokenCacheCtx = AllocSetContextCreate(CacheMemoryContext, + "RestTokenCacheCtx", + ALLOCSET_DEFAULT_SIZES); HASHCTL ctl; @@ -885,6 +1211,11 @@ InitTokenCacheIfNeeded(void) static char * GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken) { + if (opts == NULL) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("REST catalog options must not be NULL when fetching access token"))); + InitTokenCacheIfNeeded(); char cacheKey[TOKEN_CACHE_KEY_LEN]; @@ -1200,10 +1531,12 @@ GetRestCatalogNamespace(Oid relationId) /* * Returns the catalog name to use for REST API calls. * - * Precedence: table option catalog_name > server option catalog_name - * > current database name. + * Writable tables always use the current database name so that a + * subsequent ALTER SERVER ... ADD/SET catalog_name cannot silently + * re-route an existing table to a different REST namespace. * - * Read-only tables must have catalog_name set (on the table or server). + * Read-only tables resolve from table option > server option, and + * must have catalog_name set on one of them. 
*/ char * GetRestCatalogName(Oid relationId) @@ -1213,6 +1546,9 @@ GetRestCatalogName(Oid relationId) Assert(catalogType == REST_CATALOG_READ_ONLY || catalogType == REST_CATALOG_READ_WRITE); + if (catalogType == REST_CATALOG_READ_WRITE) + return get_database_name(MyDatabaseId); + ForeignTable *foreignTable = GetForeignTable(relationId); char *catalogName = GetStringOption(foreignTable->options, "catalog_name", false); @@ -1224,13 +1560,10 @@ GetRestCatalogName(Oid relationId) if (opts->catalogName != NULL) return opts->catalogName; - if (catalogType == REST_CATALOG_READ_ONLY) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("catalog_name is required for read-only REST catalog tables"), - errhint("Set catalog_name on the table or the server."))); - - return get_database_name(MyDatabaseId); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("catalog_name is required for read-only REST catalog tables"), + errhint("Set catalog_name on the table or the server."))); } diff --git a/pg_lake_iceberg/src/test/rest_catalog.c b/pg_lake_iceberg/src/test/rest_catalog.c index 069213a2..93a96e15 100644 --- a/pg_lake_iceberg/src/test/rest_catalog.c +++ b/pg_lake_iceberg/src/test/rest_catalog.c @@ -37,7 +37,7 @@ register_namespace_to_rest_catalog(PG_FUNCTION_ARGS) char *catalogName = text_to_cstring(PG_GETARG_TEXT_P(0)); char *namespaceName = text_to_cstring(PG_GETARG_TEXT_P(1)); - RestCatalogOptions *opts = GetRestCatalogOptionsFromCatalog(REST_CATALOG_NAME); + RestCatalogOptions *opts = ResolveRestCatalogOptions(REST_CATALOG_NAME); RegisterNamespaceToRestCatalog(opts, catalogName, namespaceName); PG_RETURN_VOID(); diff --git a/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h b/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h index d7e2d22f..12dd26b0 100644 --- a/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h +++ 
b/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h @@ -61,3 +61,4 @@ extern PGDLLEXPORT void ResetRestCatalogRequests(void); extern PGDLLEXPORT HTAB *GetTrackedIcebergMetadataOperations(void); extern PGDLLEXPORT bool HasAnyTrackedIcebergMetadataChanges(void); extern PGDLLEXPORT bool IsIcebergTableCreatedInCurrentTransaction(Oid relation); +extern PGDLLEXPORT void ValidateXactRestCatalog(Oid relationId); diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index ce34e017..bb566c54 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -21,9 +21,11 @@ #include "access/table.h" #include "access/tableam.h" #include "access/relation.h" +#include "catalog/dependency.h" #include "catalog/namespace.h" #include "catalog/pg_attribute.h" #include "catalog/pg_class.h" +#include "catalog/pg_foreign_server.h" #include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/extension.h" @@ -91,6 +93,7 @@ static void ErrorIfUnsupportedColumnTypeForJsonOrCSVTables(List *columnDefList); static void ErrorIfUsingGeometryWithoutSpatialAnalytics(List *columnDefList); static void ErrorIfUnsupportedLakeTable(CreateForeignTableStmt *createStmt); static void ErrorIfCreateForeignTableOnIcebergCatalog(CreateForeignTableStmt *createStmt); +static void RecordIcebergCatalogServerDependency(Oid relationId, List *options); static void ErrorIfWritableTableWithReservedColumnName(List *columnDefList, PgLakeTableType tableType); static void ErrorIfInvalidFilenameColumn(List *columnDefList); static bool IsConflictingColumnNameForReadParquet(const char *columnName); @@ -382,6 +385,44 @@ ErrorIfCreateForeignTableOnIcebergCatalog(CreateForeignTableStmt *createStmt) } +/* + * RecordIcebergCatalogServerDependency records a DEPENDENCY_NORMAL from + * the iceberg table to its catalog server in pg_depend, so that + * DROP SERVER is blocked while dependent tables exist (and + * DROP SERVER 
CASCADE drops them). + * + * Only user-created iceberg_catalog servers get a dependency entry; + * built-in catalog names ('rest', 'postgres', 'object_store') are not + * backed by a pg_foreign_server row managed by the user. + */ +static void +RecordIcebergCatalogServerDependency(Oid relationId, List *options) +{ + char *catalog = GetStringOption(options, "catalog", false); + + if (catalog == NULL || IsCatalogOwnedByExtension(catalog)) + return; + + ForeignServer *server = GetForeignServerByName(catalog, true); + + if (server == NULL) + return; + + ObjectAddress myself; + ObjectAddress referenced; + + myself.classId = RelationRelationId; + myself.objectId = relationId; + myself.objectSubId = 0; + + referenced.classId = ForeignServerRelationId; + referenced.objectId = server->serverid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); +} + + /* * ErrorIfUnsupportedLakeTable is a helper function for checking unsupported features * in CREATE FOREIGN TABLE statements that are pg_lake tables. @@ -694,6 +735,29 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) bool hasRestCatalogOption = HasRestCatalogTableOption(createStmt->options); bool hasObjectStoreCatalogOption = HasObjectStoreCatalogTableOption(createStmt->options); + /* + * For user-created iceberg_catalog servers, verify that the current user + * has USAGE privilege on the server. Built-in catalog names ('rest', + * 'postgres', 'object_store') have no backing server object and skip this + * check — access is controlled by the lake_write role instead. 
+ */ + if (hasRestCatalogOption) + { + char *catalog = GetStringOption(createStmt->options, "catalog", false); + + if (!IsCatalogOwnedByExtension(catalog)) + { + ForeignServer *server = GetForeignServerByName(catalog, false); + AclResult aclresult = object_aclcheck(ForeignServerRelationId, + server->serverid, + GetUserId(), + ACL_USAGE); + + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, catalog); + } + } + if (hasObjectStoreCatalogOption || hasRestCatalogOption) { Oid namespaceId = RangeVarGetAndCheckCreationNamespace(createStmt->base.relation, NoLock, NULL); @@ -767,7 +831,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) { char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogOptions *opts = - GetRestCatalogOptionsFromCatalog(catalogOptionValue); + ResolveRestCatalogOptions(catalogOptionValue); ErrorIfRestNamespaceDoesNotExist(opts, catalogName, catalogNamespace); @@ -806,7 +870,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) { char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogOptions *opts = - GetRestCatalogOptionsFromCatalog(catalogOptionValue); + ResolveRestCatalogOptions(catalogOptionValue); if (opts->catalogName != NULL) ereport(ERROR, @@ -839,6 +903,10 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) PgLakeCommonParentProcessUtility(params); + Oid readOnlyRelId = RangeVarGetRelid(createStmt->base.relation, NoLock, false); + + RecordIcebergCatalogServerDependency(readOnlyRelId, createStmt->options); + return true; } } @@ -902,7 +970,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogOptions *opts = - GetRestCatalogOptionsFromCatalog(catalogOptionValue); + ResolveRestCatalogOptions(catalogOptionValue); if 
(opts->locationPrefix != NULL) defaultLocationPrefix = opts->locationPrefix; @@ -944,6 +1012,8 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) /* the table is now created, get its OID */ Oid relationId = RangeVarGetRelid(createStmt->base.relation, NoLock, false); + RecordIcebergCatalogServerDependency(relationId, createStmt->options); + char *location; if (locationOption != NULL) @@ -1003,7 +1073,7 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) */ char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); RestCatalogOptions *opts = - GetRestCatalogOptionsFromCatalog(catalogOptionValue); + ResolveRestCatalogOptions(catalogOptionValue); RegisterNamespaceToRestCatalog(opts, get_database_name(MyDatabaseId), get_namespace_name(namespaceId)); diff --git a/pg_lake_table/src/fdw/pg_lake_table.c b/pg_lake_table/src/fdw/pg_lake_table.c index d4107894..cd94d34d 100644 --- a/pg_lake_table/src/fdw/pg_lake_table.c +++ b/pg_lake_table/src/fdw/pg_lake_table.c @@ -99,6 +99,7 @@ #include "pg_lake/pgduck/write_data.h" #include "pg_lake/planner/restriction_collector.h" #include "pg_lake/storage/local_storage.h" +#include "pg_lake/transaction/track_iceberg_metadata_changes.h" #include "pg_lake/util/item_pointer_utils.h" #include "pg_lake/util/rel_utils.h" #include "pg_lake/util/string_utils.h" @@ -2205,6 +2206,8 @@ postgresBeginForeignModify(ModifyTableState *mtstate, if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return; + ValidateXactRestCatalog(RelationGetRelid(resultRelInfo->ri_RelationDesc)); + /* Construct an execution state. 
*/ fmstate = create_foreign_modify(resultRelInfo->ri_RelationDesc, resultRelInfo->ri_RangeTableIndex, diff --git a/pg_lake_table/src/fdw/writable_table.c b/pg_lake_table/src/fdw/writable_table.c index dcab98fe..335ce0ce 100644 --- a/pg_lake_table/src/fdw/writable_table.c +++ b/pg_lake_table/src/fdw/writable_table.c @@ -1095,6 +1095,8 @@ AddQueryResultToTable(Oid relationId, char *readQuery, TupleDesc queryTupleDesc, { Assert(queryTupleDesc != NULL && queryTupleDesc->natts > 0); + ValidateXactRestCatalog(relationId); + int64 rowsProcessed = 0; ForeignTable *foreignTable = GetForeignTable(relationId); List *options = foreignTable->options; diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 8ee6ac0a..365e99f3 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -611,8 +611,46 @@ InitRestCatalogRequestsHashIfNeeded(void) /* -* RecordRestCatalogRequestInTx records a REST catalog request to be sent at post-commit. -*/ + * ValidateXactRestCatalog is a fail-fast guard that prevents cross-catalog + * DML within a single transaction. It resolves the relation's catalog + * identifier and, if a different catalog was already locked in for this + * transaction, errors out immediately — before any Parquet data is written + * to S3. + * + * No-ops for relations that are not REST-backed writable iceberg tables, + * or when no catalog has been locked in yet (first DML in the xact). 
+ */ +void +ValidateXactRestCatalog(Oid relationId) +{ + if (!IsPgLakeIcebergForeignTableById(relationId) || + GetIcebergCatalogType(relationId) != REST_CATALOG_READ_WRITE) + return; + + if (PgLakeXactRestCatalog == NULL || + PgLakeXactRestCatalog->catalogOpts == NULL) + return; + + char *catalog = GetStringOption(GetForeignTable(relationId)->options, + "catalog", false); + + if (catalog == NULL) + return; + + if (pg_strcasecmp(PgLakeXactRestCatalog->catalogOpts->catalog, catalog) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot modify tables from different REST catalogs " + "in the same transaction"), + errdetail("This transaction already targets catalog \"%s\", " + "but the current statement targets \"%s\".", + PgLakeXactRestCatalog->catalogOpts->catalog, catalog))); +} + + +/* + * RecordRestCatalogRequestInTx records a REST catalog request to be sent at post-commit. + */ void RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationType, const char *body) @@ -634,27 +672,17 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT if (PgLakeXactRestCatalog->catalogOpts == NULL) { - /* - * Deep-copy opts into TopTransactionContext so the struct and its - * string fields survive until XACT_EVENT_COMMIT. - */ - MemoryContext oldctx = MemoryContextSwitchTo(TopTransactionContext); - - PgLakeXactRestCatalog->catalogOpts = palloc0(sizeof(RestCatalogOptions)); - PgLakeXactRestCatalog->catalogOpts->catalog = pstrdup(resolvedOpts->catalog); - PgLakeXactRestCatalog->catalogOpts->host = pstrdup(resolvedOpts->host); - PgLakeXactRestCatalog->catalogOpts->oauthHostPath = resolvedOpts->oauthHostPath ? pstrdup(resolvedOpts->oauthHostPath) : NULL; - PgLakeXactRestCatalog->catalogOpts->clientId = resolvedOpts->clientId ? pstrdup(resolvedOpts->clientId) : NULL; - PgLakeXactRestCatalog->catalogOpts->clientSecret = resolvedOpts->clientSecret ? 
pstrdup(resolvedOpts->clientSecret) : NULL; - PgLakeXactRestCatalog->catalogOpts->scope = resolvedOpts->scope ? pstrdup(resolvedOpts->scope) : NULL; - PgLakeXactRestCatalog->catalogOpts->locationPrefix = resolvedOpts->locationPrefix ? pstrdup(resolvedOpts->locationPrefix) : NULL; - PgLakeXactRestCatalog->catalogOpts->catalogName = resolvedOpts->catalogName ? pstrdup(resolvedOpts->catalogName) : NULL; - PgLakeXactRestCatalog->catalogOpts->authType = resolvedOpts->authType; - PgLakeXactRestCatalog->catalogOpts->enableVendedCredentials = resolvedOpts->enableVendedCredentials; - - MemoryContextSwitchTo(oldctx); + PgLakeXactRestCatalog->catalogOpts = + CopyRestCatalogOptions(TopTransactionContext, resolvedOpts); } - else if (strcmp(PgLakeXactRestCatalog->catalogOpts->catalog, resolvedOpts->catalog) != 0) + + /* + * Belt-and-suspenders check. All DML and DDL entry points already + * call ValidateXactRestCatalog() at statement time, so in practice we + * should never reach here with a mismatched catalog. Kept as a last + * line of defense for any future code path that forgets to do so. 
+ */ + else if (pg_strcasecmp(PgLakeXactRestCatalog->catalogOpts->catalog, resolvedOpts->catalog) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot modify tables from different REST catalogs " diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 6ce4688e..fe53f7dd 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -169,6 +169,41 @@ def test_reject_options_on_non_server(superuser_conn, extension): superuser_conn.rollback() +# ── Option value validation ───────────────────────────────────────────────── + + +@pytest.mark.parametrize( + "option, bad_value, expected_error", + [ + ("rest_endpoint", "", "must not be empty"), + ("rest_endpoint", "localhost:8181", "URI scheme"), + ("oauth_endpoint", "", "must not be empty"), + ("oauth_endpoint", "localhost/oauth/tokens", "URI scheme"), + ("location_prefix", "", "must not be empty"), + ("location_prefix", "my-bucket/prefix", "URI scheme"), + ("catalog_name", "", "must not be empty"), + ("client_id", "", "must not be empty"), + ("client_secret", "", "must not be empty"), + ("scope", "", "must not be empty"), + ], +) +def test_reject_bad_option_values( + superuser_conn, extension, option, bad_value, expected_error +): + err = run_command( + f""" + CREATE SERVER test_bad_val TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ({option} '{bad_value}') + """, + superuser_conn, + raise_error=False, + ) + assert err is not None, f"Expected error for {option}='{bad_value}'" + assert expected_error in str(err), f"Expected '{expected_error}' in: {err}" + superuser_conn.rollback() + + # ── CREATE FOREIGN TABLE on iceberg_catalog servers is blocked ────────────── @@ -324,6 +359,10 @@ def test_create_table_with_server_catalog( """, superuser_conn, ) + run_command( + "GRANT USAGE ON FOREIGN SERVER test_srv_catalog TO PUBLIC", + 
superuser_conn, + ) superuser_conn.commit() err = run_command( @@ -340,12 +379,45 @@ def test_create_table_with_server_catalog( # "invalid catalog option" error. This proves the server was resolved. assert err is not None assert "invalid catalog option" not in str(err) + assert "permission denied" not in str(err) pg_conn.rollback() run_command("DROP SERVER test_srv_catalog CASCADE", superuser_conn) superuser_conn.commit() +def test_create_table_requires_usage_on_catalog_server( + pg_conn, superuser_conn, s3, extension, with_default_location +): + """A non-superuser without USAGE on the catalog server must be + denied when creating a table that references it.""" + run_command( + """ + CREATE SERVER test_no_usage_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + """ + CREATE TABLE test_no_usage_tbl (id bigint) + USING iceberg + WITH (catalog = 'test_no_usage_srv') + """, + pg_conn, + raise_error=False, + ) + assert err is not None + assert "permission denied for foreign server" in str(err).lower() + pg_conn.rollback() + + run_command("DROP SERVER test_no_usage_srv", superuser_conn) + superuser_conn.commit() + + def test_invalid_catalog_name_errors(pg_conn, s3, extension, with_default_location): """A catalog name that is neither a known literal nor a valid server should error.""" err = run_command( @@ -566,8 +638,9 @@ def test_allow_drop_user_created_server(superuser_conn, extension): superuser_conn.rollback() -def test_allow_rename_user_created_server(superuser_conn, extension): - """RENAME on a user-created server should work fine.""" +def test_reject_rename_iceberg_catalog_server(superuser_conn, extension): + """Renaming an iceberg_catalog server is blocked because dependent tables + store the server name as a string option in ftoptions.""" run_command( """ CREATE SERVER user_rename_srv TYPE 'rest' @@ -576,9 +649,13 @@ def 
test_allow_rename_user_created_server(superuser_conn, extension): """, superuser_conn, ) - run_command( - "ALTER SERVER user_rename_srv RENAME TO user_renamed_srv", superuser_conn + err = run_command( + "ALTER SERVER user_rename_srv RENAME TO user_renamed_srv", + superuser_conn, + raise_error=False, ) + assert err is not None + assert "cannot rename iceberg_catalog server" in str(err) superuser_conn.rollback() @@ -640,6 +717,70 @@ def test_set_default_catalog_rejects_nonexistent_server(pg_conn, extension): pg_conn.rollback() +def test_alter_system_default_catalog_defers_validation( + superuser_conn, pg_conn, extension +): + """The GUC check hook for pg_lake_iceberg.default_catalog cannot do catalog + lookups during SIGHUP reload (!IsTransactionState()), so it accepts the + value on faith — mirroring PostgreSQL's check_default_tablespace. + + Sequence: create a server, ALTER SYSTEM SET to it (passes in-transaction + validation), drop the server, then pg_reload_conf() re-applies the + now-stale name. The check hook must let it through. 
A subsequent + CREATE TABLE must fail at runtime.""" + run_command( + """ + CREATE SERVER alter_sys_cat TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.commit() + + superuser_conn.autocommit = True + run_command( + "ALTER SYSTEM SET pg_lake_iceberg.default_catalog = 'alter_sys_cat'", + superuser_conn, + ) + run_command("SELECT pg_reload_conf()", superuser_conn) + run_command("SELECT pg_sleep(0.2)", superuser_conn) + + result = run_query( + "SHOW pg_lake_iceberg.default_catalog", + superuser_conn, + ) + assert result[0][0] == "alter_sys_cat" + + run_command("DROP SERVER alter_sys_cat", superuser_conn) + run_command("SELECT pg_reload_conf()", superuser_conn) + run_command("SELECT pg_sleep(0.2)", superuser_conn) + + result = run_query( + "SHOW pg_lake_iceberg.default_catalog", + superuser_conn, + ) + assert result[0][0] == "alter_sys_cat" + superuser_conn.autocommit = False + + err = run_command( + "CREATE TABLE alter_sys_test (id bigint) USING iceberg", + pg_conn, + raise_error=False, + ) + assert err is not None + assert "invalid catalog option" in str(err).lower() + pg_conn.rollback() + + superuser_conn.autocommit = True + run_command( + "ALTER SYSTEM RESET pg_lake_iceberg.default_catalog", + superuser_conn, + ) + run_command("SELECT pg_reload_conf()", superuser_conn) + superuser_conn.autocommit = False + + # ── Case-sensitive server names ──────────────────────────────────────────── diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index 5f5fa4bd..068bcf07 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -10,9 +10,14 @@ @pytest.mark.parametrize( - "manifest_min_count_to_merge, target_manifest_size_kb, max_snapshot_age_params ", + "manifest_min_count_to_merge, target_manifest_size_kb, 
max_snapshot_age_params", manifest_snapshot_settings, ) +@pytest.mark.parametrize( + "create_iceberg_rest_table_parametrized", + ["rest", "user_server"], + indirect=True, +) def test_writable_rest_iceberg_table( installcheck, install_iceberg_to_duckdb, @@ -26,7 +31,7 @@ def test_writable_rest_iceberg_table( target_manifest_size_kb, max_snapshot_age_params, allow_iceberg_guc_perms, - create_iceberg_rest_table, + create_iceberg_rest_table_parametrized, create_test_helper_functions, create_http_helper_functions, ): @@ -48,7 +53,7 @@ def test_writable_rest_iceberg_table( ) superuser_conn.commit() - TABLE_NAME = create_iceberg_rest_table + TABLE_NAME = create_iceberg_rest_table_parametrized # show that we can read empty tables query = f"SELECT count(*) FROM {TABLE_NAMESPACE}.{TABLE_NAME}" @@ -663,6 +668,10 @@ def test_server_option_overrides_guc( """, superuser_conn, ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) superuser_conn.commit() run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) @@ -758,6 +767,10 @@ def test_reject_modify_different_rest_catalogs_in_single_transaction( """, superuser_conn, ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {name} TO PUBLIC", + superuser_conn, + ) superuser_conn.commit() run_command(f"CREATE SCHEMA IF NOT EXISTS {TABLE_NAMESPACE}", pg_conn) @@ -786,6 +799,392 @@ def test_reject_modify_different_rest_catalogs_in_single_transaction( pg_conn.rollback() + for name, catalog in [("table_a", "rest_catalog_a"), ("table_b", "rest_catalog_b")]: + run_command( + f"DROP TABLE IF EXISTS {TABLE_NAMESPACE}.{name}", + pg_conn, + ) + pg_conn.commit() + + run_command(f"DROP SCHEMA IF EXISTS {TABLE_NAMESPACE}", pg_conn) + pg_conn.commit() + + superuser_conn.rollback() + for name in ["rest_catalog_a", "rest_catalog_b"]: + run_command(f"DROP SERVER IF EXISTS {name}", superuser_conn) + superuser_conn.commit() + + +def test_multi_table_single_transaction_on_same_server( + installcheck, 
+ superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Two tables on the *same* user-created server: INSERT into one and + UPDATE the other in a single transaction must succeed. + """ + if installcheck: + return + + SERVER_NAME = "rest_multi_tbl" + SCHEMA_NAME = TABLE_NAMESPACE + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.multi_a (id bigint, value text) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + run_command( + f"CREATE TABLE {SCHEMA_NAME}.multi_b (id bigint, value text) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.multi_b SELECT i, 'old' FROM generate_series(1, 5) i", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.multi_a SELECT i, i::text FROM generate_series(1, 10) i", + pg_conn, + ) + run_command( + f"UPDATE {SCHEMA_NAME}.multi_b SET value = 'new' WHERE id <= 3", + pg_conn, + ) + pg_conn.commit() + + results_a = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.multi_a", pg_conn) + assert results_a[0][0] == 10 + + results_b = run_query( + f"SELECT count(*) FROM {SCHEMA_NAME}.multi_b WHERE value = 'new'", pg_conn 
+ ) + assert results_b[0][0] == 3 + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_token_cache_reuses_token_across_catalog_ops( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + set_polaris_gucs, + create_http_helper_functions, +): + """ + The per-catalog token cache must reuse a single OAuth token across + multiple back-to-back catalog operations in the same session. + A cache miss on every call would double request latency. + + Uses pg_lake_iceberg.http_client_trace_traffic to observe actual + HTTP traffic: each token fetch shows up as a POST to .../oauth/tokens + in the connection notices. + """ + if installcheck: + return + + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "token_cache_test" + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " + f"USING iceberg WITH (catalog='rest')", + pg_conn, + ) + pg_conn.commit() + + run_command( + "SET pg_lake_iceberg.http_client_trace_traffic TO on", + pg_conn, + ) + + pg_conn.notices.clear() + + for i in range(3): + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES ({i}, 'v')", + pg_conn, + ) + pg_conn.commit() + + token_fetches = sum( + 1 for n in pg_conn.notices if "oauth/tokens" in n and "POST" in n + ) + assert token_fetches <= 1, ( + f"Expected at most 1 OAuth token fetch (cached), got {token_fetches}. 
" + f"Notices:\n" + "\n".join(pg_conn.notices) + ) + + run_command( + "RESET pg_lake_iceberg.http_client_trace_traffic", + pg_conn, + ) + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() + + +def test_alter_server_credentials_invalidates_token_cache( + installcheck, + superuser_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + After ALTER SERVER, the cached OAuth token must be discarded so the + next catalog operation re-fetches it. We verify this by enabling + HTTP traffic tracing and checking that a POST to .../oauth/tokens + appears after the ALTER SERVER (proving the cache was invalidated). + """ + if installcheck: + return + + SERVER_NAME = "rest_token_inval" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "token_inval_test" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (1)", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + "SET pg_lake_iceberg.http_client_trace_traffic TO on", + superuser_conn, + ) + superuser_conn.notices.clear() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES 
(2)", + superuser_conn, + ) + superuser_conn.commit() + + pre_alter_fetches = sum( + 1 for n in superuser_conn.notices if "oauth/tokens" in n and "POST" in n + ) + assert pre_alter_fetches == 0, ( + f"Expected no token fetch before ALTER SERVER (token cached), " + f"got {pre_alter_fetches}. Notices:\n" + "\n".join(superuser_conn.notices) + ) + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET client_id 'rotated-id')", + superuser_conn, + ) + superuser_conn.commit() + + superuser_conn.notices.clear() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (3)", + superuser_conn, + ) + + commit_failed = False + try: + superuser_conn.commit() + except psycopg2.DatabaseError: + commit_failed = True + superuser_conn.rollback() + + post_alter_notices = list(superuser_conn.notices) + post_alter_fetches = sum( + 1 for n in post_alter_notices if "oauth/tokens" in n and "POST" in n + ) + + assert commit_failed, ( + "Expected COMMIT to fail after ALTER SERVER set bogus client_id " + "(cache should have been invalidated, forcing re-auth with bad creds)" + ) + assert post_alter_fetches >= 1, ( + f"Expected token re-fetch after ALTER SERVER (cache invalidated), " + f"got {post_alter_fetches}. 
Notices ({len(post_alter_notices)}):\n" + + "\n".join(post_alter_notices) + ) + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET client_id '{client_id}')", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (4)", + superuser_conn, + ) + superuser_conn.commit() + + results = run_query( + f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", superuser_conn + ) + assert results[0][0] == 3 + + run_command( + "RESET pg_lake_iceberg.http_client_trace_traffic", + superuser_conn, + ) + + superuser_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_drop_server_blocked_by_dependent_table( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Tables created with catalog='' record a pg_depend entry on + the server. DROP SERVER without CASCADE must be blocked, and + DROP SERVER CASCADE must drop the dependent table. 
+ """ + if installcheck: + return + + SERVER_NAME = "rest_dep_test" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "dep_tbl" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() + + err = run_command( + f"DROP SERVER {SERVER_NAME}", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot drop" in str(err).lower() + superuser_conn.rollback() + + run_command(f"DROP SERVER {SERVER_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + result = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + pg_conn, + ) + assert result[0][0] == 0 + def test_reject_writable_table_on_server_with_catalog_name( installcheck, @@ -825,6 +1224,10 @@ def test_reject_writable_table_on_server_with_catalog_name( """, superuser_conn, ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) superuser_conn.commit() run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) @@ -848,6 +1251,166 @@ def test_reject_writable_table_on_server_with_catalog_name( superuser_conn.commit() +def test_alter_server_add_catalog_name_does_not_reroute_writable_table( + installcheck, + superuser_conn, + s3, + 
extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Writable tables always use the database name for REST catalog routing, + ignoring the server's catalog_name. Adding catalog_name to the server + after a writable table exists must NOT change where requests go. + """ + if installcheck: + return + + SERVER_NAME = "rest_catname_reroute" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "catname_reroute_test" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (1)", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (ADD catalog_name 'nonexistent_db')", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (2)", + superuser_conn, + ) + superuser_conn.commit() + + results = run_query( + f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", superuser_conn + ) + assert results[0][0] == 2 + + superuser_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + 
superuser_conn.commit() + + +def test_alter_server_rest_endpoint_blocked_with_dependent_writable_tables( + installcheck, + superuser_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Changing rest_endpoint on a server that has dependent writable iceberg + tables must be blocked. + """ + if installcheck: + return + + SERVER_NAME = "rest_endpoint_block" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "endpoint_block_test" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET rest_endpoint 'http://other:8181')", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "dependent writable iceberg tables" in str(err) + superuser_conn.rollback() + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET client_id '{client_id}')", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + err = run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET rest_endpoint 'http://other:8181')", + superuser_conn, + raise_error=False, + ) + assert ( + err is None + ), "ALTER SERVER 
rest_endpoint should succeed after dependent tables are dropped" + superuser_conn.rollback() + + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + def test_table_catalog_name_overrides_server( installcheck, superuser_conn, @@ -907,6 +1470,10 @@ def test_table_catalog_name_overrides_server( """, superuser_conn, ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) superuser_conn.commit() run_command( diff --git a/pg_lake_table/tests/pytests/test_writable_iceberg_common.py b/pg_lake_table/tests/pytests/test_writable_iceberg_common.py index 46662abd..ff23121b 100644 --- a/pg_lake_table/tests/pytests/test_writable_iceberg_common.py +++ b/pg_lake_table/tests/pytests/test_writable_iceberg_common.py @@ -4,6 +4,7 @@ import json import re import random +from pathlib import Path TABLE_NAMESPACE = "test_writable_iceberg" @@ -270,3 +271,90 @@ def create_iceberg_rest_table( pg_conn.rollback() run_command(f"DROP SCHEMA {TABLE_NAMESPACE} CASCADE", pg_conn) pg_conn.commit() + + +USER_SERVER_NAME = "crud_test_server" + + +@pytest.fixture +def create_iceberg_user_server_rest_table( + superuser_conn, + pg_conn, + with_default_location, + generate_table_name, + polaris_session, + installcheck, +): + """Same as create_iceberg_rest_table but routes through a user-created + iceberg_catalog server instead of the built-in 'rest' GUC path.""" + if installcheck: + yield + return + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://{server_params.POLARIS_HOSTNAME}:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {USER_SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + 
superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {USER_SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + table_name = generate_table_name + + run_command(f"CREATE SCHEMA {TABLE_NAMESPACE}", pg_conn) + run_command( + f"CREATE TABLE {TABLE_NAMESPACE}.{table_name} " + f"(drop_col_1 INT, id_old bigint, drop_col_2 INT) " + f"USING iceberg WITH (catalog='{USER_SERVER_NAME}')", + pg_conn, + ) + + run_command( + f"ALTER TABLE {TABLE_NAMESPACE}.{table_name} " + f"DROP COLUMN drop_col_2, ADD COLUMN value text, DROP COLUMN drop_col_1", + pg_conn, + ) + run_command( + f"ALTER TABLE {TABLE_NAMESPACE}.{table_name} RENAME COLUMN id_old TO id", + pg_conn, + ) + run_command( + f"ALTER FOREIGN TABLE {TABLE_NAMESPACE}.{table_name} " + f"OPTIONS (ADD autovacuum_enabled 'false')", + pg_conn, + ) + + pg_conn.commit() + + yield table_name + + pg_conn.rollback() + run_command(f"DROP SCHEMA {TABLE_NAMESPACE} CASCADE", pg_conn) + pg_conn.commit() + run_command(f"DROP SERVER IF EXISTS {USER_SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +@pytest.fixture +def create_iceberg_rest_table_parametrized(request): + """Dispatches to either the built-in 'rest' or user-server fixture + based on the indirect parameter.""" + fixture_name = { + "rest": "create_iceberg_rest_table", + "user_server": "create_iceberg_user_server_rest_table", + }[request.param] + return request.getfixturevalue(fixture_name) From 6374ee0abd19e87f75dca23491a215c80de8ca51 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 4 May 2026 12:30:12 +0200 Subject: [PATCH 19/23] Some cleanup from last review addressing since I left everything in Cursor's hands Signed-off-by: sfc-gh-npuka --- pg_lake_engine/src/utils/catalog_type.c | 3 +- .../pg_lake/rest_catalog/rest_catalog.h | 2 +- .../src/rest_catalog/rest_catalog.c | 50 ++++++++++++------- .../track_iceberg_metadata_changes.c | 20 ++++---- .../pytests/test_modify_iceberg_rest_table.py | 33 ++++++++---- 5 files changed, 
65 insertions(+), 43 deletions(-) diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index 75bce280..bd23089b 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -110,8 +110,7 @@ HasReadOnlyOption(List *options) /* * IsCatalogOwnedByExtension returns true if the catalog name is one of * the reserved built-in names: 'rest', 'object_store', or 'postgres'. - * Comparison is case-insensitive so that "Postgres", "REST", etc. are - * also recognized as reserved. + * Comparison is case-insensitive. */ bool IsCatalogOwnedByExtension(const char *catalog) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 4e254789..92ffc6f1 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -119,7 +119,7 @@ extern PGDLLEXPORT char *GetMetadataLocationForRestCatalogForIcebergTable(Oid re extern PGDLLEXPORT void ReportHTTPError(HttpResult httpResult, int level); extern PGDLLEXPORT List *PostHeadersWithAuth(RestCatalogOptions * opts); extern PGDLLEXPORT List *DeleteHeadersWithAuth(RestCatalogOptions * opts); -extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers, RestCatalogOptions * opts); +extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(RestCatalogOptions * opts, HttpMethod method, const char *url, const char *body, List *headers); extern PGDLLEXPORT RestCatalogRequest * GetAddSnapshotCatalogRequest(IcebergSnapshot * newSnapshot, Oid relationId); extern PGDLLEXPORT RestCatalogRequest * GetAddSchemaCatalogRequest(Oid relationId, DataFileSchema * dataFileSchema); extern PGDLLEXPORT RestCatalogRequest * GetSetCurrentSchemaCatalogRequest(Oid relationId, int32_t schemaId); diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c 
b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index c2f740ac..330691ba 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -68,7 +68,8 @@ int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_OAUTH2; bool RestCatalogEnableVendedCredentials = true; /* - * Per-catalog token cache. Keyed by catalog. + * Per-rest-catalog token cache. Keyed by catalog. + * Should always be accessed via GetRestCatalogAccessToken() */ #define TOKEN_CACHE_KEY_LEN NAMEDATALEN @@ -82,6 +83,8 @@ typedef struct RestCatalogTokenCacheEntry static HTAB *RestCatalogTokenCache = NULL; static MemoryContext RestTokenCacheCtx = NULL; +/* end of per-catalog token cache variables */ + static char *GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken); static void FetchRestCatalogAccessToken(RestCatalogOptions * opts, char **accessToken, int *expiresIn); static void CreateNamespaceOnRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); @@ -723,8 +726,8 @@ StartStageRestCatalogIcebergTableCreate(Oid relationId) headers = lappend(headers, vendedCreds); } - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body->data, - headers, opts); + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_POST, postUrl, body->data, + headers); if (httpResult.status != 200) { @@ -859,9 +862,8 @@ RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogNam psprintf(REST_CATALOG_NAMESPACE_NAME, opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, - GetHeadersWithAuth(opts), - opts); + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_GET, getUrl, NULL, + GetHeadersWithAuth(opts)); switch (httpResult.status) { @@ -951,9 +953,8 @@ ErrorIfRestNamespaceDoesNotExist(RestCatalogOptions * opts, const char *catalogN psprintf(REST_CATALOG_NAMESPACE_NAME, 
opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, - GetHeadersWithAuth(opts), - opts); + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_GET, getUrl, NULL, + GetHeadersWithAuth(opts)); /* namespace not found */ if (httpResult.status == 404) @@ -1002,8 +1003,7 @@ GetMetadataLocationFromRestCatalog(RestCatalogOptions * opts, const char *restCa opts->host, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); List *headers = GetHeadersWithAuth(opts); - HttpResult hr = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, headers, - opts); + HttpResult hr = SendRequestToRestCatalog(opts, HTTP_GET, getUrl, NULL, headers); if (hr.status != 200) { @@ -1051,9 +1051,8 @@ CreateNamespaceOnRestCatalog(RestCatalogOptions * opts, const char *catalogName, psprintf(REST_CATALOG_NAMESPACE, opts->host, URLEncodePath(catalogName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body.data, - PostHeadersWithAuth(opts), - opts); + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_POST, postUrl, body.data, + PostHeadersWithAuth(opts)); if (httpResult.status != 200) { @@ -1155,6 +1154,12 @@ BuildTokenCacheKey(char *key, const RestCatalogOptions * opts) * Any ALTER/DROP SERVER blows away the entire token cache so stale * credentials are never reused. The cache is rebuilt lazily on the * next token lookup. + * + * We ignore hashvalue and reset the whole cache rather than selectively + * invalidating a single server's entry (as postgres_fdw does). With a + * handful of servers and infrequent ALTER SERVER, the cost of a few + * extra OAuth round-trips is negligible compared to the complexity of + * keying the cache by OID for selective invalidation. 
*/ static void InvalidateRestTokenCache(Datum arg, int cacheid, uint32 hashvalue) @@ -1169,6 +1174,11 @@ InvalidateRestTokenCache(Datum arg, int cacheid, uint32 hashvalue) /* * Initialize the per-catalog token cache hash table if needed. + * + * TokenCacheCallbackRegistered is separate from RestCatalogTokenCache because + * the callback must be registered exactly once per backend lifetime + * (CacheRegisterSyscacheCallback appends to a fixed-size array), while + * RestCatalogTokenCache is reset to NULL on every invalidation. */ static bool TokenCacheCallbackRegistered = false; @@ -1318,10 +1328,14 @@ FetchRestCatalogAccessToken(RestCatalogOptions * opts, char **accessToken, int * headers = lappend(headers, "Content-Type: application/x-www-form-urlencoded"); - /* POST — pass NULL opts to skip 419 token refresh (avoids recursion) */ - HttpResult httpResponse = SendRequestToRestCatalog(HTTP_POST, accessTokenUrl, - body.data, headers, - NULL); + /* + * Pass NULL opts so SendRequestToRestCatalog skips the 419 token-refresh + * retry branch. Otherwise a 419 here would call + * GetRestCatalogAccessToken -> FetchRestCatalogAccessToken -> + * SendRequestToRestCatalog in an infinite loop. 
+ */ + HttpResult httpResponse = SendRequestToRestCatalog(NULL, HTTP_POST, accessTokenUrl, + body.data, headers); if (httpResponse.status != 200) ereport(ERROR, diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 365e99f3..6e91a348 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -292,10 +292,9 @@ PostAllRestCatalogRequests(void) if (createTableRequest != NULL) { HttpResult httpResult = - SendRequestToRestCatalog(HTTP_POST, requestPerTable->tableRestUrl, - createTableRequest->body, - PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts), - PgLakeXactRestCatalog->catalogOpts); + SendRequestToRestCatalog(PgLakeXactRestCatalog->catalogOpts, HTTP_POST, + requestPerTable->tableRestUrl, createTableRequest->body, + PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts)); if (httpResult.status != 200) { @@ -310,10 +309,9 @@ PostAllRestCatalogRequests(void) else if (dropTableRequest != NULL) { HttpResult httpResult = - SendRequestToRestCatalog(HTTP_DELETE, requestPerTable->tableRestUrl, - NULL, - DeleteHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts), - PgLakeXactRestCatalog->catalogOpts); + SendRequestToRestCatalog(PgLakeXactRestCatalog->catalogOpts, HTTP_DELETE, + requestPerTable->tableRestUrl, NULL, + DeleteHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts)); if (httpResult.status != 204) { @@ -433,9 +431,9 @@ PostAllRestCatalogRequests(void) char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, PgLakeXactRestCatalog->catalogOpts->host, catalogName); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, - PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts), - PgLakeXactRestCatalog->catalogOpts); + HttpResult httpResult = SendRequestToRestCatalog(PgLakeXactRestCatalog->catalogOpts, HTTP_POST, + url, batchRequestBody->data, + 
PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts)); if (httpResult.status != 204) { diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index 068bcf07..5a51aa53 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -923,7 +923,7 @@ def test_token_cache_reuses_token_across_catalog_ops( Uses pg_lake_iceberg.http_client_trace_traffic to observe actual HTTP traffic: each token fetch shows up as a POST to .../oauth/tokens - in the connection notices. + in the connection notices. Does 3 back-to-back inserts. """ if installcheck: return @@ -935,18 +935,17 @@ def test_token_cache_reuses_token_across_catalog_ops( pg_conn.commit() run_command( - f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " - f"USING iceberg WITH (catalog='rest')", + "SET pg_lake_iceberg.http_client_trace_traffic TO on", pg_conn, ) - pg_conn.commit() + pg_conn.notices.clear() run_command( - "SET pg_lake_iceberg.http_client_trace_traffic TO on", + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " + f"USING iceberg WITH (catalog='rest')", pg_conn, ) - - pg_conn.notices.clear() + pg_conn.commit() for i in range(3): run_command( @@ -958,8 +957,8 @@ def test_token_cache_reuses_token_across_catalog_ops( token_fetches = sum( 1 for n in pg_conn.notices if "oauth/tokens" in n and "POST" in n ) - assert token_fetches <= 1, ( - f"Expected at most 1 OAuth token fetch (cached), got {token_fetches}. " + assert token_fetches == 1, ( + f"Expected exactly 1 OAuth token fetch (cached), got {token_fetches}. " f"Notices:\n" + "\n".join(pg_conn.notices) ) @@ -987,6 +986,8 @@ def test_alter_server_credentials_invalidates_token_cache( next catalog operation re-fetches it. 
We verify this by enabling HTTP traffic tracing and checking that a POST to .../oauth/tokens appears after the ALTER SERVER (proving the cache was invalidated). + Test that cache is invalidated on bogus credentials (1 fetch, commit fails), + then cache is invalidated again on restored credentials (1 fetch, commit succeeds). """ if installcheck: return @@ -1078,8 +1079,8 @@ def test_alter_server_credentials_invalidates_token_cache( "Expected COMMIT to fail after ALTER SERVER set bogus client_id " "(cache should have been invalidated, forcing re-auth with bad creds)" ) - assert post_alter_fetches >= 1, ( - f"Expected token re-fetch after ALTER SERVER (cache invalidated), " + assert post_alter_fetches == 1, ( + f"Expected exactly 1 token re-fetch after ALTER SERVER (cache invalidated), " f"got {post_alter_fetches}. Notices ({len(post_alter_notices)}):\n" + "\n".join(post_alter_notices) ) @@ -1090,12 +1091,22 @@ def test_alter_server_credentials_invalidates_token_cache( ) superuser_conn.commit() + superuser_conn.notices.clear() + run_command( f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (4)", superuser_conn, ) superuser_conn.commit() + restore_fetches = sum( + 1 for n in superuser_conn.notices if "oauth/tokens" in n and "POST" in n + ) + assert restore_fetches == 1, ( + f"Expected exactly 1 token re-fetch after restoring credentials, " + f"got {restore_fetches}. 
Notices:\n" + "\n".join(superuser_conn.notices) + ) + results = run_query( f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", superuser_conn ) From 46daf1507d0f5658b5f60aeb5dcea5034e2cd43f Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 4 May 2026 15:35:11 +0200 Subject: [PATCH 20/23] Fix GetRestCatalogName dead code Signed-off-by: sfc-gh-npuka --- pg_lake_iceberg/src/rest_catalog/rest_catalog.c | 15 ++++----------- pg_lake_table/src/ddl/create_table.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 330691ba..5b1347aa 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -1549,8 +1549,9 @@ GetRestCatalogNamespace(Oid relationId) * subsequent ALTER SERVER ? ADD/SET catalog_name cannot silently * re-route an existing table to a different REST namespace. * - * Read-only tables resolve from table option > server option, and - * must have catalog_name set on one of them. + * Read-only tables always have catalog_name baked into their table + * options at CREATE TABLE time (inherited from the server option or + * defaulted to the database name). 
*/ char * GetRestCatalogName(Oid relationId) @@ -1569,15 +1570,7 @@ GetRestCatalogName(Oid relationId) if (catalogName != NULL) return catalogName; - RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); - - if (opts->catalogName != NULL) - return opts->catalogName; - - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("catalog_name is required for read-only REST catalog tables"), - errhint("Set catalog_name on the table or the server."))); + elog(ERROR, "catalog_name missing on read-only REST catalog table %u", relationId); } diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index bb566c54..8fb4db9f 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -817,7 +817,19 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (catalogNameProvided == NULL && hasExternalCatalogReadOnlyOption) { - catalogName = get_database_name(MyDatabaseId); + if (hasRestCatalogOption) + { + char *catalogOptionValue = + GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + ResolveRestCatalogOptions(catalogOptionValue); + + catalogName = opts->catalogName ? pstrdup(opts->catalogName) + : get_database_name(MyDatabaseId); + } + else + catalogName = get_database_name(MyDatabaseId); + createStmt->options = lappend(createStmt->options, makeDefElem("catalog_name", (Node *) makeString(catalogName), -1)); From ef097de55d816a22e4719df23f1f666a347c729e Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Sat, 23 May 2026 22:57:41 +0300 Subject: [PATCH 21/23] More cleanup from Onder's latest review Fail fast at statement time on cross-catalog DML. Rename ValidateXactRestCatalog to BindRelationToXactRestCatalog and call it from both FDW write paths (postgresBeginForeignModify and AddQueryResultToTable). 
The function now binds the transaction to the relation's REST catalog on the first DML and rejects any subsequent statement targeting a different catalog, so the second INSERT errors out before any Parquet is written. The pre-commit hook is kept as a belt-and-suspenders fallback for DDL paths that reach the tracker without going through the new guard. The regression test asserts the second INSERT (not COMMIT) raises. Move ValidateIcebergCatalogServerDDL registration from pg_lake_table to pg_lake_iceberg, where the handler is actually defined. Architecturally this puts ownership of catalog-server DDL validation in the extension that owns the catalog server abstraction. Fix latent dangling-pointer in GetValidCatalogOptionsHint. The static hint cache was allocated via initStringInfo in whatever short-lived context the validator happened to be running in (typically MessageContext), so the second invalid-option failure in the same backend returned freed memory to errhint. Allocate in TopMemoryContext so the cache survives transaction boundaries. The strengthened test_reject_unknown_server_option now issues two failing CREATE SERVER statements on the same connection and asserts the full option list appears in the hint on both attempts; the previous version would not have caught this bug. Also covers earlier review-driven hardening on the same branch: token cache invalidation on ALTER SERVER credentials, ALTER SERVER rest_endpoint blocking for all dependent REST iceberg tables (writable and read-only), and DROP OWNED BY / concurrent DROP SERVER dependency tests. 
Co-authored-by: Cursor Signed-off-by: sfc-gh-npuka --- pg_lake_iceberg/src/init.c | 3 + .../src/rest_catalog/rest_catalog.c | 39 ++- .../track_iceberg_metadata_changes.h | 2 +- pg_lake_table/src/fdw/pg_lake_table.c | 2 +- pg_lake_table/src/fdw/writable_table.c | 2 +- pg_lake_table/src/init.c | 2 - .../track_iceberg_metadata_changes.c | 77 ++++-- .../pytests/test_iceberg_catalog_server.py | 50 +++- .../pytests/test_modify_iceberg_rest_table.py | 258 +++++++++++++++++- 9 files changed, 373 insertions(+), 62 deletions(-) diff --git a/pg_lake_iceberg/src/init.c b/pg_lake_iceberg/src/init.c index 2d9f9644..ac4b4067 100644 --- a/pg_lake_iceberg/src/init.c +++ b/pg_lake_iceberg/src/init.c @@ -27,6 +27,7 @@ #include "pg_lake/avro/avro_reader.h" #include "pg_lake/avro/avro_writer.h" #include "pg_lake/copy/copy_format.h" +#include "pg_lake/ddl/utility_hook.h" #include "pg_lake/iceberg/api.h" #include "pg_lake/pgduck/numeric.h" #include "pg_lake/iceberg/catalog.h" @@ -332,6 +333,8 @@ _PG_init(void) NULL, NULL, NULL); AvroInit(); + + RegisterUtilityStatementHandler(ValidateIcebergCatalogServerDDL, NULL); } diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 5b1347aa..4d024c61 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -173,6 +173,12 @@ FindCatalogOptionDesc(const char *name) /* * Build the "Valid options are: ?" hint string. Cached after first call. + * + * The cache must outlive any individual transaction: this helper is only + * called on validator error paths, and ereport(ERROR) immediately aborts + * the current transaction, freeing whatever short-lived context the + * validator was running under. Allocate the buffer in TopMemoryContext + * so the static pointer remains valid for the lifetime of the backend. 
*/ static const char * GetValidCatalogOptionsHint(void) @@ -181,8 +187,10 @@ GetValidCatalogOptionsHint(void) if (hint == NULL) { + MemoryContext oldcxt; StringInfoData buf; + oldcxt = MemoryContextSwitchTo(TopMemoryContext); initStringInfo(&buf); appendStringInfoString(&buf, "Valid options are: "); for (int i = 0; i < NUM_CATALOG_OPTIONS; i++) @@ -193,6 +201,7 @@ GetValidCatalogOptionsHint(void) } appendStringInfoChar(&buf, '.'); hint = buf.data; + MemoryContextSwitchTo(oldcxt); } return hint; @@ -334,13 +343,15 @@ iceberg_catalog_validator(PG_FUNCTION_ARGS) /* - * ServerHasDependentWritableTable returns true if the given server - * has at least one dependent writable iceberg table recorded in - * pg_depend. Used to block ALTER SERVER changes that would silently - * break existing tables. + * ServerHasDependentRestIcebergTable returns true if the given server + * has at least one dependent REST-backed iceberg table (read-only or + * writable) recorded in pg_depend. Used to block ALTER SERVER changes + * that would silently break existing tables, since both writable and + * read-only REST tables make REST API calls at runtime against the + * server's rest_endpoint. */ static bool -ServerHasDependentWritableTable(Oid serverOid) +ServerHasDependentRestIcebergTable(Oid serverOid) { Relation depRel; ScanKeyData key[2]; @@ -369,7 +380,9 @@ ServerHasDependentWritableTable(Oid serverOid) if (depForm->classid != RelationRelationId) continue; - if (GetIcebergCatalogType(depForm->objid) == REST_CATALOG_READ_WRITE) + IcebergCatalogType type = GetIcebergCatalogType(depForm->objid); + + if (type == REST_CATALOG_READ_WRITE || type == REST_CATALOG_READ_ONLY) { found = true; break; @@ -488,9 +501,9 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, return false; /* - * Changing rest_endpoint on a server with dependent writable tables - * would silently point them at a different REST catalog, breaking the - * metadata chain. 
+ * Changing rest_endpoint on a server with dependent iceberg tables + * (writable or read-only) would silently point them at a different + * REST catalog, breaking the metadata chain. */ ListCell *lc; @@ -499,12 +512,12 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, DefElem *def = (DefElem *) lfirst(lc); if (pg_strcasecmp(def->defname, "rest_endpoint") == 0 && - ServerHasDependentWritableTable(server->serverid)) + ServerHasDependentRestIcebergTable(server->serverid)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot change \"rest_endpoint\" on server \"%s\" " - "because it has dependent writable iceberg tables", + "because it has dependent iceberg tables", stmt->servername), errhint("Drop the dependent tables first, or create a " "new server with the desired endpoint."))); @@ -524,6 +537,8 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, static void ApplyGUCDefaults(RestCatalogOptions * opts) { + char *defaultLocationPrefix = GetIcebergDefaultLocationPrefix(); + opts->host = RestCatalogHost ? pstrdup(RestCatalogHost) : NULL; opts->oauthHostPath = RestCatalogOauthHostPath ? pstrdup(RestCatalogOauthHostPath) : NULL; opts->clientId = RestCatalogClientId ? pstrdup(RestCatalogClientId) : NULL; @@ -531,7 +546,7 @@ ApplyGUCDefaults(RestCatalogOptions * opts) opts->scope = RestCatalogScope ? pstrdup(RestCatalogScope) : NULL; opts->authType = RestCatalogAuthType; opts->enableVendedCredentials = RestCatalogEnableVendedCredentials; - opts->locationPrefix = GetIcebergDefaultLocationPrefix(); + opts->locationPrefix = defaultLocationPrefix ? 
pstrdup(defaultLocationPrefix) : NULL; } diff --git a/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h b/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h index 12dd26b0..e76afbd5 100644 --- a/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h +++ b/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h @@ -61,4 +61,4 @@ extern PGDLLEXPORT void ResetRestCatalogRequests(void); extern PGDLLEXPORT HTAB *GetTrackedIcebergMetadataOperations(void); extern PGDLLEXPORT bool HasAnyTrackedIcebergMetadataChanges(void); extern PGDLLEXPORT bool IsIcebergTableCreatedInCurrentTransaction(Oid relation); -extern PGDLLEXPORT void ValidateXactRestCatalog(Oid relationId); +extern PGDLLEXPORT void BindRelationToXactRestCatalog(Oid relationId); diff --git a/pg_lake_table/src/fdw/pg_lake_table.c b/pg_lake_table/src/fdw/pg_lake_table.c index cd94d34d..3f5cf9f4 100644 --- a/pg_lake_table/src/fdw/pg_lake_table.c +++ b/pg_lake_table/src/fdw/pg_lake_table.c @@ -2206,7 +2206,7 @@ postgresBeginForeignModify(ModifyTableState *mtstate, if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return; - ValidateXactRestCatalog(RelationGetRelid(resultRelInfo->ri_RelationDesc)); + BindRelationToXactRestCatalog(RelationGetRelid(resultRelInfo->ri_RelationDesc)); /* Construct an execution state. 
*/ fmstate = create_foreign_modify(resultRelInfo->ri_RelationDesc, diff --git a/pg_lake_table/src/fdw/writable_table.c b/pg_lake_table/src/fdw/writable_table.c index 335ce0ce..f98252e3 100644 --- a/pg_lake_table/src/fdw/writable_table.c +++ b/pg_lake_table/src/fdw/writable_table.c @@ -1095,7 +1095,7 @@ AddQueryResultToTable(Oid relationId, char *readQuery, TupleDesc queryTupleDesc, { Assert(queryTupleDesc != NULL && queryTupleDesc->natts > 0); - ValidateXactRestCatalog(relationId); + BindRelationToXactRestCatalog(relationId); int64 rowsProcessed = 0; ForeignTable *foreignTable = GetForeignTable(relationId); diff --git a/pg_lake_table/src/init.c b/pg_lake_table/src/init.c index afea03f5..9e350907 100644 --- a/pg_lake_table/src/init.c +++ b/pg_lake_table/src/init.c @@ -42,7 +42,6 @@ #include "pg_lake/planner/insert_select.h" #include "pg_lake/planner/query_pushdown.h" #include "pg_lake/util/s3_file_utils.h" -#include "pg_lake/rest_catalog/rest_catalog.h" #include "pg_lake/test/hide_lake_objects.h" #include "pg_lake/transaction/transaction_hooks.h" #include "pg_lake/transaction/track_iceberg_metadata_changes.h" @@ -383,7 +382,6 @@ _PG_init(void) MarkGUCPrefixReserved(PG_LAKE_TABLE); - RegisterUtilityStatementHandler(ValidateIcebergCatalogServerDDL, NULL); RegisterUtilityStatementHandler(ProcessVacuumPgLakeTable, NULL); RegisterUtilityStatementHandler(ProcessCreatePgLakeTable, NULL); RegisterUtilityStatementHandler(ProcessCreateAsSelectPgLakeTable, NULL); diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 6e91a348..f385fc49 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -609,40 +609,68 @@ InitRestCatalogRequestsHashIfNeeded(void) /* - * ValidateXactRestCatalog is a fail-fast guard that prevents cross-catalog - * DML within a single transaction. 
It resolves the relation's catalog - * identifier and, if a different catalog was already locked in for this - * transaction, errors out immediately — before any Parquet data is written - * to S3. + * BindRelationToXactRestCatalog binds the current transaction to the REST + * catalog associated with `relationId`, failing fast if a *different* REST + * catalog was already locked in for this transaction. * - * No-ops for relations that are not REST-backed writable iceberg tables, - * or when no catalog has been locked in yet (first DML in the xact). + * Semantics: + * - For relations that are not REST-backed writable iceberg tables: no-op. + * - For the first REST-backed write of the transaction: pre-resolves the + * full catalog options and stashes them in TopTransactionContext, so + * subsequent calls within the same transaction can be checked without + * touching pg_foreign_server again at XACT_EVENT_COMMIT. + * - For any subsequent REST-backed write whose catalog differs from the + * locked-in one: raises ERRCODE_FEATURE_NOT_SUPPORTED *before* any + * Parquet data is written to S3. + * + * Called at the top of every DML entry point that can mutate REST-backed + * iceberg tables: postgresBeginForeignModify() for row-by-row DML, and + * AddQueryResultToTable() for the INSERT...SELECT and COPY..FROM pushdown + * paths. DDL paths (CREATE TABLE / DROP TABLE) reach the same protection + * indirectly via RecordRestCatalogRequestInTx(), which is invoked + * synchronously at statement time from the utility hook. */ void -ValidateXactRestCatalog(Oid relationId) +BindRelationToXactRestCatalog(Oid relationId) { if (!IsPgLakeIcebergForeignTableById(relationId) || GetIcebergCatalogType(relationId) != REST_CATALOG_READ_WRITE) return; - if (PgLakeXactRestCatalog == NULL || - PgLakeXactRestCatalog->catalogOpts == NULL) - return; + /* + * Resolve the relation's catalog options up front. 
We need the full + * resolved struct (host, credentials, ...), not just the user-facing + * identifier, because XACT_EVENT_PRE_COMMIT reuses these fields when + * issuing the REST API requests and is not allowed to do syscache lookups + * by then. + */ + RestCatalogOptions *resolvedOpts = GetRestCatalogOptionsForRelation(relationId); - char *catalog = GetStringOption(GetForeignTable(relationId)->options, - "catalog", false); + InitRestCatalogRequestsHashIfNeeded(); - if (catalog == NULL) + if (PgLakeXactRestCatalog->catalogOpts == NULL) + { + PgLakeXactRestCatalog->catalogOpts = + CopyRestCatalogOptions(TopTransactionContext, resolvedOpts); return; + } - if (pg_strcasecmp(PgLakeXactRestCatalog->catalogOpts->catalog, catalog) != 0) + /* + * Both sides of the comparison are the canonical catalog name (lowercase + * "rest" for the built-in catalog, server name as stored in + * pg_foreign_server for user-defined ones). pg_strcasecmp matches the + * casing rules PostgreSQL applies to identifier resolution. + */ + if (pg_strcasecmp(PgLakeXactRestCatalog->catalogOpts->catalog, + resolvedOpts->catalog) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot modify tables from different REST catalogs " "in the same transaction"), errdetail("This transaction already targets catalog \"%s\", " "but the current statement targets \"%s\".", - PgLakeXactRestCatalog->catalogOpts->catalog, catalog))); + PgLakeXactRestCatalog->catalogOpts->catalog, + resolvedOpts->catalog))); } @@ -668,6 +696,14 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT /* Resolve the options for this relation's REST catalog */ RestCatalogOptions *resolvedOpts = GetRestCatalogOptionsForRelation(relationId); + /* + * DDL paths (CREATE TABLE / DROP TABLE) call us directly from the + * utility hook and may be the very first thing to touch a REST + * catalog in this transaction, so this branch is still genuinely + * reached. 
DML paths reach us only from XACT_EVENT_PRE_COMMIT, by + * which time BindRelationToXactRestCatalog() has already populated + * catalogOpts. + */ if (PgLakeXactRestCatalog->catalogOpts == NULL) { PgLakeXactRestCatalog->catalogOpts = @@ -675,10 +711,11 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT } /* - * Belt-and-suspenders check. All DML and DDL entry points already - * call ValidateXactRestCatalog() at statement time, so in practice we - * should never reach here with a mismatched catalog. Kept as a last - * line of defense for any future code path that forgets to do so. + * Belt-and-suspenders check. All DML and DDL entry points either + * bind through BindRelationToXactRestCatalog() at statement time or + * populate catalogOpts via the branch above, so in practice we never + * reach here with a mismatched catalog. Kept as a last line of + * defense for any future code path that forgets to do so. */ else if (pg_strcasecmp(PgLakeXactRestCatalog->catalogOpts->catalog, resolvedOpts->catalog) != 0) ereport(ERROR, diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index fe53f7dd..70815de1 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -111,19 +111,43 @@ def test_create_server_horizon_auth(superuser_conn, extension): def test_reject_unknown_server_option(superuser_conn, extension): - """Unknown options should be rejected by the validator.""" - err = run_command( - """ - CREATE SERVER test_bad_opt TYPE 'rest' - FOREIGN DATA WRAPPER iceberg_catalog - OPTIONS (rest_endpoint 'http://localhost:8181', bogus_option 'x') - """, - superuser_conn, - raise_error=False, - ) - assert "invalid option" in str(err) - assert "bogus_option" in str(err) - superuser_conn.rollback() + """ + Unknown options should be rejected by the validator. 
+ + Issued twice on the same connection because the validator caches the + hint string in a static; the second call must hit the cached path and + must still produce a well-formed hint (regression guard against the + hint being palloc'd in a per-statement memory context). + """ + EXPECTED_OPTIONS = [ + "rest_endpoint", + "rest_auth_type", + "oauth_endpoint", + "scope", + "enable_vended_credentials", + "location_prefix", + "catalog_name", + "client_id", + "client_secret", + ] + + for typo in ("bogus_option", "another_typo"): + err = run_command( + f""" + CREATE SERVER test_bad_opt_{typo} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181', {typo} 'x') + """, + superuser_conn, + raise_error=False, + ) + msg = str(err) + assert "invalid option" in msg + assert typo in msg + assert "Valid options are:" in msg + for opt in EXPECTED_OPTIONS: + assert opt in msg, f"hint missing {opt!r} on attempt {typo!r}: {msg}" + superuser_conn.rollback() def test_reject_invalid_auth_type(superuser_conn, extension): diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index 5a51aa53..cd1dd88c 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -5,6 +5,8 @@ from helpers.spark import * import json import re +import threading +import time from test_writable_iceberg_common import * @@ -746,7 +748,10 @@ def test_reject_modify_different_rest_catalogs_in_single_transaction( ): """ Modifying tables from two different REST catalog servers in the same - transaction must be rejected. + transaction must be rejected at statement time -- before any Parquet is + written to S3 for the offending statement. The first DML binds the + transaction to its REST catalog; the second DML to a different catalog + must raise immediately rather than waiting for XACT_EVENT_PRE_COMMIT. 
""" if installcheck: return @@ -783,19 +788,25 @@ def test_reject_modify_different_rest_catalogs_in_single_transaction( ) pg_conn.commit() + # First INSERT succeeds and binds the transaction to rest_catalog_a. run_command( f"INSERT INTO {TABLE_NAMESPACE}.table_a SELECT i FROM generate_series(1, 10) i", pg_conn, ) - run_command( - f"INSERT INTO {TABLE_NAMESPACE}.table_b SELECT i FROM generate_series(1, 10) i", - pg_conn, - ) + # Second INSERT must raise at statement time, not at COMMIT. The + # "the current statement targets" detail wording is emitted only by + # BindRelationToXactRestCatalog(); the XACT_EVENT_PRE_COMMIT fallback + # uses "table %u belongs to" instead, so this match also pins down + # which code path fired. with pytest.raises( - psycopg2.errors.FeatureNotSupported, match="different REST catalogs" + psycopg2.errors.FeatureNotSupported, + match=r"the current statement targets", ): - pg_conn.commit() + run_command( + f"INSERT INTO {TABLE_NAMESPACE}.table_b SELECT i FROM generate_series(1, 10) i", + pg_conn, + ) pg_conn.rollback() @@ -1125,7 +1136,7 @@ def test_alter_server_credentials_invalidates_token_cache( superuser_conn.commit() -def test_drop_server_blocked_by_dependent_table( +def test_drop_server_with_dependent_iceberg_table( installcheck, superuser_conn, pg_conn, @@ -1197,6 +1208,228 @@ def test_drop_server_blocked_by_dependent_table( assert result[0][0] == 0 +def test_drop_owned_by_with_dependent_iceberg_table( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + When a role owns an iceberg_catalog server that has dependent iceberg + tables, DROP OWNED BY must propagate the dependency: + - DROP OWNED BY RESTRICT -> blocked + - DROP OWNED BY CASCADE -> drops the dependent table too + """ + if installcheck: + return + + ROLE_NAME = "rest_owned_test_role" + SERVER_NAME = "rest_owned_test_srv" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = 
"owned_dep_tbl" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command(f"DROP ROLE IF EXISTS {ROLE_NAME}", superuser_conn, raise_error=False) + superuser_conn.commit() + + run_command( + f"CREATE ROLE {ROLE_NAME} WITH CREATEDB LOGIN", + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO {ROLE_NAME}", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"SET ROLE {ROLE_NAME}", superuser_conn) + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + run_command("RESET ROLE", superuser_conn) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() + + err = run_command( + f"DROP OWNED BY {ROLE_NAME} RESTRICT", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot drop" in str(err).lower() + superuser_conn.rollback() + + server_exists = run_query( + f"SELECT count(*) FROM pg_foreign_server WHERE srvname = '{SERVER_NAME}'", + superuser_conn, + ) + assert server_exists[0][0] == 1 + + table_exists = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + pg_conn, + ) + assert table_exists[0][0] == 1 + + run_command(f"DROP OWNED BY {ROLE_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + server_exists = run_query( + f"SELECT count(*) FROM pg_foreign_server WHERE 
srvname = '{SERVER_NAME}'", + superuser_conn, + ) + assert server_exists[0][0] == 0 + + table_exists = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + pg_conn, + ) + assert table_exists[0][0] == 0 + + run_command(f"DROP ROLE {ROLE_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_drop_server_blocks_on_active_query( + installcheck, + superuser_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + DROP SERVER ... CASCADE on a server with a dependent iceberg table must + block while another transaction holds AccessShareLock on that table. + This verifies that the pg_depend edge registered by + RecordIcebergCatalogServerDependency is honored by core's dependency + walker, which acquires AccessExclusiveLock on each dependent table. + """ + if installcheck: + return + + SERVER_NAME = "rest_lock_test_srv" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "lock_dep_tbl" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + holder_conn = open_pg_conn("postgres") + dropper_conn = open_pg_conn("postgres") + try: + run_command("BEGIN", holder_conn) + run_command( + f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", + holder_conn, + ) + + drop_done = threading.Event() + drop_error 
= [] + + def run_drop(): + try: + run_command(f"DROP SERVER {SERVER_NAME} CASCADE", dropper_conn) + dropper_conn.commit() + except Exception as e: + drop_error.append(e) + finally: + drop_done.set() + + drop_thread = threading.Thread(target=run_drop) + drop_thread.start() + + deadline = time.time() + 10 + blocked = False + while time.time() < deadline: + rows = run_query( + """ + SELECT count(*) FROM pg_stat_activity + WHERE wait_event_type = 'Lock' + AND query LIKE 'DROP SERVER%' + """, + superuser_conn, + ) + superuser_conn.rollback() + if int(rows[0][0]) == 1: + blocked = True + break + time.sleep(0.1) + + assert blocked, "Expected DROP SERVER to be blocked on AccessShareLock" + assert not drop_done.is_set(), "DROP SERVER should not have completed yet" + + run_command("COMMIT", holder_conn) + + drop_thread.join(timeout=15) + assert drop_done.is_set(), "DROP SERVER did not complete after lock released" + assert not drop_error, f"DROP SERVER failed: {drop_error[0]}" + + result = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + superuser_conn, + ) + assert result[0][0] == 0 + finally: + try: + holder_conn.rollback() + except Exception: + pass + holder_conn.close() + dropper_conn.close() + + def test_reject_writable_table_on_server_with_catalog_name( installcheck, superuser_conn, @@ -1342,7 +1575,7 @@ def test_alter_server_add_catalog_name_does_not_reroute_writable_table( superuser_conn.commit() -def test_alter_server_rest_endpoint_blocked_with_dependent_writable_tables( +def test_alter_server_rest_endpoint_blocked_with_dependent_tables( installcheck, superuser_conn, s3, @@ -1352,8 +1585,9 @@ def test_alter_server_rest_endpoint_blocked_with_dependent_writable_tables( create_http_helper_functions, ): """ - Changing rest_endpoint on a server that has dependent writable iceberg - tables must be blocked. 
+ Changing rest_endpoint on a server that has dependent iceberg tables + (writable or read-only) must be blocked, because both types make REST + API calls against the server's endpoint at runtime. """ if installcheck: return @@ -1396,7 +1630,7 @@ def test_alter_server_rest_endpoint_blocked_with_dependent_writable_tables( raise_error=False, ) assert err is not None - assert "dependent writable iceberg tables" in str(err) + assert "dependent iceberg tables" in str(err) superuser_conn.rollback() run_command( From 0e65cd7b8265cebab4be455470ee4b8dfa4b466e Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 25 May 2026 14:37:15 +0300 Subject: [PATCH 22/23] Back built-in iceberg catalogs with foreign server objects The three reserved short catalog names ('postgres', 'object_store', 'rest') previously had no backing pg_foreign_server row, which split the resolution path in two (BuildRestCatalogOptionsFromGUCs for the built-in REST, BuildRestCatalogOptionsFromServer for user-created ones) and prevented us from recording pg_depend edges for tables that use the short names. Pre-create three iceberg_catalog servers at extension upgrade time: pg_lake_postgres_catalog, pg_lake_object_store_catalog, and pg_lake_rest_catalog. The prefixed long names cannot collide with names users may already have, notably the very common 'CREATE SERVER postgres FOREIGN DATA WRAPPER postgres_fdw'. User-facing DDL stays identical: users keep writing WITH (catalog='rest' / 'postgres' / 'object_store'). A new ResolveCatalogServerName helper maps short -> long at server lookup time, and opts->catalog continues to carry the short user-facing name so error messages, the cross-catalog DML guard, and the token cache key never expose the long names. The built-in servers are pure structural anchors: ALTER OPTIONS, ALTER OWNER, RENAME, and DROP are all blocked. Configuration for the built-in REST catalog continues to live in GUCs. 
The resolution path collapses to a single BuildRestCatalogOptionsFromServer that applies GUC defaults first and then any server options (always empty for the built-ins), so the built-in REST and user-created REST go through one code path. The long names are rejected as user-facing catalog= values by IsRestCatalog, the default_catalog GUC check hook, and create_table.c with a clear hint pointing to the short form. Tables created with the short reserved names now record a pg_depend edge against the corresponding built-in server, so DROP EXTENSION CASCADE cleans them up via the standard dependency walker. ValidateIcebergCatalogServerDDL now also rejects: - CREATE SERVER with one of the internal long names - ALTER SERVER ... RENAME TO one of the internal long names - ALTER SERVER OPTIONS on any built-in server (immutable anchors) - ALTER SERVER ... OWNER TO on any built-in server Two small static helpers (RejectIfBuiltinCatalogServerName and RejectIfModifyingBuiltinCatalogServer) collapse the repeating "is built-in" + ereport patterns into single calls. Upgrade safety: the --3.3--3.4 script does a pre-flight check that errors with a clear hint if any of the three long names already exists in the target database, so ALTER EXTENSION UPDATE fails loudly up front rather than partway through. Tests: 23 new cases in test_iceberg_catalog_server.py verifying that the three servers exist and are extension-owned; that CREATE / RENAME-TO / ALTER OPTIONS / ALTER OWNER / DROP on the long names are all blocked; that catalog= with a long name is rejected at CREATE TABLE; that the pg_depend edge is recorded for the built-in postgres catalog; and -- the original collision concern -- that a pre-existing postgres_fdw server literally named 'postgres' coexists with pg_lake_postgres_catalog.
Co-authored-by: Cursor Signed-off-by: sfc-gh-npuka --- .../include/pg_lake/util/catalog_type.h | 20 ++ pg_lake_engine/src/utils/catalog_type.c | 71 +++++ pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql | 55 ++++ .../src/rest_catalog/rest_catalog.c | 158 ++++++++--- pg_lake_table/src/ddl/create_table.c | 35 ++- .../pytests/test_iceberg_catalog_server.py | 246 ++++++++++++++++++ 6 files changed, 540 insertions(+), 45 deletions(-) diff --git a/pg_lake_engine/include/pg_lake/util/catalog_type.h b/pg_lake_engine/include/pg_lake/util/catalog_type.h index ab29380f..50e7a37c 100644 --- a/pg_lake_engine/include/pg_lake/util/catalog_type.h +++ b/pg_lake_engine/include/pg_lake/util/catalog_type.h @@ -22,11 +22,29 @@ /* * The allowed values for IcebergDefaultCatalog, case insensitive. +* +* These are the user-facing short names used as the catalog= option value +* on CREATE TABLE ... USING iceberg. Internally they map to the +* prefixed built-in server names below; users never type the prefixed +* names directly. */ #define POSTGRES_CATALOG_NAME "postgres" #define OBJECT_STORE_CATALOG_NAME "object_store" #define REST_CATALOG_NAME "rest" +/* + * Built-in iceberg_catalog server names. Pre-created by the extension + * upgrade script and exist purely as anchors for pg_depend edges and the + * uniform server-lookup path. All ALTER/DROP/RENAME on these names is + * blocked (configuration for the built-in catalogs lives in GUCs). + * + * The prefix keeps them clear of names users are likely to have already + * used (notably the very common "CREATE SERVER postgres FDW postgres_fdw"). 
+ */ +#define PG_LAKE_POSTGRES_CATALOG_SERVER_NAME "pg_lake_postgres_catalog" +#define PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME "pg_lake_object_store_catalog" +#define PG_LAKE_REST_CATALOG_SERVER_NAME "pg_lake_rest_catalog" + typedef enum IcebergCatalogType { NONE_CATALOG = 0, @@ -66,3 +84,5 @@ extern PGDLLEXPORT bool HasObjectStoreCatalogTableOption(List *options); extern PGDLLEXPORT bool HasReadOnlyOption(List *options); extern PGDLLEXPORT bool IsCatalogOwnedByExtension(const char *catalog); extern PGDLLEXPORT bool IsRestCatalog(const char *catalog); +extern PGDLLEXPORT const char *ResolveCatalogServerName(const char *catalog); +extern PGDLLEXPORT bool IsBuiltinCatalogServerName(const char *serverName); diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index bd23089b..905aa238 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -125,6 +125,12 @@ IsCatalogOwnedByExtension(const char *catalog) * IsRestCatalog returns true if the catalog name identifies a REST catalog. * This includes the built-in 'rest' literal and any user-created * iceberg_catalog server whose TYPE is 'rest'. + * + * The internal built-in server names (e.g. "pg_lake_rest_catalog") are + * deliberately rejected: they are implementation details and must not be + * usable as catalog= option values on CREATE TABLE. Users always type + * the short name "rest", which is mapped to the long server name only + * inside the resolution layer. 
*/ bool IsRestCatalog(const char *catalog) @@ -135,6 +141,9 @@ IsRestCatalog(const char *catalog) if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) return true; + if (IsBuiltinCatalogServerName(catalog)) + return false; + /* Try to look up a server with this name */ bool missingOK = true; ForeignServer *server = GetForeignServerByName(catalog, missingOK); @@ -147,6 +156,68 @@ IsRestCatalog(const char *catalog) if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) return false; + /* + * Any iceberg_catalog server reaching this point is user-created, and + * ValidateIcebergCatalogServerDDL forces all user-created iceberg_catalog + * servers to TYPE 'rest'. + */ Assert(pg_strcasecmp(server->servertype, REST_CATALOG_NAME) == 0); return true; } + + +/* + * ResolveCatalogServerName maps a user-facing catalog identifier to the + * actual pg_foreign_server.srvname. + * + * For the three reserved short names ('postgres', 'object_store', 'rest') + * the result is the corresponding pre-created built-in server name. + * Any other input is returned unchanged (user-created server names match + * their catalog= option value verbatim). + * + * The returned pointer is either a string literal or the input pointer; + * callers must not free it. + */ +const char * +ResolveCatalogServerName(const char *catalog) +{ + if (catalog == NULL) + return NULL; + + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) + return PG_LAKE_REST_CATALOG_SERVER_NAME; + if (pg_strcasecmp(catalog, POSTGRES_CATALOG_NAME) == 0) + return PG_LAKE_POSTGRES_CATALOG_SERVER_NAME; + if (pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0) + return PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME; + + return catalog; +} + + +/* + * IsBuiltinCatalogServerName returns true if the given name matches one + * of the three pre-created built-in iceberg_catalog servers. 
+ * + * Comparison is case-insensitive: both PostgreSQL-parsed identifiers + * (already downcased by the parser unless quoted) and free-form string + * literals supplied as catalog= option values flow through this helper, + * and we want to reject typos like 'PG_LAKE_REST_CATALOG' just as + * firmly as the canonical form. + * + * This is the long-name counterpart to IsCatalogOwnedByExtension, which + * operates on the user-facing short names. Used by the DDL protection + * hook to lock down ALTER/RENAME/OWNER on the extension's structural + * anchors and by create_table.c to reject the long names as catalog= + * option values. + */ +bool +IsBuiltinCatalogServerName(const char *serverName) +{ + if (serverName == NULL) + return false; + + return pg_strcasecmp(serverName, PG_LAKE_REST_CATALOG_SERVER_NAME) == 0 || + pg_strcasecmp(serverName, PG_LAKE_POSTGRES_CATALOG_SERVER_NAME) == 0 || + pg_strcasecmp(serverName, PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME) == 0; +} diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql b/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql index 8e563ce0..6fb0909c 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql @@ -25,3 +25,58 @@ CREATE FOREIGN DATA WRAPPER iceberg_catalog VALIDATOR lake_iceberg.iceberg_catalog_validator; GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO lake_write; + +/* + * Built-in catalog servers. + * + * These three servers are pre-created as structural anchors for the + * pg_depend dependency edges that iceberg tables record against their + * catalog server. They are extension-owned and immutable: ALTER, DROP, + * RENAME, and OWNER changes on them are all blocked. Configuration + * for the built-in catalogs lives in GUCs, not in server options. + * + * Users keep typing the short names ('postgres', 'object_store', 'rest') + * as the catalog= option value on CREATE TABLE; ResolveCatalogServerName + * maps short -> long at server lookup time. 
The long names are prefixed + * so they cannot collide with names users may already have in their + * databases (e.g. a postgres_fdw server literally named 'postgres'). + * + * Pre-flight: error early with a clear hint if any of the long names is + * already in use. This prevents a confusing "server already exists" + * mid-upgrade. + */ +DO $do$ +DECLARE + conflicting text; +BEGIN + SELECT srvname INTO conflicting + FROM pg_foreign_server + WHERE srvname IN ('pg_lake_postgres_catalog', + 'pg_lake_object_store_catalog', + 'pg_lake_rest_catalog') + LIMIT 1; + + IF conflicting IS NOT NULL THEN + RAISE EXCEPTION + 'pg_lake_iceberg upgrade conflicts with existing foreign server %', conflicting + USING HINT = 'Drop or rename the server and re-run ALTER EXTENSION pg_lake_iceberg UPDATE. ' + 'pg_lake_iceberg reserves the names pg_lake_postgres_catalog, ' + 'pg_lake_object_store_catalog, and pg_lake_rest_catalog for internal use.'; + END IF; +END $do$; + +CREATE SERVER pg_lake_postgres_catalog + TYPE 'postgres' + FOREIGN DATA WRAPPER iceberg_catalog; + +CREATE SERVER pg_lake_object_store_catalog + TYPE 'object_store' + FOREIGN DATA WRAPPER iceberg_catalog; + +CREATE SERVER pg_lake_rest_catalog + TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog; + +GRANT USAGE ON FOREIGN SERVER pg_lake_postgres_catalog TO lake_write; +GRANT USAGE ON FOREIGN SERVER pg_lake_object_store_catalog TO lake_write; +GRANT USAGE ON FOREIGN SERVER pg_lake_rest_catalog TO lake_write; diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 4d024c61..606539dc 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -397,18 +397,72 @@ ServerHasDependentRestIcebergTable(Oid serverOid) /* - * ValidateIcebergCatalogServerDDL validates DDL on iceberg_catalog servers: + * RejectIfBuiltinCatalogServerName errors out if `name` is one of the + * three internal pg_lake_iceberg catalog server 
names. Shared by the + * CREATE SERVER and ALTER SERVER ... RENAME TO branches, where the + * concern is "users must not be able to create or end up with a server + * carrying one of these reserved names". + */ +static void +RejectIfBuiltinCatalogServerName(const char *name) +{ + if (!IsBuiltinCatalogServerName(name)) + return; + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("server name \"%s\" is reserved for an internal " + "pg_lake_iceberg catalog server", name), + errhint("Choose a different server name."))); +} + + +/* + * RejectIfModifyingBuiltinCatalogServer errors out if `name` refers to + * one of the three built-in pg_lake_iceberg catalog servers. The + * `operation` verb fills in the user-facing error ("cannot %s the + * built-in...") and should read naturally in that template (e.g. + * "alter", "change owner of"). Shared by the ALTER SERVER OPTIONS + * and ALTER SERVER ... OWNER TO branches. + */ +static void +RejectIfModifyingBuiltinCatalogServer(const char *name, const char *operation) +{ + if (!IsBuiltinCatalogServerName(name)) + return; + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot %s the built-in pg_lake_iceberg " + "catalog server \"%s\"", operation, name), + errhint("Configure built-in catalogs via " + "pg_lake_iceberg GUCs, or create a " + "user-defined iceberg_catalog server."))); +} + + +/* + * ValidateIcebergCatalogServerDDL validates DDL on iceberg_catalog servers. + * + * Two layers of protection: + * + * 1. Short reserved names ('postgres', 'object_store', 'rest') -- these + * are the user-facing catalog= values. We block CREATE SERVER and + * RENAME TO these names so users can't shadow the built-in catalogs. * - * - CREATE SERVER: rejects reserved names ('postgres', 'object_store', - * 'rest'), rejects TYPE 'postgres'/'object_store', and requires - * TYPE 'rest'. - * - ALTER SERVER RENAME TO: rejects renaming to a reserved name. 
- * - ALTER SERVER OPTIONS: blocks SET/ADD rest_endpoint when dependent - * writable tables exist (the table was registered at the original - * endpoint and moving it would break the metadata chain). + * 2. Built-in long server names ('pg_lake_postgres_catalog', etc.) -- + * these are the pre-created anchors. Outside of CREATE/ALTER + * EXTENSION we block CREATE/ALTER/RENAME/OWNER on them entirely so + * they remain pure structural anchors with all configuration in + * GUCs. DROP is blocked by core via extension membership. * - * ALTER/DROP/OWNER on reserved names will fail naturally because no - * server object exists. + * Additionally: + * - CREATE SERVER must specify TYPE 'rest'; TYPE 'postgres' and + * 'object_store' are reserved for the built-in servers. + * - Renaming any iceberg_catalog server is blocked (dependent tables + * record the server name as a string option in ftoptions). + * - For user-created REST servers, ALTER SERVER OPTIONS may not change + * rest_endpoint while dependent iceberg tables exist. */ bool ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, @@ -434,6 +488,8 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, stmt->servername), errhint("Choose a different server name."))); + RejectIfBuiltinCatalogServerName(stmt->servername); + if (stmt->servertype != NULL && (pg_strcasecmp(stmt->servertype, POSTGRES_CATALOG_NAME) == 0 || pg_strcasecmp(stmt->servertype, OBJECT_STORE_CATALOG_NAME) == 0)) @@ -465,11 +521,14 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, stmt->newname), errhint("Choose a different server name."))); + RejectIfBuiltinCatalogServerName(stmt->newname); + /* * Renaming an iceberg_catalog server is blocked because dependent * iceberg tables store the server name as a string option * (catalog='') in pg_foreign_table.ftoptions. A rename would - * silently break those references. + * silently break those references. 
This covers both user-created + * servers and the three built-in ones. */ ForeignServer *server = GetForeignServerByName(strVal(stmt->object), true); @@ -486,6 +545,17 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, errhint("Drop and recreate the server with the new name."))); } } + else if (IsA(parsetree, AlterOwnerStmt)) + { + AlterOwnerStmt *stmt = (AlterOwnerStmt *) parsetree; + + if (stmt->objectType != OBJECT_FOREIGN_SERVER) + return false; + + const char *serverName = strVal(stmt->object); + + RejectIfModifyingBuiltinCatalogServer(serverName, "change owner of"); + } else if (IsA(parsetree, AlterForeignServerStmt)) { AlterForeignServerStmt *stmt = (AlterForeignServerStmt *) parsetree; @@ -501,9 +571,18 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, return false; /* - * Changing rest_endpoint on a server with dependent iceberg tables - * (writable or read-only) would silently point them at a different - * REST catalog, breaking the metadata chain. + * The three built-in servers are immutable structural anchors. + * Configuration for the built-in catalogs lives in GUCs; reject any + * option, version, or other ALTER change so we never end up with + * hidden server-side state that pg_dump cannot round-trip cleanly for + * an extension-owned object. + */ + RejectIfModifyingBuiltinCatalogServer(stmt->servername, "alter"); + + /* + * Changing rest_endpoint on a user-created server with dependent + * iceberg tables (writable or read-only) would silently point them at + * a different REST catalog, breaking the metadata chain. */ ListCell *lc; @@ -588,26 +667,25 @@ ValidateRestCatalogOptions(const RestCatalogOptions * opts, const char *catalog) /* - * Built-in 'rest' catalog: GUCs only, no server lookup. 
- */ -static RestCatalogOptions * -BuildRestCatalogOptionsFromGUCs(void) -{ - RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); - - opts->catalog = pstrdup(REST_CATALOG_NAME); - ApplyGUCDefaults(opts); - ValidateRestCatalogOptions(opts, REST_CATALOG_NAME); - return opts; -} - - -/* - * User-created iceberg_catalog server: GUC defaults + server option - * overrides. + * Build RestCatalogOptions for an iceberg_catalog server. + * + * The built-in pg_lake_rest_catalog server and any user-created + * iceberg_catalog REST server go through the same path: GUC defaults + * first, then server-level options applied on top. ALTER SERVER OPTIONS + * is blocked on the built-in server, so in practice its option set is + * always empty and the GUC defaults survive untouched -- which is + * exactly the historical "GUCs-only built-in REST" behavior, now + * reached through a single code path. + * + * `userVisibleCatalog` is the short identifier the user typed + * (e.g. "rest" or a user server name); it is what we store in + * opts->catalog so that error messages, the cross-catalog DML check, + * and the token cache key all stay in user-facing terms. The long + * built-in server name never leaks past this function. 
*/ static RestCatalogOptions * -BuildRestCatalogOptionsFromServer(const char *serverName) +BuildRestCatalogOptionsFromServer(const char *serverName, + const char *userVisibleCatalog) { ForeignServer *server = GetForeignServerByName(serverName, false); ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); @@ -616,26 +694,26 @@ BuildRestCatalogOptionsFromServer(const char *serverName) RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); - opts->catalog = pstrdup(serverName); + opts->catalog = pstrdup(userVisibleCatalog); ApplyGUCDefaults(opts); ApplyServerOptionOverrides(opts, server); - ValidateRestCatalogOptions(opts, serverName); + ValidateRestCatalogOptions(opts, userVisibleCatalog); return opts; } /* - * ResolveRestCatalogOptions picks the right source based on the catalog - * identifier: GUCs for the built-in 'rest' name, server object for - * user-created iceberg_catalog servers. + * ResolveRestCatalogOptions builds RestCatalogOptions for the catalog + * identifier the user typed. The short reserved names ('postgres', + * 'object_store', 'rest') are mapped to their pre-created built-in + * server names; all other inputs are looked up verbatim. */ RestCatalogOptions * ResolveRestCatalogOptions(const char *catalog) { - if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) - return BuildRestCatalogOptionsFromGUCs(); + const char *serverName = ResolveCatalogServerName(catalog); - return BuildRestCatalogOptionsFromServer(catalog); + return BuildRestCatalogOptionsFromServer(serverName, catalog); } diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 8fb4db9f..df791310 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -391,19 +391,22 @@ ErrorIfCreateForeignTableOnIcebergCatalog(CreateForeignTableStmt *createStmt) * DROP SERVER is blocked while dependent tables exist (and * DROP SERVER CASCADE drops them). 
* - * Only user-created iceberg_catalog servers get a dependency entry; - * built-in catalog names ('rest', 'postgres', 'object_store') are not - * backed by a pg_foreign_server row managed by the user. + * Both user-created iceberg_catalog servers and the three pre-created + * built-in catalog servers get a dependency entry: the short reserved + * names ('rest', 'postgres', 'object_store') are mapped to the + * corresponding built-in server (e.g. 'pg_lake_rest_catalog') via + * ResolveCatalogServerName. */ static void RecordIcebergCatalogServerDependency(Oid relationId, List *options) { char *catalog = GetStringOption(options, "catalog", false); - if (catalog == NULL || IsCatalogOwnedByExtension(catalog)) + if (catalog == NULL) return; - ForeignServer *server = GetForeignServerByName(catalog, true); + const char *serverName = ResolveCatalogServerName(catalog); + ForeignServer *server = GetForeignServerByName(serverName, true); if (server == NULL) return; @@ -731,6 +734,28 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) createStmt->options = lappend(createStmt->options, defaultCatalog); } + else + { + /* + * The pre-created built-in catalog servers (pg_lake_rest_catalog, + * pg_lake_postgres_catalog, pg_lake_object_store_catalog) are + * internal anchors and must not be addressable via the user-facing + * catalog= option. Users say catalog='rest' / 'postgres' / + * 'object_store' and we map short -> long internally. 
+ */ + char *catalogVal = strVal(catalogOption->arg); + + if (IsBuiltinCatalogServerName(catalogVal)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("catalog name \"%s\" is reserved for an internal " + "pg_lake_iceberg catalog server", + catalogVal), + errhint("Use catalog='%s', '%s', or '%s' instead.", + REST_CATALOG_NAME, + POSTGRES_CATALOG_NAME, + OBJECT_STORE_CATALOG_NAME))); + } bool hasRestCatalogOption = HasRestCatalogTableOption(createStmt->options); bool hasObjectStoreCatalogOption = HasObjectStoreCatalogTableOption(createStmt->options); diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py index 70815de1..3131d65f 100644 --- a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -845,3 +845,249 @@ def test_case_sensitive_server_names(superuser_conn, extension): run_command("DROP SERVER test_cs", superuser_conn) run_command('DROP SERVER "TEST_CS"', superuser_conn) superuser_conn.rollback() + + +# ── Built-in catalog servers ─────────────────────────────────────────────── + + +BUILTIN_CATALOG_SERVERS = [ + ("pg_lake_postgres_catalog", "postgres"), + ("pg_lake_object_store_catalog", "object_store"), + ("pg_lake_rest_catalog", "rest"), +] + + +def test_builtin_catalog_servers_exist(pg_conn, extension): + """The three built-in iceberg_catalog servers are pre-created by the + extension and survive as anchors for pg_depend edges.""" + result = run_query( + """ + SELECT srvname, srvtype + FROM pg_foreign_server s + JOIN pg_foreign_data_wrapper fdw ON s.srvfdw = fdw.oid + WHERE fdw.fdwname = 'iceberg_catalog' + AND srvname LIKE 'pg_lake_%_catalog' + ORDER BY srvname + """, + pg_conn, + ) + names_types = [(row["srvname"], row["srvtype"]) for row in result] + assert names_types == sorted(BUILTIN_CATALOG_SERVERS) + + +def test_builtin_servers_are_extension_owned(pg_conn, extension): + 
"""Built-in servers are members of the pg_lake_iceberg extension so + they get included in pg_dump's CREATE EXTENSION shadow rather than + emitted as standalone CREATE SERVER statements.""" + result = run_query( + """ + SELECT srvname + FROM pg_foreign_server s + JOIN pg_depend d ON d.objid = s.oid + JOIN pg_extension e ON e.oid = d.refobjid + WHERE e.extname = 'pg_lake_iceberg' + AND d.deptype = 'e' + AND d.classid = 'pg_foreign_server'::regclass + ORDER BY srvname + """, + pg_conn, + ) + assert [r["srvname"] for r in result] == [ + name for name, _ in sorted(BUILTIN_CATALOG_SERVERS) + ] + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_reject_create_server_with_builtin_long_name( + long_name, _short_name, superuser_conn, extension +): + """CREATE SERVER with the internal long name is blocked; the long + names are implementation details and must never be addressable as + a user-typed server name.""" + err = run_command( + f""" + CREATE SERVER {long_name} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "reserved for an internal" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_reject_rename_to_builtin_long_name( + long_name, _short_name, superuser_conn, extension +): + """Renaming a user-created server TO an internal long name is blocked.""" + run_command( + """ + CREATE SERVER rn_long_src TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + err = run_command( + f"ALTER SERVER rn_long_src RENAME TO {long_name}", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "reserved for an internal" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) 
+def test_alter_options_on_builtin_server_blocked( + long_name, _short_name, superuser_conn, extension +): + """ALTER SERVER OPTIONS on a built-in server is unconditionally blocked + — built-ins are immutable structural anchors and all configuration + lives in GUCs.""" + err = run_command( + f"ALTER SERVER {long_name} OPTIONS (ADD rest_endpoint 'http://x:8181')", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot alter the built-in pg_lake_iceberg catalog server" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_alter_owner_on_builtin_server_blocked( + long_name, _short_name, superuser_conn, extension +): + """ALTER SERVER ... OWNER TO on a built-in server is blocked.""" + err = run_command( + f"ALTER SERVER {long_name} OWNER TO lake_write", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot change owner of the built-in" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_drop_builtin_server_blocked(long_name, _short_name, superuser_conn, extension): + """DROP SERVER on a built-in server is blocked by PostgreSQL itself, + because the server is extension-owned (pg_depend deptype = 'e').""" + err = run_command(f"DROP SERVER {long_name}", superuser_conn, raise_error=False) + assert err is not None + assert "extension pg_lake_iceberg" in str(err) or "depends on" in str(err) + superuser_conn.rollback() + + +def test_reject_rename_builtin_server(superuser_conn, extension): + """Renaming the built-in servers is blocked by the iceberg_catalog + rename guard (which already blocks renaming any iceberg_catalog + server, built-in or user-created).""" + err = run_command( + "ALTER SERVER pg_lake_rest_catalog RENAME TO renamed_builtin", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot rename iceberg_catalog server" in 
str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_reject_catalog_option_with_builtin_long_name( + long_name, _short_name, pg_conn, extension +): + """The catalog= option on CREATE TABLE must use the short names + ('rest', 'postgres', 'object_store'); the internal long names are + rejected with a clear error pointing to the short forms.""" + err = run_command( + f"CREATE TABLE long_name_tbl (id int) USING iceberg WITH (catalog='{long_name}')", + pg_conn, + raise_error=False, + ) + assert err is not None + assert "reserved for an internal" in str(err) + pg_conn.rollback() + + +def test_postgres_fdw_named_postgres_does_not_conflict(superuser_conn, extension): + """A pre-existing CREATE SERVER named 'postgres' (e.g. from postgres_fdw) + does not collide with pg_lake's built-in postgres catalog. The user-facing + short name 'postgres' maps internally to 'pg_lake_postgres_catalog', so the + two coexist without interaction.""" + err = run_command( + "CREATE EXTENSION IF NOT EXISTS postgres_fdw", superuser_conn, raise_error=False + ) + if err is not None: + pytest.skip(f"postgres_fdw not available: {err}") + + run_command( + """ + CREATE SERVER postgres FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (host 'localhost', port '5432', dbname 'postgres') + """, + superuser_conn, + ) + superuser_conn.commit() + + try: + coexist = run_query( + """ + SELECT srvname, fdw.fdwname + FROM pg_foreign_server s + JOIN pg_foreign_data_wrapper fdw ON s.srvfdw = fdw.oid + WHERE srvname IN ('postgres', 'pg_lake_postgres_catalog') + ORDER BY srvname + """, + superuser_conn, + ) + rows = [(r["srvname"], r["fdwname"]) for r in coexist] + assert ("postgres", "postgres_fdw") in rows + assert ("pg_lake_postgres_catalog", "iceberg_catalog") in rows + finally: + run_command("DROP SERVER postgres", superuser_conn) + superuser_conn.commit() + + +def test_pg_depend_edge_for_builtin_postgres_catalog_table( + pg_conn, s3, 
extension, with_default_location +): + """A table created with catalog='postgres' (the short reserved name) + records a pg_depend edge against the built-in pg_lake_postgres_catalog + server. This is what makes DROP EXTENSION CASCADE clean up such + tables, and is the structural reason for pre-creating the built-in + servers in the first place.""" + run_command( + """ + CREATE TABLE pg_depend_tbl (id int) + USING iceberg + WITH (catalog = 'postgres') + """, + pg_conn, + ) + pg_conn.commit() + + try: + result = run_query( + """ + SELECT s.srvname + FROM pg_class c + JOIN pg_depend d ON d.objid = c.oid + AND d.classid = 'pg_class'::regclass + AND d.refclassid = 'pg_foreign_server'::regclass + JOIN pg_foreign_server s ON s.oid = d.refobjid + JOIN pg_foreign_data_wrapper fdw ON s.srvfdw = fdw.oid + WHERE c.relname = 'pg_depend_tbl' + AND fdw.fdwname = 'iceberg_catalog' + """, + pg_conn, + ) + assert len(result) == 1 + assert result[0]["srvname"] == "pg_lake_postgres_catalog" + finally: + run_command("DROP TABLE pg_depend_tbl", pg_conn) + pg_conn.commit() From 87dbbc15e06c90bc3bdf1500ea7320948560e069 Mon Sep 17 00:00:00 2001 From: sfc-gh-npuka Date: Mon, 25 May 2026 15:41:38 +0300 Subject: [PATCH 23/23] Use iceberg_catalog server OID as canonical REST catalog identity Now that every REST catalog -- built-in and user-created -- is backed by a real iceberg_catalog foreign server, the server's OID is a stable, unambiguous identifier. This commit pivots the in-memory machinery off case-insensitive name comparison and onto OID equality. * RestCatalogOptions gains a leading `Oid serverOid` field, populated in BuildRestCatalogOptionsFromServer from server->serverid and propagated by CopyRestCatalogOptions. The struct comment is updated to call out that `catalog` is now purely a user-facing label kept around for error messages, and that `serverOid` is the canonical identity. * The per-catalog OAuth token cache is rekeyed from a NAMEDATALEN char buffer to sizeof(Oid). 
BuildTokenCacheKey is removed; the lookup site now passes &opts->serverOid directly, with a defensive Assert(OidIsValid(opts->serverOid)) so a future caller that forgets to resolve options cannot silently funnel every catalog into the same cache slot. InvalidateRestTokenCache keeps its existing full-flush behavior on any pg_foreign_server change; targeted invalidation is not worth the per-entry bookkeeping at the rate ALTER SERVER actually happens. * The cross-transaction "same REST catalog throughout" guard in BindRelationToXactRestCatalog and the belt-and-suspenders branch of RecordRestCatalogRequestInTx now compare serverOid directly instead of pg_strcasecmp'ing catalog names. The two user-facing names are still surfaced in errdetail so the message remains in the terms the user typed. This also closes the corner case where the same physical server is referenced via different casings in successive statements: they now collapse to the same OID and are treated as one catalog. Co-authored-by: Cursor Signed-off-by: sfc-gh-npuka --- .../pg_lake/rest_catalog/rest_catalog.h | 23 ++++++--- .../src/rest_catalog/rest_catalog.c | 51 ++++++++----------- .../track_iceberg_metadata_changes.c | 19 ++++--- 3 files changed, 48 insertions(+), 45 deletions(-) diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index 92ffc6f1..36ba0357 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -36,16 +36,25 @@ extern int RestCatalogAuthType; extern bool RestCatalogEnableVendedCredentials; /* - * Resolved REST catalog connection options. For the built-in 'rest' - * catalog the fields come entirely from GUC settings. For user-created - * catalogs (CREATE SERVER ... FOREIGN DATA WRAPPER iceberg_catalog) the - * server options override the GUC defaults. + * Resolved REST catalog connection options. 
All REST catalogs -- + * built-in ('rest') and user-created (CREATE SERVER ... FOREIGN DATA + * WRAPPER iceberg_catalog) -- are backed by a real pg_foreign_server + * row; ApplyGUCDefaults populates the defaults, ApplyServerOptionOverrides + * layers on any per-server options. + * + * The canonical identity of a catalog is `serverOid` (the OID of the + * iceberg_catalog server row). Use it for in-memory equality, token + * cache keys, and syscache-driven invalidation. `catalog` stores the + * user-visible short name (e.g. 'rest', 'my_polaris') purely for error + * messages. */ typedef struct RestCatalogOptions { - char *catalog; /* catalog name, used for token cache keying; - * can be 'rest' or a user-created server name - * of TYPE 'rest' */ + Oid serverOid; /* iceberg_catalog server OID; canonical + * identity, never InvalidOid for resolved + * opts */ + char *catalog; /* short user-facing name; used in error + * messages, never for equality */ char *host; char *oauthHostPath; char *clientId; diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 606539dc..58a2a3e5 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -68,14 +68,12 @@ int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_OAUTH2; bool RestCatalogEnableVendedCredentials = true; /* - * Per-rest-catalog token cache. Keyed by catalog. - * Should always be accessed via GetRestCatalogAccessToken() + * Per-rest-catalog token cache, keyed by the iceberg_catalog server OID. + * Should always be accessed via GetRestCatalogAccessToken(). 
*/ -#define TOKEN_CACHE_KEY_LEN NAMEDATALEN - typedef struct RestCatalogTokenCacheEntry { - char key[TOKEN_CACHE_KEY_LEN]; + Oid serverOid; /* hash key */ char *accessToken; TimestampTz accessTokenExpiry; } RestCatalogTokenCacheEntry; @@ -527,8 +525,7 @@ ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, * Renaming an iceberg_catalog server is blocked because dependent * iceberg tables store the server name as a string option * (catalog='<server_name>') in pg_foreign_table.ftoptions. A rename would - * silently break those references. This covers both user-created - * servers and the three built-in ones. + * silently break those references. */ ForeignServer *server = GetForeignServerByName(strVal(stmt->object), true); @@ -694,6 +691,7 @@ BuildRestCatalogOptionsFromServer(const char *serverName, RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); + opts->serverOid = server->serverid; opts->catalog = pstrdup(userVisibleCatalog); ApplyGUCDefaults(opts); ApplyServerOptionOverrides(opts, server); @@ -748,6 +746,7 @@ CopyRestCatalogOptions(MemoryContext dst, const RestCatalogOptions * src) MemoryContext oldctx = MemoryContextSwitchTo(dst); RestCatalogOptions *copy = palloc0(sizeof(RestCatalogOptions)); + copy->serverOid = src->serverOid; copy->catalog = pstrdup(src->catalog); copy->host = pstrdup(src->host); copy->oauthHostPath = src->oauthHostPath ? pstrdup(src->oauthHostPath) : NULL; @@ -1230,18 +1229,6 @@ ReportHTTPError(HttpResult httpResult, int level) } -/* - * Build a cache key for the per-catalog token cache. - */ -static void -BuildTokenCacheKey(char *key, const RestCatalogOptions * opts) -{ - Assert(opts->catalog != NULL); - MemSet(key, 0, TOKEN_CACHE_KEY_LEN); - strlcpy(key, opts->catalog, TOKEN_CACHE_KEY_LEN); -} - - /* * Syscache invalidation callback for pg_foreign_server changes.
* Any ALTER/DROP SERVER blows away the entire token cache so stale @@ -1249,10 +1236,10 @@ BuildTokenCacheKey(char *key, const RestCatalogOptions * opts) * next token lookup. * * We ignore hashvalue and reset the whole cache rather than selectively - * invalidating a single server's entry (as postgres_fdw does). With a - * handful of servers and infrequent ALTER SERVER, the cost of a few - * extra OAuth round-trips is negligible compared to the complexity of - * keying the cache by OID for selective invalidation. + * invalidating a single server's entry. With a handful of servers and + * infrequent ALTER SERVER, the cost of a few extra OAuth round-trips is + * negligible compared to the complexity of tracking per-entry hash + * values for targeted invalidation. */ static void InvalidateRestTokenCache(Datum arg, int cacheid, uint32 hashvalue) @@ -1297,7 +1284,7 @@ InitTokenCacheIfNeeded(void) HASHCTL ctl; memset(&ctl, 0, sizeof(ctl)); - ctl.keysize = TOKEN_CACHE_KEY_LEN; + ctl.keysize = sizeof(Oid); ctl.entrysize = sizeof(RestCatalogTokenCacheEntry); ctl.hcxt = RestTokenCacheCtx; @@ -1309,7 +1296,7 @@ InitTokenCacheIfNeeded(void) /* * Gets an access token from rest catalog. Caches the token per catalog - * (keyed by catalog) until it is about to expire. + * (keyed by iceberg_catalog server OID) until it is about to expire. */ static char * GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken) @@ -1319,15 +1306,19 @@ GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken) (errcode(ERRCODE_INTERNAL_ERROR), errmsg("REST catalog options must not be NULL when fetching access token"))); - InitTokenCacheIfNeeded(); - - char cacheKey[TOKEN_CACHE_KEY_LEN]; + /* + * Every resolved RestCatalogOptions originates from + * BuildRestCatalogOptionsFromServer, which always sets serverOid. A + * missing OID would silently funnel every catalog into the same cache + * slot, so trap it loudly here. 
+ */ + Assert(OidIsValid(opts->serverOid)); - BuildTokenCacheKey(cacheKey, opts); + InitTokenCacheIfNeeded(); bool found = false; RestCatalogTokenCacheEntry *entry = - hash_search(RestCatalogTokenCache, cacheKey, HASH_ENTER, &found); + hash_search(RestCatalogTokenCache, &opts->serverOid, HASH_ENTER, &found); if (!found) { diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index f385fc49..136befbd 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -656,13 +656,14 @@ BindRelationToXactRestCatalog(Oid relationId) } /* - * Both sides of the comparison are the canonical catalog name (lowercase - * "rest" for the built-in catalog, server name as stored in - * pg_foreign_server for user-defined ones). pg_strcasecmp matches the - * casing rules PostgreSQL applies to identifier resolution. + * Identity is the iceberg_catalog server OID, not the user-typed name. + * Two requests for the same physical server -- whether the user spelled + * the catalog as 'rest', 'REST', or as the underlying built-in server + * name on different statements -- collapse to the same OID and are + * treated as one catalog. The user-facing names are still reported in + * the error message. */ - if (pg_strcasecmp(PgLakeXactRestCatalog->catalogOpts->catalog, - resolvedOpts->catalog) != 0) + if (PgLakeXactRestCatalog->catalogOpts->serverOid != resolvedOpts->serverOid) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot modify tables from different REST catalogs " @@ -715,9 +716,11 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT * bind through BindRelationToXactRestCatalog() at statement time or * populate catalogOpts via the branch above, so in practice we never * reach here with a mismatched catalog. 
Kept as a last line of - * defense for any future code path that forgets to do so. + * defense for any future code path that forgets to do so. See the + * companion comment in BindRelationToXactRestCatalog for why identity + * is the server OID, not the user-typed name. */ - else if (pg_strcasecmp(PgLakeXactRestCatalog->catalogOpts->catalog, resolvedOpts->catalog) != 0) + else if (PgLakeXactRestCatalog->catalogOpts->serverOid != resolvedOpts->serverOid) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot modify tables from different REST catalogs "